Updated cgemm and sgemm kernels for POWER8 SMP

This commit is contained in:
Werner Saar 2016-04-07 15:08:15 +02:00
parent a51102e9b7
commit 9c42f0374a
4 changed files with 45 additions and 28 deletions

View File

@@ -798,7 +798,7 @@ Lmcount$lazy_ptr:
#elif defined(PPC440FP2) #elif defined(PPC440FP2)
#define BUFFER_SIZE ( 16 << 20) #define BUFFER_SIZE ( 16 << 20)
#elif defined(POWER8) #elif defined(POWER8)
#define BUFFER_SIZE ( 64 << 20) #define BUFFER_SIZE ( 32 << 20)
#else #else
#define BUFFER_SIZE ( 16 << 20) #define BUFFER_SIZE ( 16 << 20)
#endif #endif

View File

@@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#ifdef __64BIT__ #ifdef __64BIT__
#define STACKSIZE 512 #define STACKSIZE 32000
#define ALPHA_R_SP 296(SP) #define ALPHA_R_SP 296(SP)
#define ALPHA_I_SP 304(SP) #define ALPHA_I_SP 304(SP)
#define FZERO 312(SP) #define FZERO 312(SP)
@@ -136,6 +136,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define alpha_sr vs30 #define alpha_sr vs30
#define alpha_si vs31 #define alpha_si vs31
#define FRAMEPOINTER r12
#define BBUFFER r14 #define BBUFFER r14
#define L r15 #define L r15
@@ -161,6 +162,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
PROLOGUE PROLOGUE
PROFCODE PROFCODE
mr FRAMEPOINTER, SP
addi SP, SP, -STACKSIZE
addi SP, SP, -STACKSIZE
addi SP, SP, -STACKSIZE
addi SP, SP, -STACKSIZE addi SP, SP, -STACKSIZE
li r0, 0 li r0, 0
@@ -233,37 +238,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef linux #ifdef linux
#ifdef __64BIT__ #ifdef __64BIT__
ld LDC, FRAMESLOT(0) + STACKSIZE(SP) ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
#endif #endif
#endif #endif
#if defined(_AIX) || defined(__APPLE__) #if defined(_AIX) || defined(__APPLE__)
#ifdef __64BIT__ #ifdef __64BIT__
ld LDC, FRAMESLOT(0) + STACKSIZE(SP) ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
#else #else
#ifdef DOUBLE #ifdef DOUBLE
lwz B, FRAMESLOT(0) + STACKSIZE(SP) lwz B, FRAMESLOT(0) + 0(FRAMEPOINTER)
lwz C, FRAMESLOT(1) + STACKSIZE(SP) lwz C, FRAMESLOT(1) + 0(FRAMEPOINTER)
lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) lwz LDC, FRAMESLOT(2) + 0(FRAMEPOINTER)
#else #else
lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) lwz LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
#endif #endif
#endif #endif
#endif #endif
#ifdef TRMMKERNEL #ifdef TRMMKERNEL
#if defined(linux) && defined(__64BIT__) #if defined(linux) && defined(__64BIT__)
ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
#endif #endif
#if defined(_AIX) || defined(__APPLE__) #if defined(_AIX) || defined(__APPLE__)
#ifdef __64BIT__ #ifdef __64BIT__
ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
#else #else
#ifdef DOUBLE #ifdef DOUBLE
lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) lwz OFFSET, FRAMESLOT(3) + 0(FRAMEPOINTER)
#else #else
lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) lwz OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
#endif #endif
#endif #endif
#endif #endif
@@ -290,9 +295,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
li o32 , 32 li o32 , 32
li o48 , 48 li o48 , 48
li T1, 512 addi BBUFFER, SP, 512+4096
slwi T1, T1, 16 li T1, -4096
add BBUFFER, A, T1 and BBUFFER, BBUFFER, T1
#ifdef __64BIT__ #ifdef __64BIT__
@@ -392,6 +397,9 @@ L999:
#endif #endif
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
addi SP, SP, STACKSIZE
addi SP, SP, STACKSIZE
addi SP, SP, STACKSIZE
blr blr

View File

@@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#ifdef __64BIT__ #ifdef __64BIT__
#define STACKSIZE 512 #define STACKSIZE 32752
#define ALPHA_SP 296(SP) #define ALPHA_SP 296(SP)
#define FZERO 304(SP) #define FZERO 304(SP)
#else #else
@@ -132,6 +132,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define o0 0 #define o0 0
#define FRAMEPOINTER r12
#define BBUFFER r14 #define BBUFFER r14
#define o4 r15 #define o4 r15
#define o12 r16 #define o12 r16
@@ -160,6 +162,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
PROLOGUE PROLOGUE
PROFCODE PROFCODE
mr FRAMEPOINTER, SP
addi SP, SP, -STACKSIZE
addi SP, SP, -STACKSIZE
addi SP, SP, -STACKSIZE
addi SP, SP, -STACKSIZE addi SP, SP, -STACKSIZE
li r0, 0 li r0, 0
@@ -231,7 +237,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(_AIX) || defined(__APPLE__) #if defined(_AIX) || defined(__APPLE__)
#if !defined(__64BIT__) && defined(DOUBLE) #if !defined(__64BIT__) && defined(DOUBLE)
lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) lwz LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
#endif #endif
#endif #endif
@@ -239,17 +245,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(TRMMKERNEL) #if defined(TRMMKERNEL)
#if defined(linux) && defined(__64BIT__) #if defined(linux) && defined(__64BIT__)
ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) ld OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER)
#endif #endif
#if defined(_AIX) || defined(__APPLE__) #if defined(_AIX) || defined(__APPLE__)
#ifdef __64BIT__ #ifdef __64BIT__
ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) ld OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER)
#else #else
#ifdef DOUBLE #ifdef DOUBLE
lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) lwz OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
#else #else
lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) lwz OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER)
#endif #endif
#endif #endif
#endif #endif
@@ -271,9 +277,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
li o32, 32 li o32, 32
li o48, 48 li o48, 48
li T1, 512 addi BBUFFER, SP, 512+4096
slwi T1, T1, 16 li T1, -4096
add BBUFFER, A, T1 and BBUFFER, BBUFFER, T1
addi T1, SP, 300 addi T1, SP, 300
stxsspx f1, o0 , T1 stxsspx f1, o0 , T1
@@ -355,6 +361,9 @@ L999:
#endif #endif
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
addi SP, SP, STACKSIZE
addi SP, SP, STACKSIZE
addi SP, SP, STACKSIZE
blr blr

View File

@@ -1964,8 +1964,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SNUMOPT 16 #define SNUMOPT 16
#define DNUMOPT 8 #define DNUMOPT 8
#define GEMM_DEFAULT_OFFSET_A 131072 #define GEMM_DEFAULT_OFFSET_A 4096
#define GEMM_DEFAULT_OFFSET_B 131072 #define GEMM_DEFAULT_OFFSET_B 4096
#define GEMM_DEFAULT_ALIGN 0x03fffUL #define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_M 16
@@ -1987,9 +1987,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CGEMM_DEFAULT_Q 720 #define CGEMM_DEFAULT_Q 720
#define ZGEMM_DEFAULT_Q 720 #define ZGEMM_DEFAULT_Q 720
#define SGEMM_DEFAULT_R 14400 #define SGEMM_DEFAULT_R 21600
#define DGEMM_DEFAULT_R 14400 #define DGEMM_DEFAULT_R 14400
#define CGEMM_DEFAULT_R 14400 #define CGEMM_DEFAULT_R 16200
#define ZGEMM_DEFAULT_R 14400 #define ZGEMM_DEFAULT_R 14400
#define SYMV_P 8 #define SYMV_P 8