Updated cgemm- and sgemm-kernel for POWER8 SMP
This commit is contained in:
parent
a51102e9b7
commit
9c42f0374a
|
@ -798,7 +798,7 @@ Lmcount$lazy_ptr:
|
||||||
#elif defined(PPC440FP2)
|
#elif defined(PPC440FP2)
|
||||||
#define BUFFER_SIZE ( 16 << 20)
|
#define BUFFER_SIZE ( 16 << 20)
|
||||||
#elif defined(POWER8)
|
#elif defined(POWER8)
|
||||||
#define BUFFER_SIZE ( 64 << 20)
|
#define BUFFER_SIZE ( 32 << 20)
|
||||||
#else
|
#else
|
||||||
#define BUFFER_SIZE ( 16 << 20)
|
#define BUFFER_SIZE ( 16 << 20)
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
#define STACKSIZE 512
|
#define STACKSIZE 32000
|
||||||
#define ALPHA_R_SP 296(SP)
|
#define ALPHA_R_SP 296(SP)
|
||||||
#define ALPHA_I_SP 304(SP)
|
#define ALPHA_I_SP 304(SP)
|
||||||
#define FZERO 312(SP)
|
#define FZERO 312(SP)
|
||||||
|
@ -136,6 +136,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define alpha_sr vs30
|
#define alpha_sr vs30
|
||||||
#define alpha_si vs31
|
#define alpha_si vs31
|
||||||
|
|
||||||
|
#define FRAMEPOINTER r12
|
||||||
|
|
||||||
#define BBUFFER r14
|
#define BBUFFER r14
|
||||||
#define L r15
|
#define L r15
|
||||||
|
@ -161,6 +162,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
PROFCODE
|
PROFCODE
|
||||||
|
|
||||||
|
mr FRAMEPOINTER, SP
|
||||||
|
addi SP, SP, -STACKSIZE
|
||||||
|
addi SP, SP, -STACKSIZE
|
||||||
|
addi SP, SP, -STACKSIZE
|
||||||
addi SP, SP, -STACKSIZE
|
addi SP, SP, -STACKSIZE
|
||||||
li r0, 0
|
li r0, 0
|
||||||
|
|
||||||
|
@ -233,37 +238,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#ifdef linux
|
#ifdef linux
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
ld LDC, FRAMESLOT(0) + STACKSIZE(SP)
|
ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(_AIX) || defined(__APPLE__)
|
#if defined(_AIX) || defined(__APPLE__)
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
ld LDC, FRAMESLOT(0) + STACKSIZE(SP)
|
ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
|
||||||
#else
|
#else
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
lwz B, FRAMESLOT(0) + STACKSIZE(SP)
|
lwz B, FRAMESLOT(0) + 0(FRAMEPOINTER)
|
||||||
lwz C, FRAMESLOT(1) + STACKSIZE(SP)
|
lwz C, FRAMESLOT(1) + 0(FRAMEPOINTER)
|
||||||
lwz LDC, FRAMESLOT(2) + STACKSIZE(SP)
|
lwz LDC, FRAMESLOT(2) + 0(FRAMEPOINTER)
|
||||||
#else
|
#else
|
||||||
lwz LDC, FRAMESLOT(0) + STACKSIZE(SP)
|
lwz LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef TRMMKERNEL
|
#ifdef TRMMKERNEL
|
||||||
#if defined(linux) && defined(__64BIT__)
|
#if defined(linux) && defined(__64BIT__)
|
||||||
ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP)
|
ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(_AIX) || defined(__APPLE__)
|
#if defined(_AIX) || defined(__APPLE__)
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP)
|
ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
|
||||||
#else
|
#else
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP)
|
lwz OFFSET, FRAMESLOT(3) + 0(FRAMEPOINTER)
|
||||||
#else
|
#else
|
||||||
lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP)
|
lwz OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -290,9 +295,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
li o32 , 32
|
li o32 , 32
|
||||||
li o48 , 48
|
li o48 , 48
|
||||||
|
|
||||||
li T1, 512
|
addi BBUFFER, SP, 512+4096
|
||||||
slwi T1, T1, 16
|
li T1, -4096
|
||||||
add BBUFFER, A, T1
|
and BBUFFER, BBUFFER, T1
|
||||||
|
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
|
@ -392,6 +397,9 @@ L999:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
|
addi SP, SP, STACKSIZE
|
||||||
|
addi SP, SP, STACKSIZE
|
||||||
|
addi SP, SP, STACKSIZE
|
||||||
|
|
||||||
blr
|
blr
|
||||||
|
|
||||||
|
|
|
@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
#define STACKSIZE 512
|
#define STACKSIZE 32752
|
||||||
#define ALPHA_SP 296(SP)
|
#define ALPHA_SP 296(SP)
|
||||||
#define FZERO 304(SP)
|
#define FZERO 304(SP)
|
||||||
#else
|
#else
|
||||||
|
@ -132,6 +132,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define o0 0
|
#define o0 0
|
||||||
|
|
||||||
|
#define FRAMEPOINTER r12
|
||||||
|
|
||||||
#define BBUFFER r14
|
#define BBUFFER r14
|
||||||
#define o4 r15
|
#define o4 r15
|
||||||
#define o12 r16
|
#define o12 r16
|
||||||
|
@ -160,6 +162,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
PROFCODE
|
PROFCODE
|
||||||
|
|
||||||
|
mr FRAMEPOINTER, SP
|
||||||
|
addi SP, SP, -STACKSIZE
|
||||||
|
addi SP, SP, -STACKSIZE
|
||||||
|
addi SP, SP, -STACKSIZE
|
||||||
addi SP, SP, -STACKSIZE
|
addi SP, SP, -STACKSIZE
|
||||||
li r0, 0
|
li r0, 0
|
||||||
|
|
||||||
|
@ -231,7 +237,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#if defined(_AIX) || defined(__APPLE__)
|
#if defined(_AIX) || defined(__APPLE__)
|
||||||
#if !defined(__64BIT__) && defined(DOUBLE)
|
#if !defined(__64BIT__) && defined(DOUBLE)
|
||||||
lwz LDC, FRAMESLOT(0) + STACKSIZE(SP)
|
lwz LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -239,17 +245,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#if defined(TRMMKERNEL)
|
#if defined(TRMMKERNEL)
|
||||||
#if defined(linux) && defined(__64BIT__)
|
#if defined(linux) && defined(__64BIT__)
|
||||||
ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP)
|
ld OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(_AIX) || defined(__APPLE__)
|
#if defined(_AIX) || defined(__APPLE__)
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP)
|
ld OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER)
|
||||||
#else
|
#else
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP)
|
lwz OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
|
||||||
#else
|
#else
|
||||||
lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP)
|
lwz OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER)
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -271,9 +277,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
li o32, 32
|
li o32, 32
|
||||||
li o48, 48
|
li o48, 48
|
||||||
|
|
||||||
li T1, 512
|
addi BBUFFER, SP, 512+4096
|
||||||
slwi T1, T1, 16
|
li T1, -4096
|
||||||
add BBUFFER, A, T1
|
and BBUFFER, BBUFFER, T1
|
||||||
|
|
||||||
addi T1, SP, 300
|
addi T1, SP, 300
|
||||||
stxsspx f1, o0 , T1
|
stxsspx f1, o0 , T1
|
||||||
|
@ -355,6 +361,9 @@ L999:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
|
addi SP, SP, STACKSIZE
|
||||||
|
addi SP, SP, STACKSIZE
|
||||||
|
addi SP, SP, STACKSIZE
|
||||||
|
|
||||||
blr
|
blr
|
||||||
|
|
||||||
|
|
8
param.h
8
param.h
|
@ -1964,8 +1964,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define SNUMOPT 16
|
#define SNUMOPT 16
|
||||||
#define DNUMOPT 8
|
#define DNUMOPT 8
|
||||||
|
|
||||||
#define GEMM_DEFAULT_OFFSET_A 131072
|
#define GEMM_DEFAULT_OFFSET_A 4096
|
||||||
#define GEMM_DEFAULT_OFFSET_B 131072
|
#define GEMM_DEFAULT_OFFSET_B 4096
|
||||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||||
|
@ -1987,9 +1987,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define CGEMM_DEFAULT_Q 720
|
#define CGEMM_DEFAULT_Q 720
|
||||||
#define ZGEMM_DEFAULT_Q 720
|
#define ZGEMM_DEFAULT_Q 720
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_R 14400
|
#define SGEMM_DEFAULT_R 21600
|
||||||
#define DGEMM_DEFAULT_R 14400
|
#define DGEMM_DEFAULT_R 14400
|
||||||
#define CGEMM_DEFAULT_R 14400
|
#define CGEMM_DEFAULT_R 16200
|
||||||
#define ZGEMM_DEFAULT_R 14400
|
#define ZGEMM_DEFAULT_R 14400
|
||||||
|
|
||||||
#define SYMV_P 8
|
#define SYMV_P 8
|
||||||
|
|
Loading…
Reference in New Issue