Updated cgemm- and sgemm-kernel for POWER8 SMP
This commit is contained in:
parent
a51102e9b7
commit
9c42f0374a
|
@ -798,7 +798,7 @@ Lmcount$lazy_ptr:
|
|||
#elif defined(PPC440FP2)
|
||||
#define BUFFER_SIZE ( 16 << 20)
|
||||
#elif defined(POWER8)
|
||||
#define BUFFER_SIZE ( 64 << 20)
|
||||
#define BUFFER_SIZE ( 32 << 20)
|
||||
#else
|
||||
#define BUFFER_SIZE ( 16 << 20)
|
||||
#endif
|
||||
|
|
|
@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
#ifdef __64BIT__
|
||||
#define STACKSIZE 512
|
||||
#define STACKSIZE 32000
|
||||
#define ALPHA_R_SP 296(SP)
|
||||
#define ALPHA_I_SP 304(SP)
|
||||
#define FZERO 312(SP)
|
||||
|
@ -136,6 +136,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define alpha_sr vs30
|
||||
#define alpha_si vs31
|
||||
|
||||
#define FRAMEPOINTER r12
|
||||
|
||||
#define BBUFFER r14
|
||||
#define L r15
|
||||
|
@ -161,6 +162,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
PROLOGUE
|
||||
PROFCODE
|
||||
|
||||
mr FRAMEPOINTER, SP
|
||||
addi SP, SP, -STACKSIZE
|
||||
addi SP, SP, -STACKSIZE
|
||||
addi SP, SP, -STACKSIZE
|
||||
addi SP, SP, -STACKSIZE
|
||||
li r0, 0
|
||||
|
||||
|
@ -233,37 +238,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#ifdef linux
|
||||
#ifdef __64BIT__
|
||||
ld LDC, FRAMESLOT(0) + STACKSIZE(SP)
|
||||
ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(_AIX) || defined(__APPLE__)
|
||||
#ifdef __64BIT__
|
||||
ld LDC, FRAMESLOT(0) + STACKSIZE(SP)
|
||||
ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
lwz B, FRAMESLOT(0) + STACKSIZE(SP)
|
||||
lwz C, FRAMESLOT(1) + STACKSIZE(SP)
|
||||
lwz LDC, FRAMESLOT(2) + STACKSIZE(SP)
|
||||
lwz B, FRAMESLOT(0) + 0(FRAMEPOINTER)
|
||||
lwz C, FRAMESLOT(1) + 0(FRAMEPOINTER)
|
||||
lwz LDC, FRAMESLOT(2) + 0(FRAMEPOINTER)
|
||||
#else
|
||||
lwz LDC, FRAMESLOT(0) + STACKSIZE(SP)
|
||||
lwz LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef TRMMKERNEL
|
||||
#if defined(linux) && defined(__64BIT__)
|
||||
ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP)
|
||||
ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
|
||||
#endif
|
||||
|
||||
#if defined(_AIX) || defined(__APPLE__)
|
||||
#ifdef __64BIT__
|
||||
ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP)
|
||||
ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP)
|
||||
lwz OFFSET, FRAMESLOT(3) + 0(FRAMEPOINTER)
|
||||
#else
|
||||
lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP)
|
||||
lwz OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
@ -290,9 +295,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
li o32 , 32
|
||||
li o48 , 48
|
||||
|
||||
li T1, 512
|
||||
slwi T1, T1, 16
|
||||
add BBUFFER, A, T1
|
||||
addi BBUFFER, SP, 512+4096
|
||||
li T1, -4096
|
||||
and BBUFFER, BBUFFER, T1
|
||||
|
||||
|
||||
#ifdef __64BIT__
|
||||
|
@ -392,6 +397,9 @@ L999:
|
|||
#endif
|
||||
|
||||
addi SP, SP, STACKSIZE
|
||||
addi SP, SP, STACKSIZE
|
||||
addi SP, SP, STACKSIZE
|
||||
addi SP, SP, STACKSIZE
|
||||
|
||||
blr
|
||||
|
||||
|
|
|
@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
#ifdef __64BIT__
|
||||
#define STACKSIZE 512
|
||||
#define STACKSIZE 32752
|
||||
#define ALPHA_SP 296(SP)
|
||||
#define FZERO 304(SP)
|
||||
#else
|
||||
|
@ -132,6 +132,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define o0 0
|
||||
|
||||
#define FRAMEPOINTER r12
|
||||
|
||||
#define BBUFFER r14
|
||||
#define o4 r15
|
||||
#define o12 r16
|
||||
|
@ -160,6 +162,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
PROLOGUE
|
||||
PROFCODE
|
||||
|
||||
mr FRAMEPOINTER, SP
|
||||
addi SP, SP, -STACKSIZE
|
||||
addi SP, SP, -STACKSIZE
|
||||
addi SP, SP, -STACKSIZE
|
||||
addi SP, SP, -STACKSIZE
|
||||
li r0, 0
|
||||
|
||||
|
@ -231,7 +237,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#if defined(_AIX) || defined(__APPLE__)
|
||||
#if !defined(__64BIT__) && defined(DOUBLE)
|
||||
lwz LDC, FRAMESLOT(0) + STACKSIZE(SP)
|
||||
lwz LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -239,17 +245,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#if defined(TRMMKERNEL)
|
||||
#if defined(linux) && defined(__64BIT__)
|
||||
ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP)
|
||||
ld OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER)
|
||||
#endif
|
||||
|
||||
#if defined(_AIX) || defined(__APPLE__)
|
||||
#ifdef __64BIT__
|
||||
ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP)
|
||||
ld OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER)
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP)
|
||||
lwz OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
|
||||
#else
|
||||
lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP)
|
||||
lwz OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER)
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
@ -271,9 +277,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
li o32, 32
|
||||
li o48, 48
|
||||
|
||||
li T1, 512
|
||||
slwi T1, T1, 16
|
||||
add BBUFFER, A, T1
|
||||
addi BBUFFER, SP, 512+4096
|
||||
li T1, -4096
|
||||
and BBUFFER, BBUFFER, T1
|
||||
|
||||
addi T1, SP, 300
|
||||
stxsspx f1, o0 , T1
|
||||
|
@ -355,6 +361,9 @@ L999:
|
|||
#endif
|
||||
|
||||
addi SP, SP, STACKSIZE
|
||||
addi SP, SP, STACKSIZE
|
||||
addi SP, SP, STACKSIZE
|
||||
addi SP, SP, STACKSIZE
|
||||
|
||||
blr
|
||||
|
||||
|
|
8
param.h
8
param.h
|
@ -1964,8 +1964,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define SNUMOPT 16
|
||||
#define DNUMOPT 8
|
||||
|
||||
#define GEMM_DEFAULT_OFFSET_A 131072
|
||||
#define GEMM_DEFAULT_OFFSET_B 131072
|
||||
#define GEMM_DEFAULT_OFFSET_A 4096
|
||||
#define GEMM_DEFAULT_OFFSET_B 4096
|
||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||
|
@ -1987,9 +1987,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define CGEMM_DEFAULT_Q 720
|
||||
#define ZGEMM_DEFAULT_Q 720
|
||||
|
||||
#define SGEMM_DEFAULT_R 14400
|
||||
#define SGEMM_DEFAULT_R 21600
|
||||
#define DGEMM_DEFAULT_R 14400
|
||||
#define CGEMM_DEFAULT_R 14400
|
||||
#define CGEMM_DEFAULT_R 16200
|
||||
#define ZGEMM_DEFAULT_R 14400
|
||||
|
||||
#define SYMV_P 8
|
||||
|
|
Loading…
Reference in New Issue