Updated cgemm- and sgemm-kernel for POWER8 SMP

This commit is contained in:
Werner Saar 2016-04-07 15:08:15 +02:00
parent a51102e9b7
commit 9c42f0374a
4 changed files with 45 additions and 28 deletions

View File

@@ -798,7 +798,7 @@ Lmcount$lazy_ptr:
 #elif defined(PPC440FP2)
 #define BUFFER_SIZE ( 16 << 20)
 #elif defined(POWER8)
-#define BUFFER_SIZE ( 64 << 20)
+#define BUFFER_SIZE ( 32 << 20)
 #else
 #define BUFFER_SIZE ( 16 << 20)
 #endif

View File

@@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
 #ifdef __64BIT__
-#define STACKSIZE 512
+#define STACKSIZE 32000
 #define ALPHA_R_SP 296(SP)
 #define ALPHA_I_SP 304(SP)
 #define FZERO 312(SP)
@@ -136,6 +136,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define alpha_sr vs30
 #define alpha_si vs31
+#define FRAMEPOINTER r12
 #define BBUFFER r14
 #define L r15
@@ -161,6 +162,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 PROLOGUE
 PROFCODE
+mr FRAMEPOINTER, SP
 addi SP, SP, -STACKSIZE
+addi SP, SP, -STACKSIZE
+addi SP, SP, -STACKSIZE
+addi SP, SP, -STACKSIZE
 li r0, 0
@@ -233,37 +238,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #ifdef linux
 #ifdef __64BIT__
-ld LDC, FRAMESLOT(0) + STACKSIZE(SP)
+ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
 #endif
 #endif
 #if defined(_AIX) || defined(__APPLE__)
 #ifdef __64BIT__
-ld LDC, FRAMESLOT(0) + STACKSIZE(SP)
+ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
 #else
 #ifdef DOUBLE
-lwz B, FRAMESLOT(0) + STACKSIZE(SP)
-lwz C, FRAMESLOT(1) + STACKSIZE(SP)
-lwz LDC, FRAMESLOT(2) + STACKSIZE(SP)
+lwz B, FRAMESLOT(0) + 0(FRAMEPOINTER)
+lwz C, FRAMESLOT(1) + 0(FRAMEPOINTER)
+lwz LDC, FRAMESLOT(2) + 0(FRAMEPOINTER)
 #else
-lwz LDC, FRAMESLOT(0) + STACKSIZE(SP)
+lwz LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
 #endif
 #endif
 #endif
 #ifdef TRMMKERNEL
 #if defined(linux) && defined(__64BIT__)
-ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP)
+ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
 #endif
 #if defined(_AIX) || defined(__APPLE__)
 #ifdef __64BIT__
-ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP)
+ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
 #else
 #ifdef DOUBLE
-lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP)
+lwz OFFSET, FRAMESLOT(3) + 0(FRAMEPOINTER)
 #else
-lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP)
+lwz OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
 #endif
 #endif
 #endif
@@ -290,9 +295,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 li o32 , 32
 li o48 , 48
-li T1, 512
-slwi T1, T1, 16
-add BBUFFER, A, T1
+addi BBUFFER, SP, 512+4096
+li T1, -4096
+and BBUFFER, BBUFFER, T1
 #ifdef __64BIT__
@@ -392,6 +397,9 @@ L999:
 #endif
 addi SP, SP, STACKSIZE
+addi SP, SP, STACKSIZE
+addi SP, SP, STACKSIZE
+addi SP, SP, STACKSIZE
 blr

View File

@@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
 #ifdef __64BIT__
-#define STACKSIZE 512
+#define STACKSIZE 32752
 #define ALPHA_SP 296(SP)
 #define FZERO 304(SP)
 #else
@@ -132,6 +132,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define o0 0
+#define FRAMEPOINTER r12
 #define BBUFFER r14
 #define o4 r15
 #define o12 r16
@@ -160,6 +162,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 PROLOGUE
 PROFCODE
+mr FRAMEPOINTER, SP
 addi SP, SP, -STACKSIZE
+addi SP, SP, -STACKSIZE
+addi SP, SP, -STACKSIZE
+addi SP, SP, -STACKSIZE
 li r0, 0
@@ -231,7 +237,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #if defined(_AIX) || defined(__APPLE__)
 #if !defined(__64BIT__) && defined(DOUBLE)
-lwz LDC, FRAMESLOT(0) + STACKSIZE(SP)
+lwz LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
 #endif
 #endif
@@ -239,17 +245,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #if defined(TRMMKERNEL)
 #if defined(linux) && defined(__64BIT__)
-ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP)
+ld OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER)
 #endif
 #if defined(_AIX) || defined(__APPLE__)
 #ifdef __64BIT__
-ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP)
+ld OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER)
 #else
 #ifdef DOUBLE
-lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP)
+lwz OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
 #else
-lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP)
+lwz OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER)
 #endif
 #endif
 #endif
@@ -271,9 +277,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 li o32, 32
 li o48, 48
-li T1, 512
-slwi T1, T1, 16
-add BBUFFER, A, T1
+addi BBUFFER, SP, 512+4096
+li T1, -4096
+and BBUFFER, BBUFFER, T1
 addi T1, SP, 300
 stxsspx f1, o0 , T1
@@ -355,6 +361,9 @@ L999:
 #endif
 addi SP, SP, STACKSIZE
+addi SP, SP, STACKSIZE
+addi SP, SP, STACKSIZE
+addi SP, SP, STACKSIZE
 blr

View File

@@ -1964,8 +1964,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define SNUMOPT 16
 #define DNUMOPT 8
-#define GEMM_DEFAULT_OFFSET_A 131072
-#define GEMM_DEFAULT_OFFSET_B 131072
+#define GEMM_DEFAULT_OFFSET_A 4096
+#define GEMM_DEFAULT_OFFSET_B 4096
 #define GEMM_DEFAULT_ALIGN 0x03fffUL
 #define SGEMM_DEFAULT_UNROLL_M 16
@@ -1987,9 +1987,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define CGEMM_DEFAULT_Q 720
 #define ZGEMM_DEFAULT_Q 720
-#define SGEMM_DEFAULT_R 14400
+#define SGEMM_DEFAULT_R 21600
 #define DGEMM_DEFAULT_R 14400
-#define CGEMM_DEFAULT_R 14400
+#define CGEMM_DEFAULT_R 16200
 #define ZGEMM_DEFAULT_R 14400
 #define SYMV_P 8