Merge pull request #1317 from martin-frbg/power8-asm

Save and restore VSX registers
This commit is contained in:
Martin Kroeker 2017-10-08 23:30:46 +02:00 committed by GitHub
commit 1eb43cccad
15 changed files with 884 additions and 89 deletions

View File

@ -82,15 +82,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#ifdef __64BIT__ #ifdef __64BIT__
#define STACKSIZE 32000 #define STACKSIZE 32196
#define ALPHA_R_SP 296(SP) #define ALPHA_R_SP 296+196(SP)
#define ALPHA_I_SP 304(SP) #define ALPHA_I_SP 304+196(SP)
#define FZERO 312(SP) #define FZERO 312+196(SP)
#else #else
#define STACKSIZE 256 #define STACKSIZE 456
#define ALPHA_R_SP 224(SP) #define ALPHA_R_SP 224+200(SP)
#define ALPHA_I_SP 232(SP) #define ALPHA_I_SP 232+200(SP)
#define FZERO 240(SP) #define FZERO 240+200(SP)
#endif #endif
#define M r3 #define M r3
@ -138,6 +138,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define FRAMEPOINTER r12 #define FRAMEPOINTER r12
#define VECSAVE r11
#define BBUFFER r14 #define BBUFFER r14
#define L r15 #define L r15
#define o12 r16 #define o12 r16
@ -167,6 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
addi SP, SP, -STACKSIZE addi SP, SP, -STACKSIZE
addi SP, SP, -STACKSIZE addi SP, SP, -STACKSIZE
addi SP, SP, -STACKSIZE addi SP, SP, -STACKSIZE
li r0, 0 li r0, 0
stfd f14, 0(SP) stfd f14, 0(SP)
@ -211,6 +214,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
std r16, 264(SP) std r16, 264(SP)
std r15, 272(SP) std r15, 272(SP)
std r14, 280(SP) std r14, 280(SP)
addi r11, SP, 288
#else #else
stw r31, 144(SP) stw r31, 144(SP)
stw r30, 148(SP) stw r30, 148(SP)
@ -230,7 +234,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
stw r16, 204(SP) stw r16, 204(SP)
stw r15, 208(SP) stw r15, 208(SP)
stw r14, 212(SP) stw r14, 212(SP)
addi r11, SP, 224
#endif #endif
// Store vector registers v20-v31 into twelve consecutive 16-byte slots
// starting at the address the preceding addi placed in r11 (SP+288 on the
// 64-bit path, SP+224 otherwise). r0 is the index operand; it was set to 0
// by the li r0, 0 at the top of this prologue, so each store targets r11.
stvx v20, r11, r0
addi r11, r11, 16
stvx v21, r11, r0
addi r11, r11, 16
stvx v22, r11, r0
addi r11, r11, 16
stvx v23, r11, r0
addi r11, r11, 16
stvx v24, r11, r0
addi r11, r11, 16
stvx v25, r11, r0
addi r11, r11, 16
stvx v26, r11, r0
addi r11, r11, 16
stvx v27, r11, r0
addi r11, r11, 16
stvx v28, r11, r0
addi r11, r11, 16
stvx v29, r11, r0
addi r11, r11, 16
stvx v30, r11, r0
addi r11, r11, 16
stvx v31, r11, r0
// Clear the scratch pointer once the save area has been written.
li r11, 0
stfs f1, ALPHA_R_SP stfs f1, ALPHA_R_SP
stfs f2, ALPHA_I_SP stfs f2, ALPHA_I_SP
@ -301,9 +330,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef __64BIT__ #ifdef __64BIT__
addi T1 , SP, 296 addi T1 , SP, 296+196
#else #else
addi T1 , SP, 224 addi T1 , SP, 224+200
#endif #endif
stxsspx vs1, 0, T1 stxsspx vs1, 0, T1
@ -375,6 +404,7 @@ L999:
ld r16, 264(SP) ld r16, 264(SP)
ld r15, 272(SP) ld r15, 272(SP)
ld r14, 280(SP) ld r14, 280(SP)
addi r11, SP, 288
#else #else
lwz r31, 144(SP) lwz r31, 144(SP)
lwz r30, 148(SP) lwz r30, 148(SP)
@ -394,7 +424,32 @@ L999:
lwz r16, 204(SP) lwz r16, 204(SP)
lwz r15, 208(SP) lwz r15, 208(SP)
lwz r14, 212(SP) lwz r14, 212(SP)
// 32-bit path: point r11 at the vector save area, 224 bytes above SP,
// the same base the prologue used when storing v20-v31.
addi r11, SP, 224
#endif #endif
// Reload vector registers v20-v31 from the twelve 16-byte slots that start
// at the address in r11. The 0, r11 operand form makes the effective
// address depend on r11 only, instead of relying on r0 still holding zero
// when the epilogue is reached.
lvx v20, 0, r11
addi r11, r11, 16
lvx v21, 0, r11
addi r11, r11, 16
lvx v22, 0, r11
addi r11, r11, 16
lvx v23, 0, r11
addi r11, r11, 16
lvx v24, 0, r11
addi r11, r11, 16
lvx v25, 0, r11
addi r11, r11, 16
lvx v26, 0, r11
addi r11, r11, 16
lvx v27, 0, r11
addi r11, r11, 16
lvx v28, 0, r11
addi r11, r11, 16
lvx v29, 0, r11
addi r11, r11, 16
lvx v30, 0, r11
addi r11, r11, 16
lvx v31, 0, r11
// Clear the scratch pointer before the stack frame is released.
li r11, 0
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
@ -404,4 +459,4 @@ L999:
blr blr
EPILOGUE EPILOGUE
#endif #endif^

View File

@ -88,6 +88,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define J r12 #define J r12
#define PREA r14 #define PREA r14
#define PREB r15 #define PREB r15
#define BO r16 #define BO r16
@ -109,7 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cgemm_tcopy_macros_8_power8.S" #include "cgemm_tcopy_macros_8_power8.S"
#define STACKSIZE 384 #define STACKSIZE 576
PROLOGUE PROLOGUE
@ -136,6 +137,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
std r16, 264(SP) std r16, 264(SP)
std r15, 272(SP) std r15, 272(SP)
std r14, 280(SP) std r14, 280(SP)
// Store vector registers v20-v31 into twelve consecutive 16-byte slots
// starting at SP+288, directly above the general purpose register saves.
// NOTE(review): r0 is the index operand added to r11, so this assumes r0
// holds 0 at this point - confirm against the start of the prologue.
addi r11, SP, 288
stvx v20, r11, r0
addi r11, r11, 16
stvx v21, r11, r0
addi r11, r11, 16
stvx v22, r11, r0
addi r11, r11, 16
stvx v23, r11, r0
addi r11, r11, 16
stvx v24, r11, r0
addi r11, r11, 16
stvx v25, r11, r0
addi r11, r11, 16
stvx v26, r11, r0
addi r11, r11, 16
stvx v27, r11, r0
addi r11, r11, 16
stvx v28, r11, r0
addi r11, r11, 16
stvx v29, r11, r0
addi r11, r11, 16
stvx v30, r11, r0
addi r11, r11, 16
stvx v31, r11, r0
// Clear the scratch pointer once the save area has been written.
li r11, 0
cmpwi cr0, M, 0 cmpwi cr0, M, 0
ble- L999 ble- L999
@ -197,9 +223,33 @@ L999:
ld r16, 264(SP) ld r16, 264(SP)
ld r15, 272(SP) ld r15, 272(SP)
ld r14, 280(SP) ld r14, 280(SP)
// Reload vector registers v20-v31 from the twelve 16-byte slots starting at
// SP+288, the same base the prologue stored them to.
// NOTE(review): r3 is the index operand added to r11, so this assumes r3
// holds 0 when the epilogue is reached - confirm.
addi r11, SP, 288
lvx v20, r11, r3
addi r11, r11, 16
lvx v21, r11, r3
addi r11, r11, 16
lvx v22, r11, r3
addi r11, r11, 16
lvx v23, r11, r3
addi r11, r11, 16
lvx v24, r11, r3
addi r11, r11, 16
lvx v25, r11, r3
addi r11, r11, 16
lvx v26, r11, r3
addi r11, r11, 16
lvx v27, r11, r3
addi r11, r11, 16
lvx v28, r11, r3
addi r11, r11, 16
lvx v29, r11, r3
addi r11, r11, 16
lvx v30, r11, r3
addi r11, r11, 16
lvx v31, r11, r3
// Clear the scratch pointer before the stack frame is released.
li r11, 0
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
blr blr
EPILOGUE EPILOGUE

View File

@ -83,13 +83,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef __64BIT__ #ifdef __64BIT__
#define STACKSIZE 400 #define STACKSIZE 400
#define ALPHA_R_SP 304(SP) #define STACKSIZE 592
#define ALPHA_I_SP 312(SP) #define ALPHA_R_SP 304+192(SP)
#define ALPHA_I_SP 312+192(SP)
#else #else
#define STACKSIZE 256 #define STACKSIZE 256
#define ALPHA_R_SP 224(SP) #define STACKSIZE 452
#define ALPHA_I_SP 232(SP) #define ALPHA_R_SP 224+196(SP)
#define FZERO 240(SP) #define ALPHA_I_SP 232+196(SP)
#define FZERO 240+196(SP)
#endif #endif
#define M r3 #define M r3
@ -135,6 +137,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define alpha_sr vs30 #define alpha_sr vs30
#define alpha_si vs31 #define alpha_si vs31
#define VECSAVE r11
#define o12 r12 #define o12 r12
#define KKK r13 #define KKK r13
#define K1 r14 #define K1 r14
@ -208,6 +212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
std r14, 280(SP) std r14, 280(SP)
std r13, 288(SP) std r13, 288(SP)
std r12, 296(SP) std r12, 296(SP)
addi r11, SP, 304
#else #else
stw r31, 144(SP) stw r31, 144(SP)
stw r30, 148(SP) stw r30, 148(SP)
@ -228,7 +233,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
stw r15, 208(SP) stw r15, 208(SP)
stw r14, 212(SP) stw r14, 212(SP)
stw r13, 216(SP) stw r13, 216(SP)
addi r11, SP, 224
#endif #endif
// Store vector registers v20-v31 into twelve consecutive 16-byte slots
// starting at the address the preceding addi placed in r11 (SP+304 on the
// 64-bit path, SP+224 otherwise).
// NOTE(review): r0 is the index operand added to r11, so this assumes r0
// holds 0 at this point - confirm against the start of the prologue.
stvx v20, r11, r0
addi r11, r11, 16
stvx v21, r11, r0
addi r11, r11, 16
stvx v22, r11, r0
addi r11, r11, 16
stvx v23, r11, r0
addi r11, r11, 16
stvx v24, r11, r0
addi r11, r11, 16
stvx v25, r11, r0
addi r11, r11, 16
stvx v26, r11, r0
addi r11, r11, 16
stvx v27, r11, r0
addi r11, r11, 16
stvx v28, r11, r0
addi r11, r11, 16
stvx v29, r11, r0
addi r11, r11, 16
stvx v30, r11, r0
addi r11, r11, 16
stvx v31, r11, r0
// Clear the scratch pointer once the save area has been written.
li r11, 0
stfs f1, ALPHA_R_SP stfs f1, ALPHA_R_SP
stfs f2, ALPHA_I_SP stfs f2, ALPHA_I_SP
@ -295,9 +325,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef __64BIT__ #ifdef __64BIT__
addi T1, SP, 304 addi T1, SP, 304+192
#else #else
addi T1, SP, 224 addi T1, SP, 224+196
#endif #endif
lxsspx alpha_dr, 0, T1 lxsspx alpha_dr, 0, T1
@ -369,6 +399,7 @@ L999:
ld r14, 280(SP) ld r14, 280(SP)
ld r13, 288(SP) ld r13, 288(SP)
ld r12, 296(SP) ld r12, 296(SP)
addi r11, SP, 304
#else #else
lwz r31, 144(SP) lwz r31, 144(SP)
lwz r30, 148(SP) lwz r30, 148(SP)
@ -389,10 +420,34 @@ L999:
lwz r15, 208(SP) lwz r15, 208(SP)
lwz r14, 212(SP) lwz r14, 212(SP)
lwz r13, 216(SP) lwz r13, 216(SP)
addi r11, SP, 224
#endif #endif
// Reload vector registers v20-v31 from the twelve 16-byte slots that start
// at the address the preceding addi placed in r11.
// NOTE(review): r3 is the index operand added to r11, so this assumes r3
// holds 0 when the epilogue is reached - confirm.
lvx v20, r11, r3
addi r11, r11, 16
lvx v21, r11, r3
addi r11, r11, 16
lvx v22, r11, r3
addi r11, r11, 16
lvx v23, r11, r3
addi r11, r11, 16
lvx v24, r11, r3
addi r11, r11, 16
lvx v25, r11, r3
addi r11, r11, 16
lvx v26, r11, r3
addi r11, r11, 16
lvx v27, r11, r3
addi r11, r11, 16
lvx v28, r11, r3
addi r11, r11, 16
lvx v29, r11, r3
addi r11, r11, 16
lvx v30, r11, r3
addi r11, r11, 16
lvx v31, r11, r3
// Clear the scratch pointer before the stack frame is released.
li r11, 0
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
blr blr
EPILOGUE EPILOGUE

View File

@ -83,12 +83,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef __64BIT__ #ifdef __64BIT__
#define STACKSIZE 320 #define STACKSIZE 320
#define ALPHA_SP 296(SP) #define STACKSIZE 512
#define FZERO 304(SP) #define ALPHA_SP 296+192(SP)
#define FZERO 304+192(SP)
#else #else
#define STACKSIZE 240 #define STACKSIZE 240
#define ALPHA_SP 224(SP) #define STACKSIZE 440
#define FZERO 232(SP) #define ALPHA_SP 224+200(SP)
#define FZERO 232+200(SP)
#endif #endif
#define M r3 #define M r3
@ -210,6 +212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
std r16, 264(SP) std r16, 264(SP)
std r15, 272(SP) std r15, 272(SP)
std r14, 280(SP) std r14, 280(SP)
addi r11,SP,288
#else #else
stw r31, 144(SP) stw r31, 144(SP)
stw r30, 148(SP) stw r30, 148(SP)
@ -229,7 +232,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
stw r16, 204(SP) stw r16, 204(SP)
stw r15, 208(SP) stw r15, 208(SP)
stw r14, 212(SP) stw r14, 212(SP)
addi r11,SP,224
#endif #endif
// Store vector registers v20-v31 into twelve consecutive 16-byte slots
// starting at the address the preceding addi placed in r11 (SP+288 on the
// 64-bit path, SP+224 otherwise).
// NOTE(review): r0 is the index operand added to r11, so this assumes r0
// holds 0 at this point - confirm against the start of the prologue.
stvx v20, r11,r0
addi r11,r11,16
stvx v21, r11,r0
addi r11,r11,16
stvx v22, r11,r0
addi r11,r11,16
stvx v23, r11,r0
addi r11,r11,16
stvx v24, r11,r0
addi r11,r11,16
stvx v25, r11,r0
addi r11,r11,16
stvx v26, r11,r0
addi r11,r11,16
stvx v27, r11,r0
addi r11,r11,16
stvx v28, r11,r0
addi r11,r11,16
stvx v29, r11,r0
addi r11,r11,16
stvx v30, r11,r0
addi r11,r11,16
stvx v31, r11,r0
// Clear the scratch pointer once the save area has been written.
li r11,0
stfd f1, ALPHA_SP stfd f1, ALPHA_SP
stw r0, FZERO stw r0, FZERO
@ -269,9 +297,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ble .L999_H1 ble .L999_H1
#ifdef __64BIT__ #ifdef __64BIT__
addi T1, SP, 296 addi T1, SP, 296+192
#else #else
addi T1, SP, 224 addi T1, SP, 224+200
#endif #endif
li PRE, 384 li PRE, 384
@ -334,6 +362,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ld r16, 264(SP) ld r16, 264(SP)
ld r15, 272(SP) ld r15, 272(SP)
ld r14, 280(SP) ld r14, 280(SP)
addi r11,SP,288
#else #else
lwz r31, 144(SP) lwz r31, 144(SP)
lwz r30, 148(SP) lwz r30, 148(SP)
@ -353,10 +382,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
lwz r16, 204(SP) lwz r16, 204(SP)
lwz r15, 208(SP) lwz r15, 208(SP)
lwz r14, 212(SP) lwz r14, 212(SP)
addi r11,SP,224
#endif #endif
// Reload vector registers v20-v31 from the twelve 16-byte slots that start
// at the address the preceding addi placed in r11.
// NOTE(review): r3 is the index operand added to r11, so this assumes r3
// holds 0 when the epilogue is reached - confirm.
lvx v20, r11,r3
addi r11,r11,16
lvx v21, r11,r3
addi r11,r11,16
lvx v22, r11,r3
addi r11,r11,16
lvx v23, r11,r3
addi r11,r11,16
lvx v24, r11,r3
addi r11,r11,16
lvx v25, r11,r3
addi r11,r11,16
lvx v26, r11,r3
addi r11,r11,16
lvx v27, r11,r3
addi r11,r11,16
lvx v28, r11,r3
addi r11,r11,16
lvx v29, r11,r3
addi r11,r11,16
lvx v30, r11,r3
addi r11,r11,16
lvx v31, r11,r3
// Clear the scratch pointer before the stack frame is released.
li r11,0
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
blr blr
EPILOGUE EPILOGUE

View File

@ -110,12 +110,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "dgemm_ncopy_macros_4_power8.S" #include "dgemm_ncopy_macros_4_power8.S"
#define STACKSIZE 384 #define STACKSIZE 384
#define STACKSIZE 576
PROLOGUE PROLOGUE
PROFCODE PROFCODE
addi SP, SP, -STACKSIZE addi SP, SP, -STACKSIZE
//addi SP, SP, -208
li r0, 0 li r0, 0
stfd f14, 0(SP) stfd f14, 0(SP)
@ -157,6 +158,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
std r15, 272(SP) std r15, 272(SP)
std r14, 280(SP) std r14, 280(SP)
// Store vector registers v20-v31 into twelve consecutive 16-byte slots
// starting at SP+288, directly above the general purpose register saves.
// r0 is the index operand; it was set to 0 by the li r0, 0 right after the
// stack adjustment, so each store targets the address in r11.
addi r11,SP,288
stvx v20, r11,r0
addi r11,r11,16
stvx v21, r11,r0
addi r11,r11,16
stvx v22, r11,r0
addi r11,r11,16
stvx v23, r11,r0
addi r11,r11,16
stvx v24, r11,r0
addi r11,r11,16
stvx v25, r11,r0
addi r11,r11,16
stvx v26, r11,r0
addi r11,r11,16
stvx v27, r11,r0
addi r11,r11,16
stvx v28, r11,r0
addi r11,r11,16
stvx v29, r11,r0
addi r11,r11,16
stvx v30, r11,r0
addi r11,r11,16
stvx v31, r11,r0
// Clear the scratch pointer once the save area has been written.
li r11,0
cmpwi cr0, M, 0 cmpwi cr0, M, 0
ble- L999 ble- L999
cmpwi cr0, N, 0 cmpwi cr0, N, 0
@ -164,8 +191,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
slwi LDA, LDA, BASE_SHIFT slwi LDA, LDA, BASE_SHIFT
li PREA, 384 //li PREA, 384
li PREB, 384 //li PREB, 384
li PREA, 576
li PREB, 576
li o8, 8 li o8, 8
li o16, 16 li o16, 16
@ -219,9 +249,34 @@ L999:
ld r16, 264(SP) ld r16, 264(SP)
ld r15, 272(SP) ld r15, 272(SP)
ld r14, 280(SP) ld r14, 280(SP)
// Reload vector registers v20-v31 from the twelve 16-byte slots starting at
// SP+288, the same base the prologue stored them to.
// NOTE(review): r3 is the index operand added to r11, so this assumes r3
// holds 0 when the epilogue is reached - confirm.
addi r11,SP,288
lvx v20, r11,r3
addi r11,r11,16
lvx v21, r11,r3
addi r11,r11,16
lvx v22, r11,r3
addi r11,r11,16
lvx v23, r11,r3
addi r11,r11,16
lvx v24, r11,r3
addi r11,r11,16
lvx v25, r11,r3
addi r11,r11,16
lvx v26, r11,r3
addi r11,r11,16
lvx v27, r11,r3
addi r11,r11,16
lvx v28, r11,r3
addi r11,r11,16
lvx v29, r11,r3
addi r11,r11,16
lvx v30, r11,r3
addi r11,r11,16
lvx v31, r11,r3
// Clear the scratch pointer before the stack frame is released.
li r11,0
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
//addi SP, SP, 208
blr blr
EPILOGUE EPILOGUE

View File

@ -110,12 +110,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "dgemm_tcopy_macros_16_power8.S" #include "dgemm_tcopy_macros_16_power8.S"
#define STACKSIZE 384 #define STACKSIZE 384
#define STACKSIZE 576
PROLOGUE PROLOGUE
PROFCODE PROFCODE
addi SP, SP, -STACKSIZE addi SP, SP, -STACKSIZE
//addi SP, SP, -208
li r0, 0 li r0, 0
std r31, 144(SP) std r31, 144(SP)
@ -136,6 +139,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
std r16, 264(SP) std r16, 264(SP)
std r15, 272(SP) std r15, 272(SP)
std r14, 280(SP) std r14, 280(SP)
// Store vector registers v20-v31 into twelve consecutive 16-byte slots
// starting at SP+288, directly above the general purpose register saves.
// r0 is the index operand; it was set to 0 by the li r0, 0 right after the
// stack adjustment, so each store targets the address in r11.
addi r11,SP,288
stvx v20, r11,r0
addi r11,r11,16
stvx v21, r11,r0
addi r11,r11,16
stvx v22, r11,r0
addi r11,r11,16
stvx v23, r11,r0
addi r11,r11,16
stvx v24, r11,r0
addi r11,r11,16
stvx v25, r11,r0
addi r11,r11,16
stvx v26, r11,r0
addi r11,r11,16
stvx v27, r11,r0
addi r11,r11,16
stvx v28, r11,r0
addi r11,r11,16
stvx v29, r11,r0
addi r11,r11,16
stvx v30, r11,r0
addi r11,r11,16
stvx v31, r11,r0
// Clear the scratch pointer once the save area has been written.
li r11,0
cmpwi cr0, M, 0 cmpwi cr0, M, 0
ble- L999 ble- L999
@ -170,7 +198,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add B2, B2, B add B2, B2, B
add B1, B1, B add B1, B1, B
li PREA, 384 //li PREA, 384
li PREA, 576
addi PREB, M16, 128 addi PREB, M16, 128
li o8, 8 li o8, 8
@ -202,9 +231,34 @@ L999:
ld r16, 264(SP) ld r16, 264(SP)
ld r15, 272(SP) ld r15, 272(SP)
ld r14, 280(SP) ld r14, 280(SP)
// Reload vector registers v20-v31 from the twelve 16-byte slots starting at
// SP+288, the same base the prologue stored them to.
// NOTE(review): r3 is the index operand added to r11, so this assumes r3
// holds 0 when the epilogue is reached - confirm.
addi r11,SP,288
lvx v20, r11,r3
addi r11,r11,16
lvx v21, r11,r3
addi r11,r11,16
lvx v22, r11,r3
addi r11,r11,16
lvx v23, r11,r3
addi r11,r11,16
lvx v24, r11,r3
addi r11,r11,16
lvx v25, r11,r3
addi r11,r11,16
lvx v26, r11,r3
addi r11,r11,16
lvx v27, r11,r3
addi r11,r11,16
lvx v28, r11,r3
addi r11,r11,16
lvx v29, r11,r3
addi r11,r11,16
lvx v30, r11,r3
addi r11,r11,16
lvx v31, r11,r3
// Clear the scratch pointer before the stack frame is released.
li r11,0
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
//addi SP, SP, 208
blr blr
EPILOGUE EPILOGUE

View File

@ -83,12 +83,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef __64BIT__ #ifdef __64BIT__
#define STACKSIZE 320 #define STACKSIZE 320
#define ALPHA_SP 296(SP) #define STACKSIZE 520
#define FZERO 304(SP) #define ALPHA_SP 296+200(SP)
#define FZERO 304+200(SP)
#else #else
#define STACKSIZE 240 #define STACKSIZE 436
#define ALPHA_SP 224(SP) #define ALPHA_SP 224+196(SP)
#define FZERO 232(SP) #define FZERO 232+196(SP)
#endif #endif
#define M r3 #define M r3
@ -152,6 +153,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define PRE r30 #define PRE r30
#define T2 r31 #define T2 r31
#define VECSAVE r11
#include "dtrmm_macros_16x4_power8.S" #include "dtrmm_macros_16x4_power8.S"
@ -206,6 +209,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
std r15, 272(SP) std r15, 272(SP)
std r14, 280(SP) std r14, 280(SP)
std r13, 288(SP) std r13, 288(SP)
addi r11, SP, 304
#else #else
stw r31, 144(SP) stw r31, 144(SP)
stw r30, 148(SP) stw r30, 148(SP)
@ -226,7 +230,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
stw r15, 208(SP) stw r15, 208(SP)
stw r14, 212(SP) stw r14, 212(SP)
stw r13, 216(SP) stw r13, 216(SP)
// 32-bit path: point r11 at the vector save area, 224 bytes above SP.
// The base must be SP: with r0 in the base slot addi substitutes the
// constant 0, which would yield the absolute address 224. The epilogue
// already uses SP+224 for the matching reload.
addi r11, SP, 224
#endif #endif
// Store vector registers v20-v31 into twelve consecutive 16-byte slots
// starting at the address the preceding addi placed in r11.
// NOTE(review): r0 is the index operand added to r11, so this assumes r0
// holds 0 at this point - confirm against the start of the prologue.
// NOTE(review): stvx drops the low four bits of the effective address and
// the 64-bit STACKSIZE of 520 is not a multiple of 16, so confirm that r11
// is 16-byte aligned here and that the slots do not overlap other saves.
stvx v20, r11, r0
addi r11, r11, 16
stvx v21, r11, r0
addi r11, r11, 16
stvx v22, r11, r0
addi r11, r11, 16
stvx v23, r11, r0
addi r11, r11, 16
stvx v24, r11, r0
addi r11, r11, 16
stvx v25, r11, r0
addi r11, r11, 16
stvx v26, r11, r0
addi r11 ,r11, 16
stvx v27, r11, r0
addi r11, r11, 16
stvx v28, r11, r0
addi r11, r11, 16
stvx v29, r11, r0
addi r11, r11, 16
stvx v30, r11, r0
addi r11, r11, 16
stvx v31, r11, r0
// Clear the scratch pointer once the save area has been written.
li r11,0
// NOTE(review): this word store of r31 sits outside the 32/64-bit
// conditional. On the 32-bit path it repeats the earlier stw r31, 144(SP);
// on the 64-bit path it writes only four bytes at 144(SP) - confirm it is
// intended and does not disturb a doubleword save of r31 at that offset.
stw r31, 144(SP)
stfd f1, ALPHA_SP stfd f1, ALPHA_SP
stw r0, FZERO stw r0, FZERO
@ -270,9 +301,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ble .L999_H1 ble .L999_H1
#ifdef __64BIT__ #ifdef __64BIT__
addi ALPHA, SP, 296 addi ALPHA, SP, 296+200
#else #else
addi ALPHA, SP, 224 addi ALPHA, SP, 224+196
#endif #endif
li PRE, 256 li PRE, 256
@ -332,6 +363,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ld r15, 272(SP) ld r15, 272(SP)
ld r14, 280(SP) ld r14, 280(SP)
ld r13, 288(SP) ld r13, 288(SP)
addi r11, SP, 304
#else #else
lwz r31, 144(SP) lwz r31, 144(SP)
lwz r30, 148(SP) lwz r30, 148(SP)
@ -352,10 +384,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
lwz r15, 208(SP) lwz r15, 208(SP)
lwz r14, 212(SP) lwz r14, 212(SP)
lwz r13, 216(SP) lwz r13, 216(SP)
addi r11, SP, 224
#endif #endif
// Reload vector registers v20-v31 from the twelve 16-byte slots that start
// at the address the preceding addi placed in r11 (SP+304 on the 64-bit
// path, SP+224 otherwise).
// NOTE(review): r3 is the index operand added to r11, so this assumes r3
// holds 0 when the epilogue is reached - confirm.
lvx v20, r11, r3
addi r11, r11, 16
lvx v21, r11, r3
addi r11, r11, 16
lvx v22, r11, r3
addi r11, r11, 16
lvx v23, r11, r3
addi r11, r11, 16
lvx v24, r11, r3
addi r11, r11, 16
lvx v25, r11, r3
addi r11, r11, 16
lvx v26, r11, r3
addi r11, r11, 16
lvx v27, r11, r3
addi r11, r11, 16
lvx v28, r11, r3
addi r11, r11, 16
lvx v29, r11, r3
addi r11, r11, 16
lvx v30, r11, r3
addi r11, r11, 16
lvx v31, r11, r3
// Clear the scratch pointer before the stack frame is released.
li r11, 0
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
blr blr
EPILOGUE EPILOGUE

View File

@ -48,8 +48,9 @@
#ifdef __64BIT__ #ifdef __64BIT__
#define STACKSIZE 320 #define STACKSIZE 320
#define ALPHA 296(SP) #define STACKSIZE 520
#define FZERO 304(SP) #define ALPHA 296+200(SP)
#define FZERO 304+200(SP)
#else #else
#define STACKSIZE 240 #define STACKSIZE 240
#define ALPHA 224(SP) #define ALPHA 224(SP)
@ -112,6 +113,8 @@
#define o48 r30 #define o48 r30
#define T1 r31 #define T1 r31
#define VECSAVE r11
#include "dtrsm_macros_LT_16x4_power8.S" #include "dtrsm_macros_LT_16x4_power8.S"
#ifndef NEEDPARAM #ifndef NEEDPARAM
@ -163,6 +166,7 @@
std r17, 256(SP) std r17, 256(SP)
std r16, 264(SP) std r16, 264(SP)
std r15, 272(SP) std r15, 272(SP)
addi r11,SP,288
#else #else
stw r31, 144(SP) stw r31, 144(SP)
stw r30, 148(SP) stw r30, 148(SP)
@ -178,7 +182,32 @@
stw r20, 188(SP) stw r20, 188(SP)
stw r19, 192(SP) stw r19, 192(SP)
stw r18, 196(SP) stw r18, 196(SP)
addi r11,SP,208
#endif #endif
// Store vector registers v20-v31 into twelve consecutive 16-byte slots
// starting at the address the preceding addi placed in r11 (SP+288 on the
// 64-bit path, SP+208 otherwise).
// NOTE(review): r0 is the index operand added to r11, so this assumes r0
// holds 0 at this point - confirm against the start of the prologue.
// NOTE(review): on the 32-bit path 192 bytes are written from SP+208 while
// the 32-bit STACKSIZE is still 240 and ALPHA is still 224(SP); it looks
// like the 32-bit frame was not enlarged for this save area - confirm.
// NOTE(review): stvx drops the low four bits of the effective address and
// the 64-bit STACKSIZE of 520 is not a multiple of 16 - confirm alignment.
stvx v20, r11,r0
addi r11,r11,16
stvx v21, r11,r0
addi r11,r11,16
stvx v22, r11,r0
addi r11,r11,16
stvx v23, r11,r0
addi r11,r11,16
stvx v24, r11,r0
addi r11,r11,16
stvx v25, r11,r0
addi r11,r11,16
stvx v26, r11,r0
addi r11,r11,16
stvx v27, r11,r0
addi r11,r11,16
stvx v28, r11,r0
addi r11,r11,16
stvx v29, r11,r0
addi r11,r11,16
stvx v30, r11,r0
addi r11,r11,16
stvx v31, r11,r0
// Clear the scratch pointer once the save area has been written.
li r11,0
#if defined(_AIX) || defined(__APPLE__) #if defined(_AIX) || defined(__APPLE__)
@ -269,6 +298,7 @@ L999:
ld r17, 256(SP) ld r17, 256(SP)
ld r16, 264(SP) ld r16, 264(SP)
ld r15, 272(SP) ld r15, 272(SP)
addi r11,SP,288
#else #else
lwz r31, 144(SP) lwz r31, 144(SP)
lwz r30, 148(SP) lwz r30, 148(SP)
@ -284,10 +314,35 @@ L999:
lwz r20, 188(SP) lwz r20, 188(SP)
lwz r19, 192(SP) lwz r19, 192(SP)
lwz r18, 196(SP) lwz r18, 196(SP)
addi r11,SP,208
#endif #endif
// Reload vector registers v20-v31 from the twelve 16-byte slots that start
// at the address the preceding addi placed in r11 (SP+288 on the 64-bit
// path, SP+208 otherwise).
// NOTE(review): r3 is the index operand added to r11, so this assumes r3
// holds 0 when the epilogue is reached - confirm.
lvx v20, r11,r3
addi r11,r11,16
lvx v21, r11,r3
addi r11,r11,16
lvx v22, r11,r3
addi r11,r11,16
lvx v23, r11,r3
addi r11,r11,16
lvx v24, r11,r3
addi r11,r11,16
lvx v25, r11,r3
addi r11,r11,16
lvx v26, r11,r3
addi r11,r11,16
lvx v27, r11,r3
addi r11,r11,16
lvx v28, r11,r3
addi r11,r11,16
lvx v29, r11,r3
addi r11,r11,16
lvx v30, r11,r3
addi r11,r11,16
lvx v31, r11,r3
// Clear the scratch pointer before the stack frame is released.
li r11,0
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
blr blr
EPILOGUE EPILOGUE

View File

@ -83,12 +83,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef __64BIT__ #ifdef __64BIT__
#define STACKSIZE 32752 #define STACKSIZE 32752
#define ALPHA_SP 296(SP) #define ALPHA_SP 296+192(SP)
#define FZERO 304(SP) #define FZERO 304+192(SP)
#else #else
#define STACKSIZE 240 #define STACKSIZE 440
#define ALPHA_SP 224(SP) #define ALPHA_SP 224+200(SP)
#define FZERO 232(SP) #define FZERO 232+200(SP)
#endif #endif
#define M r3 #define M r3
@ -132,6 +132,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define o0 0 #define o0 0
#define VECSAVE r11
#define FRAMEPOINTER r12 #define FRAMEPOINTER r12
#define BBUFFER r14 #define BBUFFER r14
@ -211,6 +213,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
std r16, 264(SP) std r16, 264(SP)
std r15, 272(SP) std r15, 272(SP)
std r14, 280(SP) std r14, 280(SP)
addi r11, SP, 288
#else #else
stw r31, 144(SP) stw r31, 144(SP)
stw r30, 148(SP) stw r30, 148(SP)
@ -230,7 +233,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
stw r16, 204(SP) stw r16, 204(SP)
stw r15, 208(SP) stw r15, 208(SP)
stw r14, 212(SP) stw r14, 212(SP)
addi r11, SP, 224
#endif #endif
// Store vector registers v20-v31 into twelve consecutive 16-byte slots
// starting at the address the preceding addi placed in r11 (SP+288 on the
// 64-bit path, SP+224 otherwise).
// NOTE(review): r0 is the index operand added to r11, so this assumes r0
// holds 0 at this point - confirm against the start of the prologue.
stvx v20, r11, r0
addi r11, r11, 16
stvx v21, r11, r0
addi r11, r11, 16
stvx v22, r11, r0
addi r11, r11, 16
stvx v23, r11, r0
addi r11, r11, 16
stvx v24, r11, r0
addi r11, r11, 16
stvx v25, r11, r0
addi r11, r11, 16
stvx v26, r11, r0
addi r11, r11, 16
stvx v27, r11, r0
addi r11, r11, 16
stvx v28, r11, r0
addi r11, r11, 16
stvx v29, r11, r0
addi r11, r11, 16
stvx v30, r11, r0
addi r11, r11, 16
stvx v31, r11, r0
// Clear the scratch pointer once the save area has been written.
li r11,0
// stfd f1, ALPHA_SP // stfd f1, ALPHA_SP
// stw r0, FZERO // stw r0, FZERO
@ -281,7 +310,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
li T1, -4096 li T1, -4096
and BBUFFER, BBUFFER, T1 and BBUFFER, BBUFFER, T1
addi T1, SP, 300 addi T1, SP, 300+192
stxsspx f1, o0 , T1 stxsspx f1, o0 , T1
stxsspx f1, o4 , T1 stxsspx f1, o4 , T1
stxsspx f1, o8 , T1 stxsspx f1, o8 , T1
@ -339,6 +368,7 @@ L999:
ld r16, 264(SP) ld r16, 264(SP)
ld r15, 272(SP) ld r15, 272(SP)
ld r14, 280(SP) ld r14, 280(SP)
addi r11, SP, 288
#else #else
lwz r31, 144(SP) lwz r31, 144(SP)
lwz r30, 148(SP) lwz r30, 148(SP)
@ -358,13 +388,38 @@ L999:
lwz r16, 204(SP) lwz r16, 204(SP)
lwz r15, 208(SP) lwz r15, 208(SP)
lwz r14, 212(SP) lwz r14, 212(SP)
addi r11, SP, 224
#endif #endif
// Reload vector registers v20-v31 from the twelve 16-byte slots that start
// at the address the preceding addi placed in r11.
// NOTE(review): r3 is the index operand added to r11, so this assumes r3
// holds 0 when the epilogue is reached - confirm.
lvx v20, r11, r3
addi r11, r11, 16
lvx v21, r11, r3
addi r11, r11, 16
lvx v22, r11, r3
addi r11, r11, 16
lvx v23, r11, r3
addi r11, r11, 16
lvx v24, r11, r3
addi r11, r11, 16
lvx v25, r11, r3
addi r11, r11, 16
lvx v26, r11, r3
addi r11, r11, 16
lvx v27, r11, r3
addi r11, r11, 16
lvx v28, r11, r3
addi r11, r11, 16
lvx v29, r11, r3
addi r11, r11, 16
lvx v30, r11, r3
addi r11, r11, 16
lvx v31, r11, r3
// Clear the scratch pointer before the stack frame is released.
li r11, 0
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
blr blr
EPILOGUE EPILOGUE

View File

@ -110,8 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "sgemm_tcopy_macros_16_power8.S" #include "sgemm_tcopy_macros_16_power8.S"
#define STACKSIZE 384 #define STACKSIZE 576
PROLOGUE PROLOGUE
PROFCODE PROFCODE
@ -137,6 +136,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
std r16, 264(SP) std r16, 264(SP)
std r15, 272(SP) std r15, 272(SP)
std r14, 280(SP) std r14, 280(SP)
// Store vector registers v20-v31 into twelve consecutive 16-byte slots
// starting at SP+288, directly above the general purpose register saves.
// NOTE(review): r0 is the index operand added to r11, so this assumes r0
// holds 0 at this point - confirm against the start of the prologue.
addi r11 ,SP, 288
stvx v20, r11, r0
addi r11, r11, 16
stvx v21, r11, r0
addi r11, r11, 16
stvx v22, r11, r0
addi r11, r11, 16
stvx v23, r11, r0
addi r11, r11, 16
stvx v24, r11, r0
addi r11, r11, 16
stvx v25, r11, r0
addi r11, r11, 16
stvx v26, r11, r0
addi r11, r11, 16
stvx v27, r11, r0
addi r11, r11, 16
stvx v28, r11, r0
addi r11, r11, 16
stvx v29, r11, r0
addi r11, r11, 16
stvx v30, r11, r0
addi r11, r11, 16
stvx v31, r11, r0
// Clear the scratch pointer once the save area has been written.
li r11, 0
cmpwi cr0, M, 0 cmpwi cr0, M, 0
ble- L999 ble- L999
@ -203,9 +227,33 @@ L999:
ld r16, 264(SP) ld r16, 264(SP)
ld r15, 272(SP) ld r15, 272(SP)
ld r14, 280(SP) ld r14, 280(SP)
// Reload vector registers v20-v31 from the twelve 16-byte slots starting at
// SP+288, the same base the prologue stored them to.
// NOTE(review): r3 is the index operand added to r11, so this assumes r3
// holds 0 when the epilogue is reached - confirm.
addi r11, SP, 288
lvx v20, r11, r3
addi r11, r11, 16
lvx v21, r11, r3
addi r11, r11, 16
lvx v22, r11, r3
addi r11, r11, 16
lvx v23, r11, r3
addi r11, r11, 16
lvx v24, r11, r3
addi r11, r11, 16
lvx v25, r11, r3
addi r11, r11, 16
lvx v26, r11, r3
addi r11, r11, 16
lvx v27, r11, r3
addi r11, r11, 16
lvx v28, r11, r3
addi r11, r11, 16
lvx v29, r11, r3
addi r11, r11, 16
lvx v30, r11, r3
addi r11, r11, 16
lvx v31, r11, r3
// Clear the scratch pointer before the stack frame is released.
li r11, 0
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
blr blr
EPILOGUE EPILOGUE

View File

@ -110,8 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "sgemm_tcopy_macros_8_power8.S" #include "sgemm_tcopy_macros_8_power8.S"
#define STACKSIZE 384 #define STACKSIZE 576
PROLOGUE PROLOGUE
PROFCODE PROFCODE
@ -137,6 +136,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
std r16, 264(SP) std r16, 264(SP)
std r15, 272(SP) std r15, 272(SP)
std r14, 280(SP) std r14, 280(SP)
// Store vector registers v20-v31 into twelve consecutive 16-byte slots
// starting at SP+288, directly above the general purpose register saves.
// NOTE(review): r0 is the index operand added to r11, so this assumes r0
// holds 0 at this point - confirm against the start of the prologue.
addi r11, SP, 288
stvx v20, r11, r0
addi r11, r11, 16
stvx v21, r11, r0
addi r11, r11, 16
stvx v22, r11, r0
addi r11, r11, 16
stvx v23, r11, r0
addi r11, r11, 16
stvx v24, r11, r0
addi r11, r11, 16
stvx v25, r11, r0
addi r11, r11, 16
stvx v26, r11, r0
addi r11, r11, 16
stvx v27, r11, r0
addi r11, r11, 16
stvx v28, r11, r0
addi r11, r11, 16
stvx v29, r11, r0
addi r11, r11, 16
stvx v30, r11, r0
addi r11, r11, 16
stvx v31, r11, r0
// Clear the scratch pointer once the save area has been written.
li r11, 0
cmpwi cr0, M, 0 cmpwi cr0, M, 0
ble- L999 ble- L999
@ -198,9 +222,33 @@ L999:
ld r16, 264(SP) ld r16, 264(SP)
ld r15, 272(SP) ld r15, 272(SP)
ld r14, 280(SP) ld r14, 280(SP)
// Reload vector registers v20-v31 from the twelve 16-byte slots starting at
// SP+288, the same base the prologue stored them to.
// NOTE(review): r3 is the index operand added to r11, so this assumes r3
// holds 0 when the epilogue is reached - confirm.
addi r11,SP,288
lvx v20, r11, r3
addi r11, r11, 16
lvx v21, r11, r3
addi r11, r11, 16
lvx v22, r11, r3
addi r11, r11, 16
lvx v23, r11, r3
addi r11, r11, 16
lvx v24, r11, r3
addi r11, r11, 16
lvx v25, r11, r3
addi r11, r11, 16
lvx v26, r11, r3
addi r11, r11, 16
lvx v27, r11, r3
addi r11, r11, 16
lvx v28, r11, r3
addi r11, r11, 16
lvx v29, r11, r3
addi r11, r11, 16
lvx v30, r11, r3
addi r11, r11, 16
lvx v31, r11, r3
// Clear the scratch pointer before the stack frame is released.
li r11, 0
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
blr blr
EPILOGUE EPILOGUE

View File

@ -83,8 +83,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef __64BIT__ #ifdef __64BIT__
#define STACKSIZE 340 #define STACKSIZE 340
#define ALPHA_SP 296(SP) #define STACKSIZE 540
#define FZERO 304(SP) #define ALPHA_SP 296+200(SP)
#define FZERO 304+200(SP)
#else #else
#define STACKSIZE 240 #define STACKSIZE 240
#define ALPHA_SP 224(SP) #define ALPHA_SP 224(SP)
@ -132,6 +133,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define o0 0 #define o0 0
#define VECSAVE r11
#define TBUFFER r13 #define TBUFFER r13
#define o12 r14 #define o12 r14
#define o4 r15 #define o4 r15
@ -207,6 +210,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
std r15, 272(SP) std r15, 272(SP)
std r14, 280(SP) std r14, 280(SP)
std r13, 288(SP) std r13, 288(SP)
addi r11, SP, 304
#else #else
stw r31, 144(SP) stw r31, 144(SP)
stw r30, 148(SP) stw r30, 148(SP)
@ -227,7 +231,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
stw r15, 208(SP) stw r15, 208(SP)
stw r14, 212(SP) stw r14, 212(SP)
stw r13, 216(SP) stw r13, 216(SP)
addi r11, SP, 224
#endif #endif
// Store vector registers v20-v31 into twelve consecutive 16-byte slots
// starting at the address the preceding addi placed in r11 (SP+304 on the
// 64-bit path, SP+224 otherwise).
// NOTE(review): r0 is the index operand added to r11, so this assumes r0
// holds 0 at this point - confirm against the start of the prologue.
// NOTE(review): on the 32-bit path 192 bytes are written from SP+224 while
// the 32-bit STACKSIZE is still 240 and ALPHA_SP is still 224(SP); it looks
// like the 32-bit frame was not enlarged for this save area - confirm.
// NOTE(review): stvx drops the low four bits of the effective address and
// the 64-bit STACKSIZE of 540 is not a multiple of 16 - confirm alignment.
stvx v20, r11, r0
addi r11, r11, 16
stvx v21, r11, r0
addi r11, r11, 16
stvx v22, r11, r0
addi r11, r11, 16
stvx v23, r11, r0
addi r11, r11, 16
stvx v24, r11, r0
addi r11, r11, 16
stvx v25, r11, r0
addi r11, r11, 16
stvx v26, r11, r0
addi r11, r11, 16
stvx v27, r11, r0
addi r11, r11, 16
stvx v28, r11, r0
addi r11, r11, 16
stvx v29, r11, r0
addi r11, r11, 16
stvx v30, r11, r0
addi r11, r11, 16
stvx v31, r11, r0
// Clear the scratch pointer once the save area has been written.
li r11, 0
// stfd f1, ALPHA_SP // stfd f1, ALPHA_SP
// stw r0, FZERO // stw r0, FZERO
@ -278,9 +308,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
li o16, 16 li o16, 16
li o32, 32 li o32, 32
li o48, 48 li o48, 48
addi TBUFFER, SP, 320 addi TBUFFER, SP, 320+200
addi T1, SP, 300 addi T1, SP, 300+200
stxsspx f1, o0 , T1 stxsspx f1, o0 , T1
stxsspx f1, o4 , T1 stxsspx f1, o4 , T1
stxsspx f1, o8 , T1 stxsspx f1, o8 , T1
@ -339,6 +369,7 @@ L999:
ld r15, 272(SP) ld r15, 272(SP)
ld r14, 280(SP) ld r14, 280(SP)
ld r13, 288(SP) ld r13, 288(SP)
addi r11, SP, 304
#else #else
lwz r31, 144(SP) lwz r31, 144(SP)
lwz r30, 148(SP) lwz r30, 148(SP)
@ -359,10 +390,34 @@ L999:
lwz r15, 208(SP) lwz r15, 208(SP)
lwz r14, 212(SP) lwz r14, 212(SP)
lwz r13, 216(SP) lwz r13, 216(SP)
addi r11, SP, 224
#endif #endif
// Reload vector registers v20-v31 from the twelve 16-byte slots that start
// at the address the preceding addi placed in r11 (SP+304 on the 64-bit
// path, SP+224 otherwise).
// NOTE(review): r3 is the index operand added to r11, so this assumes r3
// holds 0 when the epilogue is reached - confirm.
lvx v20, r11, r3
addi r11, r11, 16
lvx v21, r11, r3
addi r11, r11, 16
lvx v22, r11, r3
addi r11, r11, 16
lvx v23, r11, r3
addi r11, r11, 16
lvx v24, r11, r3
addi r11, r11, 16
lvx v25, r11, r3
addi r11, r11, 16
lvx v26, r11, r3
addi r11, r11, 16
lvx v27, r11, r3
addi r11, r11, 16
lvx v28, r11, r3
addi r11, r11, 16
lvx v29, r11, r3
addi r11, r11, 16
lvx v30, r11, r3
addi r11, r11, 16
lvx v31, r11, r3
// Clear the scratch pointer before the stack frame is released.
li r11, 0
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
blr blr
EPILOGUE EPILOGUE

View File

@ -117,15 +117,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#ifdef __64BIT__ #ifdef __64BIT__
#define STACKSIZE 32000 #define STACKSIZE 32192
#define ALPHA_R_SP 296(SP) #define ALPHA_R_SP 296+192(SP)
#define ALPHA_I_SP 304(SP) #define ALPHA_I_SP 304+192(SP)
#define FZERO 312(SP) #define FZERO 312+192(SP)
#else #else
#define STACKSIZE 256 #define STACKSIZE 460
#define ALPHA_R_SP 224(SP) #define ALPHA_R_SP 224+204(SP)
#define ALPHA_I_SP 232(SP) #define ALPHA_I_SP 232+204(SP)
#define FZERO 240(SP) #define FZERO 240+204(SP)
#endif #endif
#define M r3 #define M r3
@ -168,6 +168,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define alpha_r vs30 #define alpha_r vs30
#define alpha_i vs31 #define alpha_i vs31
#define VECSAVE r11
#define FRAMEPOINTER r12 #define FRAMEPOINTER r12
@ -245,6 +246,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
std r16, 264(SP) std r16, 264(SP)
std r15, 272(SP) std r15, 272(SP)
std r14, 280(SP) std r14, 280(SP)
addi r11, SP, 288
#else #else
stw r31, 144(SP) stw r31, 144(SP)
stw r30, 148(SP) stw r30, 148(SP)
@ -263,7 +265,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
stw r17, 200(SP) stw r17, 200(SP)
stw r16, 204(SP) stw r16, 204(SP)
stw r15, 208(SP) stw r15, 208(SP)
addi r11, SP, 224
#endif #endif
// Store vector registers v20-v31 into twelve consecutive 16-byte slots
// starting at the address the preceding addi placed in r11 (SP+288 on the
// 64-bit path, SP+224 otherwise).
// NOTE(review): r0 is the index operand added to r11, so this assumes r0
// holds 0 at this point - confirm against the start of the prologue.
stvx v20, r11, r0
addi r11, r11, 16
stvx v21, r11, r0
addi r11, r11, 16
stvx v22, r11, r0
addi r11, r11, 16
stvx v23, r11, r0
addi r11, r11, 16
stvx v24, r11, r0
addi r11, r11, 16
stvx v25, r11, r0
addi r11, r11, 16
stvx v26, r11, r0
addi r11, r11, 16
stvx v27, r11, r0
addi r11, r11, 16
stvx v28, r11, r0
addi r11, r11, 16
stvx v29, r11, r0
addi r11, r11, 16
stvx v30, r11, r0
addi r11, r11, 16
stvx v31, r11, r0
// Clear the scratch pointer once the save area has been written.
li r11,0
stfd f1, ALPHA_R_SP stfd f1, ALPHA_R_SP
stfd f2, ALPHA_I_SP stfd f2, ALPHA_I_SP
@ -332,9 +359,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
and BBUFFER, BBUFFER, T1 and BBUFFER, BBUFFER, T1
#ifdef __64BIT__ #ifdef __64BIT__
addi ALPHA, SP, 296 addi ALPHA, SP, 296+192
#else #else
addi ALPHA, SP, 224 addi ALPHA, SP, 224+192+12
#endif #endif
lxsdx alpha_r, 0, ALPHA lxsdx alpha_r, 0, ALPHA
@ -389,6 +416,7 @@ L999:
ld r16, 264(SP) ld r16, 264(SP)
ld r15, 272(SP) ld r15, 272(SP)
ld r14, 280(SP) ld r14, 280(SP)
addi r11, SP, 288
#else #else
lwz r31, 144(SP) lwz r31, 144(SP)
lwz r30, 148(SP) lwz r30, 148(SP)
@ -407,13 +435,37 @@ L999:
lwz r17, 200(SP) lwz r17, 200(SP)
lwz r16, 204(SP) lwz r16, 204(SP)
lwz r15, 208(SP) lwz r15, 208(SP)
addi r11, SP, 224
#endif #endif
/*
 * Reload v20-v31 from twelve consecutive 16-byte slots.  r11 is the running
 * slot address, set just above to SP+288 on the ld path or SP+224 on the
 * lwz path, and r3 is the index register added to it.
 * NOTE(review): the matching store sequence indexes with r0, this one with
 * r3 (which is also the M argument register).  The loads only read back the
 * saved slots if r3 is 0 at this point, e.g. already set as the return
 * value; confirm.
 */
lvx v20, r11, r3
addi r11, r11, 16
lvx v21, r11, r3
addi r11, r11, 16
lvx v22, r11, r3
addi r11, r11, 16
lvx v23, r11, r3
addi r11, r11, 16
lvx v24, r11, r3
addi r11, r11, 16
lvx v25, r11, r3
addi r11, r11, 16
lvx v26, r11, r3
addi r11, r11, 16
lvx v27, r11, r3
addi r11, r11, 16
lvx v28, r11, r3
addi r11, r11, 16
lvx v29, r11, r3
addi r11, r11, 16
lvx v30, r11, r3
addi r11, r11, 16
lvx v31, r11, r3
/* All twelve registers are reloaded; clear the scratch pointer. */
li r11, 0
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
blr blr
EPILOGUE EPILOGUE

View File

@ -110,6 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "zgemm_tcopy_macros_8_power8.S" #include "zgemm_tcopy_macros_8_power8.S"
#define STACKSIZE 384 #define STACKSIZE 384
/*
 * Frame size enlarged so that the twelve 16-byte vector slots stored from
 * SP+288 (ending at SP+480) fit inside the frame.
 * NOTE(review): STACKSIZE is already defined as 384 on the preceding line;
 * redefining it with a different value and no #undef is normally diagnosed
 * by the preprocessor.  Consider replacing the earlier definition instead.
 */
#define STACKSIZE 576
PROLOGUE PROLOGUE
@ -136,6 +137,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
std r16, 264(SP) std r16, 264(SP)
std r15, 272(SP) std r15, 272(SP)
std r14, 280(SP) std r14, 280(SP)
/*
 * Save v20-v31 (the non-volatile vector registers under the Power ELF ABIs)
 * into twelve consecutive 16-byte slots starting at SP+288, directly above
 * the r14 slot at 280(SP).  r11 is the running slot address and r0 is the
 * index register added to it.
 * NOTE(review): this only hits the intended slots if r0 holds 0 here;
 * confirm it is zeroed earlier in the prologue.
 * NOTE(review): stvx ignores the low four bits of the effective address,
 * so SP+288 must be 16-byte aligned; confirm SP alignment.
 */
addi r11, SP ,288
stvx v20, r11, r0
addi r11, r11, 16
stvx v21, r11, r0
addi r11, r11, 16
stvx v22, r11, r0
addi r11, r11, 16
stvx v23, r11, r0
addi r11, r11, 16
stvx v24, r11, r0
addi r11, r11, 16
stvx v25, r11, r0
addi r11, r11, 16
stvx v26, r11, r0
addi r11, r11, 16
stvx v27, r11, r0
addi r11, r11, 16
stvx v28, r11, r0
addi r11, r11, 16
stvx v29, r11, r0
addi r11, r11, 16
stvx v30, r11, r0
addi r11, r11 ,16
stvx v31, r11, r0
/* All twelve registers are stored; clear the scratch pointer. */
li r11,0
cmpwi cr0, M, 0 cmpwi cr0, M, 0
ble- L999 ble- L999
@ -196,9 +222,33 @@ L999:
ld r16, 264(SP) ld r16, 264(SP)
ld r15, 272(SP) ld r15, 272(SP)
ld r14, 280(SP) ld r14, 280(SP)
/*
 * Reload v20-v31 from the twelve consecutive 16-byte slots starting at
 * SP+288.  r11 is the running slot address and r3 is the index register
 * added to it.
 * NOTE(review): the matching store sequence indexes with r0, this one with
 * r3.  The loads only read back the saved slots if r3 is 0 at this point,
 * e.g. already set as the return value; confirm.
 */
addi r11, SP, 288
lvx v20, r11,r3
addi r11, r11, 16
lvx v21, r11, r3
addi r11, r11, 16
lvx v22, r11, r3
addi r11, r11, 16
lvx v23, r11, r3
addi r11, r11, 16
lvx v24, r11, r3
addi r11, r11, 16
lvx v25, r11, r3
addi r11, r11, 16
lvx v26, r11, r3
addi r11, r11, 16
lvx v27, r11, r3
addi r11, r11, 16
lvx v28, r11, r3
addi r11, r11, 16
lvx v29, r11, r3
addi r11, r11, 16
lvx v30, r11, r3
addi r11, r11, 16
lvx v31, r11, r3
/* All twelve registers are reloaded; clear the scratch pointer. */
li r11,0
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
blr blr
EPILOGUE EPILOGUE

View File

@ -1,3 +1,4 @@
/*************************************************************************** /***************************************************************************
Copyright (c) 2013-2016, The OpenBLAS Project Copyright (c) 2013-2016, The OpenBLAS Project
All rights reserved. All rights reserved.
@ -82,15 +83,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#ifdef __64BIT__ #ifdef __64BIT__
#define STACKSIZE 320 #define STACKSIZE 520
#define ALPHA_R_SP 296(SP) #define ALPHA_R_SP 296+200(SP)
#define ALPHA_I_SP 304(SP) #define ALPHA_I_SP 304+200(SP)
#define FZERO 312(SP) #define FZERO 312+200(SP)
#else #else
#define STACKSIZE 256 #define STACKSIZE 452
#define ALPHA_R_SP 224(SP) #define ALPHA_R_SP 224+196(SP)
#define ALPHA_I_SP 232(SP) #define ALPHA_I_SP 232+196(SP)
#define FZERO 240(SP) #define FZERO 240+196(SP)
#endif #endif
#define M r3 #define M r3
@ -133,6 +134,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define alpha_r vs30 #define alpha_r vs30
#define alpha_i vs31 #define alpha_i vs31
#define VECSAVE r11
#define KKK r13 #define KKK r13
#define K1 r14 #define K1 r14
#define L r15 #define L r15
@ -204,6 +207,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
std r15, 272(SP) std r15, 272(SP)
std r14, 280(SP) std r14, 280(SP)
std r13, 288(SP) std r13, 288(SP)
addi r11, SP, 304
#else #else
stw r31, 144(SP) stw r31, 144(SP)
stw r30, 148(SP) stw r30, 148(SP)
@ -224,7 +228,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
stw r15, 208(SP) stw r15, 208(SP)
stw r14, 212(SP) stw r14, 212(SP)
stw r13, 216(SP) stw r13, 216(SP)
addi r11, SP, 224
#endif #endif
/*
 * Save v20-v31 (the non-volatile vector registers under the Power ELF ABIs)
 * into twelve consecutive 16-byte slots.  r11 is the running slot address,
 * set just above to SP+304 on the std path or SP+224 on the stw path, and
 * r0 is the index register added to it.
 * NOTE(review): this only hits the intended slots if r0 holds 0 here;
 * confirm it is zeroed earlier in the prologue.
 * NOTE(review): stvx ignores the low four bits of the effective address.
 * STACKSIZE in this file is 520 / 452, neither a multiple of 16, so if SP
 * is lowered by STACKSIZE from a 16-byte-aligned value the address in r11
 * is rounded down.  On the stw path that would start the first store at
 * SP+212, on top of the r14/r13 slots at 212(SP)/216(SP).  Verify the
 * frame layout and alignment.
 */
stvx v20, r11, r0
addi r11, r11, 16
stvx v21, r11, r0
addi r11, r11, 16
stvx v22, r11, r0
addi r11, r11, 16
stvx v23, r11, r0
addi r11, r11, 16
stvx v24, r11, r0
addi r11, r11, 16
stvx v25, r11, r0
addi r11, r11, 16
stvx v26, r11, r0
addi r11, r11, 16
stvx v27, r11, r0
addi r11, r11, 16
stvx v28, r11, r0
addi r11, r11, 16
stvx v29, r11, r0
addi r11, r11, 16
stvx v30, r11, r0
addi r11, r11, 16
stvx v31, r11, r0
/* All twelve registers are stored; clear the scratch pointer. */
li r11, 0
stfd f1, ALPHA_R_SP stfd f1, ALPHA_R_SP
stfd f2, ALPHA_I_SP stfd f2, ALPHA_I_SP
@ -289,9 +318,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
li o48 , 48 li o48 , 48
#ifdef __64BIT__ #ifdef __64BIT__
addi ALPHA, SP, 296 addi ALPHA, SP, 296+200
#else #else
addi ALPHA, SP, 224 addi ALPHA, SP, 224+196
#endif #endif
lxsdx alpha_r, 0, ALPHA lxsdx alpha_r, 0, ALPHA
@ -347,6 +376,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ld r15, 272(SP) ld r15, 272(SP)
ld r14, 280(SP) ld r14, 280(SP)
ld r13, 288(SP) ld r13, 288(SP)
addi r11, SP, 304
#else #else
lwz r31, 144(SP) lwz r31, 144(SP)
lwz r30, 148(SP) lwz r30, 148(SP)
@ -367,10 +397,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
lwz r15, 208(SP) lwz r15, 208(SP)
lwz r14, 212(SP) lwz r14, 212(SP)
lwz r13, 216(SP) lwz r13, 216(SP)
addi r11, SP, 224
#endif #endif
/*
 * Reload v20-v31 from twelve consecutive 16-byte slots.  r11 is the running
 * slot address, set just above to SP+304 on the ld path or SP+224 on the
 * lwz path, and r3 is the index register added to it.
 * NOTE(review): the matching store sequence indexes with r0, this one with
 * r3 (which is also the M argument register).  The loads only read back the
 * saved slots if r3 is 0 at this point, e.g. already set as the return
 * value; confirm.
 */
lvx v20, r11, r3
addi r11, r11, 16
lvx v21, r11, r3
addi r11, r11, 16
lvx v22, r11, r3
addi r11, r11, 16
lvx v23, r11, r3
addi r11, r11, 16
lvx v24, r11, r3
addi r11, r11, 16
lvx v25, r11, r3
addi r11, r11, 16
lvx v26, r11, r3
addi r11, r11, 16
lvx v27, r11, r3
addi r11, r11, 16
lvx v28, r11, r3
addi r11, r11, 16
lvx v29, r11, r3
addi r11, r11, 16
lvx v30, r11, r3
addi r11, r11, 16
lvx v31, r11, r3
/* All twelve registers are reloaded; clear the scratch pointer. */
li r11, 0
addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE
blr blr
EPILOGUE EPILOGUE