From 9c017a221827496dcfecbfcebd2bca74fd46e93a Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 28 Sep 2017 12:17:09 +0200 Subject: [PATCH] Save and restore VSX registers --- kernel/power/cgemm_kernel_8x4_power8.S | 77 ++++++++++++++++++---- kernel/power/cgemm_tcopy_8_power8.S | 54 ++++++++++++++- kernel/power/ctrmm_kernel_8x4_power8.S | 71 +++++++++++++++++--- kernel/power/dgemm_kernel_16x4_power8.S | 69 ++++++++++++++++--- kernel/power/dgemm_ncopy_4_power8.S | 63 ++++++++++++++++-- kernel/power/dgemm_tcopy_16_power8.S | 58 +++++++++++++++- kernel/power/dtrmm_kernel_16x4_power8.S | 72 +++++++++++++++++--- kernel/power/dtrsm_kernel_LT_16x4_power8.S | 61 ++++++++++++++++- kernel/power/sgemm_kernel_16x8_power8.S | 69 +++++++++++++++++-- kernel/power/sgemm_tcopy_16_power8.S | 54 ++++++++++++++- kernel/power/sgemm_tcopy_8_power8.S | 54 ++++++++++++++- kernel/power/strmm_kernel_16x8_power8.S | 69 +++++++++++++++++-- kernel/power/zgemm_kernel_8x2_power8.S | 74 +++++++++++++++++---- kernel/power/zgemm_tcopy_8_power8.S | 52 ++++++++++++++- kernel/power/ztrmm_kernel_8x2_power8.S | 76 +++++++++++++++++---- 15 files changed, 884 insertions(+), 89 deletions(-) diff --git a/kernel/power/cgemm_kernel_8x4_power8.S b/kernel/power/cgemm_kernel_8x4_power8.S index 0c462ce8e..8dbb6011d 100644 --- a/kernel/power/cgemm_kernel_8x4_power8.S +++ b/kernel/power/cgemm_kernel_8x4_power8.S @@ -82,15 +82,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif #ifdef __64BIT__ -#define STACKSIZE 32000 -#define ALPHA_R_SP 296(SP) -#define ALPHA_I_SP 304(SP) -#define FZERO 312(SP) +#define STACKSIZE 32196 +#define ALPHA_R_SP 296+196(SP) +#define ALPHA_I_SP 304+196(SP) +#define FZERO 312+196(SP) #else -#define STACKSIZE 256 -#define ALPHA_R_SP 224(SP) -#define ALPHA_I_SP 232(SP) -#define FZERO 240(SP) +#define STACKSIZE 456 +#define ALPHA_R_SP 224+200(SP) +#define ALPHA_I_SP 232+200(SP) +#define FZERO 240+200(SP) #endif #define M r3 @@ -138,6 +138,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define FRAMEPOINTER r12 +#define VECSAVE r11 + #define BBUFFER r14 #define L r15 #define o12 r16 @@ -167,6 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi SP, SP, -STACKSIZE addi SP, SP, -STACKSIZE addi SP, SP, -STACKSIZE + li r0, 0 stfd f14, 0(SP) @@ -211,6 +214,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) + addi r11, SP, 288 #else stw r31, 144(SP) stw r30, 148(SP) @@ -230,7 +234,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stw r16, 204(SP) stw r15, 208(SP) stw r14, 212(SP) + addi r11, SP, 224 #endif + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11, 0 stfs f1, ALPHA_R_SP stfs f2, ALPHA_I_SP @@ -301,9 +330,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifdef __64BIT__ - addi T1 , SP, 296 + addi T1 , SP, 296+196 #else - addi T1 , SP, 224 + addi T1 , SP, 224+200 #endif stxsspx vs1, 0, T1 @@ -375,6 +404,7 @@ L999: ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) + addi r11, SP, 288 #else lwz r31, 144(SP) lwz r30, 148(SP) @@ -394,7 +424,32 @@ L999: lwz r16, 204(SP) lwz r15, 208(SP) lwz r14, 212(SP) + addi r11, SP, 224 /* r11 = SP+224, base of the v20-v31 save area (32-bit layout) */ #endif + lvx v20, r11, r0 + addi r11, r11, 16 + lvx v21, r11, r0 + addi r11, r11, 16 + lvx v22, r11, r0 + addi r11, r11, 16 + lvx v23, r11, r0 + addi r11, r11, 16 + lvx v24, r11, r0 + addi r11, r11, 16 + lvx v25, r11, r0 + addi r11, r11, 16 + lvx v26, r11, r0 + addi r11, r11, 16 + lvx v27, r11, r0 + addi r11, r11, 16 + lvx v28, r11, r0 + addi r11, r11, 16 + lvx v29, r11, r0 + addi r11, r11, 16 + lvx v30, r11, r0 + addi r11, r11, 16 + lvx v31, r11, r0 + li r11, 0 addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE @@ -404,4 +459,4 @@ L999: blr EPILOGUE -#endif +#endif^ diff --git a/kernel/power/cgemm_tcopy_8_power8.S b/kernel/power/cgemm_tcopy_8_power8.S index b1a7d2b27..66a50584c 100644 --- a/kernel/power/cgemm_tcopy_8_power8.S +++ b/kernel/power/cgemm_tcopy_8_power8.S @@ -88,6 +88,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define J r12 + #define PREA r14 #define PREB r15 #define BO r16 @@ -109,7 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "cgemm_tcopy_macros_8_power8.S" -#define STACKSIZE 384 +#define STACKSIZE 576 PROLOGUE @@ -136,6 +137,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) + addi r11, SP, 288 + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11, 0 cmpwi cr0, M, 0 ble- L999 @@ -197,9 +223,33 @@ L999: ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) + addi r11, SP, 288 + lvx v20, r11, r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11, 0 addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/ctrmm_kernel_8x4_power8.S b/kernel/power/ctrmm_kernel_8x4_power8.S index 460a387fb..26f49c663 100644 --- a/kernel/power/ctrmm_kernel_8x4_power8.S +++ b/kernel/power/ctrmm_kernel_8x4_power8.S @@ -83,13 +83,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef __64BIT__ #define STACKSIZE 400 -#define ALPHA_R_SP 304(SP) -#define ALPHA_I_SP 312(SP) +#define STACKSIZE 592 +#define ALPHA_R_SP 304+192(SP) +#define ALPHA_I_SP 312+192(SP) #else #define STACKSIZE 256 -#define ALPHA_R_SP 224(SP) -#define ALPHA_I_SP 232(SP) -#define FZERO 240(SP) +#define STACKSIZE 452 +#define ALPHA_R_SP 224+196(SP) +#define ALPHA_I_SP 232+196(SP) +#define FZERO 240+196(SP) #endif #define M r3 @@ -135,6 +137,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define alpha_sr vs30 #define alpha_si vs31 +#define VECSAVE r11 + #define o12 r12 #define KKK r13 #define K1 r14 @@ -208,6 +212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r14, 280(SP) std r13, 288(SP) std r12, 296(SP) + addi r11, SP, 304 #else stw r31, 144(SP) stw r30, 148(SP) @@ -228,7 +233,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stw r15, 208(SP) stw r14, 212(SP) stw r13, 216(SP) + addi r11, SP, 224 #endif + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11, 0 stfs f1, ALPHA_R_SP stfs f2, ALPHA_I_SP @@ -295,9 +325,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifdef __64BIT__ - addi T1, SP, 304 + addi T1, SP, 304+192 #else - addi T1, SP, 224 + addi T1, SP, 224+196 #endif lxsspx alpha_dr, 0, T1 @@ -369,6 +399,7 @@ L999: ld r14, 280(SP) ld r13, 288(SP) ld r12, 296(SP) + addi r11, SP, 304 #else lwz r31, 144(SP) lwz r30, 148(SP) @@ -389,10 +420,34 @@ L999: lwz r15, 208(SP) lwz r14, 212(SP) lwz r13, 216(SP) + addi r11, SP, 224 #endif + lvx v20, r11, r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11, 0 addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/dgemm_kernel_16x4_power8.S b/kernel/power/dgemm_kernel_16x4_power8.S index 8af7fe389..41958eab0 100644 --- a/kernel/power/dgemm_kernel_16x4_power8.S +++ b/kernel/power/dgemm_kernel_16x4_power8.S @@ -83,12 +83,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef __64BIT__ #define STACKSIZE 320 -#define ALPHA_SP 296(SP) -#define FZERO 304(SP) +#define STACKSIZE 512 +#define ALPHA_SP 296+192(SP) +#define FZERO 304+192(SP) #else #define STACKSIZE 240 -#define ALPHA_SP 224(SP) -#define FZERO 232(SP) +#define STACKSIZE 440 +#define ALPHA_SP 224+200(SP) +#define FZERO 232+200(SP) #endif #define M r3 @@ -210,6 +212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) + addi r11,SP,288 #else stw r31, 144(SP) stw r30, 148(SP) @@ -229,7 +232,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stw r16, 204(SP) stw r15, 208(SP) stw r14, 212(SP) + addi r11,SP,224 #endif + stvx v20, r11,r0 +addi r11,r11,16 + stvx v21, r11,r0 +addi r11,r11,16 + stvx v22, r11,r0 +addi r11,r11,16 + stvx v23, r11,r0 +addi r11,r11,16 + stvx v24, r11,r0 +addi r11,r11,16 + stvx v25, r11,r0 +addi r11,r11,16 + stvx v26, r11,r0 +addi r11,r11,16 + stvx v27, r11,r0 +addi r11,r11,16 + stvx v28, r11,r0 +addi r11,r11,16 + stvx v29, r11,r0 +addi r11,r11,16 + stvx v30, r11,r0 +addi r11,r11,16 + stvx v31, r11,r0 +li r11,0 stfd f1, ALPHA_SP stw r0, FZERO @@ -269,12 +297,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ble .L999_H1 #ifdef __64BIT__ - addi T1, SP, 296 + addi T1, SP, 296+192 #else - addi T1, SP, 224 + addi T1, SP, 224+200 #endif - li PRE, 384 + li PRE, 384 li o8 , 8 li o16, 16 li o24, 24 @@ -334,6 +362,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) + addi r11,SP,288 #else lwz r31, 144(SP) lwz r30, 148(SP) @@ -353,10 +382,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. lwz r16, 204(SP) lwz r15, 208(SP) lwz r14, 212(SP) + addi r11,SP,224 #endif + lvx v20, r11,r3 +addi r11,r11,16 + lvx v21, r11,r3 +addi r11,r11,16 + lvx v22, r11,r3 +addi r11,r11,16 + lvx v23, r11,r3 +addi r11,r11,16 + lvx v24, r11,r3 +addi r11,r11,16 + lvx v25, r11,r3 +addi r11,r11,16 + lvx v26, r11,r3 +addi r11,r11,16 + lvx v27, r11,r3 +addi r11,r11,16 + lvx v28, r11,r3 +addi r11,r11,16 + lvx v29, r11,r3 +addi r11,r11,16 + lvx v30, r11,r3 +addi r11,r11,16 + lvx v31, r11,r3 +li r11,0 addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/dgemm_ncopy_4_power8.S b/kernel/power/dgemm_ncopy_4_power8.S index 31966047f..e0936574d 100644 --- a/kernel/power/dgemm_ncopy_4_power8.S +++ b/kernel/power/dgemm_ncopy_4_power8.S @@ -110,12 +110,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "dgemm_ncopy_macros_4_power8.S" #define STACKSIZE 384 - +#define STACKSIZE 576 PROLOGUE PROFCODE addi SP, SP, -STACKSIZE +//addi SP, SP, -208 li r0, 0 stfd f14, 0(SP) @@ -157,6 +158,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r15, 272(SP) std r14, 280(SP) +addi r11,SP,288 + stvx v20, r11,r0 +addi r11,r11,16 + stvx v21, r11,r0 +addi r11,r11,16 + stvx v22, r11,r0 +addi r11,r11,16 + stvx v23, r11,r0 +addi r11,r11,16 + stvx v24, r11,r0 +addi r11,r11,16 + stvx v25, r11,r0 +addi r11,r11,16 + stvx v26, r11,r0 +addi r11,r11,16 + stvx v27, r11,r0 +addi r11,r11,16 + stvx v28, r11,r0 +addi r11,r11,16 + stvx v29, r11,r0 +addi r11,r11,16 + stvx v30, r11,r0 +addi r11,r11,16 + stvx v31, r11,r0 +li r11,0 + cmpwi cr0, M, 0 ble- L999 cmpwi cr0, N, 0 @@ -164,8 +191,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. slwi LDA, LDA, BASE_SHIFT - li PREA, 384 - li PREB, 384 + //li PREA, 384 + //li PREB, 384 + li PREA, 576 + li PREB, 576 + li o8, 8 li o16, 16 @@ -219,9 +249,34 @@ L999: ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) +addi r11,SP,288 + lvx v20, r11,r3 +addi r11,r11,16 + lvx v21, r11,r3 +addi r11,r11,16 + lvx v22, r11,r3 +addi r11,r11,16 + lvx v23, r11,r3 +addi r11,r11,16 + lvx v24, r11,r3 +addi r11,r11,16 + lvx v25, r11,r3 +addi r11,r11,16 + lvx v26, r11,r3 +addi r11,r11,16 + lvx v27, r11,r3 +addi r11,r11,16 + lvx v28, r11,r3 +addi r11,r11,16 + lvx v29, r11,r3 +addi r11,r11,16 + lvx v30, r11,r3 +addi r11,r11,16 + lvx v31, r11,r3 +li r11,0 addi SP, SP, STACKSIZE - +//addi SP, SP, 208 blr EPILOGUE diff --git a/kernel/power/dgemm_tcopy_16_power8.S b/kernel/power/dgemm_tcopy_16_power8.S index eb37877e0..6da816220 100644 --- a/kernel/power/dgemm_tcopy_16_power8.S +++ b/kernel/power/dgemm_tcopy_16_power8.S @@ -110,12 +110,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "dgemm_tcopy_macros_16_power8.S" #define STACKSIZE 384 +#define STACKSIZE 576 PROLOGUE PROFCODE addi SP, SP, -STACKSIZE +//addi SP, SP, -208 + li r0, 0 std r31, 144(SP) @@ -136,6 +139,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) +addi r11,SP,288 + stvx v20, r11,r0 +addi r11,r11,16 + stvx v21, r11,r0 +addi r11,r11,16 + stvx v22, r11,r0 +addi r11,r11,16 + stvx v23, r11,r0 +addi r11,r11,16 + stvx v24, r11,r0 +addi r11,r11,16 + stvx v25, r11,r0 +addi r11,r11,16 + stvx v26, r11,r0 +addi r11,r11,16 + stvx v27, r11,r0 +addi r11,r11,16 + stvx v28, r11,r0 +addi r11,r11,16 + stvx v29, r11,r0 +addi r11,r11,16 + stvx v30, r11,r0 +addi r11,r11,16 + stvx v31, r11,r0 +li r11,0 cmpwi cr0, M, 0 ble- L999 @@ -170,7 +198,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add B2, B2, B add B1, B1, B - li PREA, 384 + //li PREA, 384 + li PREA, 576 addi PREB, M16, 128 li o8, 8 @@ -202,9 +231,34 @@ L999: ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) +addi r11,SP,288 + lvx v20, r11,r3 +addi r11,r11,16 + lvx v21, r11,r3 +addi r11,r11,16 + lvx v22, r11,r3 +addi r11,r11,16 + lvx v23, r11,r3 +addi r11,r11,16 + lvx v24, r11,r3 +addi r11,r11,16 + lvx v25, r11,r3 +addi r11,r11,16 + lvx v26, r11,r3 +addi r11,r11,16 + lvx v27, r11,r3 +addi r11,r11,16 + lvx v28, r11,r3 +addi r11,r11,16 + lvx v29, r11,r3 +addi r11,r11,16 + lvx v30, r11,r3 +addi r11,r11,16 + lvx v31, r11,r3 +li r11,0 addi SP, SP, STACKSIZE - +//addi SP, SP, 208 blr EPILOGUE diff --git a/kernel/power/dtrmm_kernel_16x4_power8.S b/kernel/power/dtrmm_kernel_16x4_power8.S index e9dbd991e..47e703a3a 100644 --- a/kernel/power/dtrmm_kernel_16x4_power8.S +++ b/kernel/power/dtrmm_kernel_16x4_power8.S @@ -83,12 +83,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifdef __64BIT__ #define STACKSIZE 320 -#define ALPHA_SP 296(SP) -#define FZERO 304(SP) +#define STACKSIZE 520 +#define ALPHA_SP 296+200(SP) +#define FZERO 304+200(SP) #else -#define STACKSIZE 240 -#define ALPHA_SP 224(SP) -#define FZERO 232(SP) +#define STACKSIZE 436 +#define ALPHA_SP 224+196(SP) +#define FZERO 232+196(SP) #endif #define M r3 @@ -152,6 +153,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define PRE r30 #define T2 r31 +#define VECSAVE r11 + #include "dtrmm_macros_16x4_power8.S" @@ -206,6 +209,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r15, 272(SP) std r14, 280(SP) std r13, 288(SP) + addi r11, SP, 304 #else stw r31, 144(SP) stw r30, 148(SP) @@ -226,7 +230,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stw r15, 208(SP) stw r14, 212(SP) stw r13, 216(SP) + addi r11, SP, 224 /* base must be SP: an rA of r0 in addi reads as literal 0, not the register */ #endif + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11 ,r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11,0 stfd f1, ALPHA_SP stw r0, FZERO @@ -270,9 +299,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ble .L999_H1 #ifdef __64BIT__ - addi ALPHA, SP, 296 + addi ALPHA, SP, 296+200 #else - addi ALPHA, SP, 224 + addi ALPHA, SP, 224+196 #endif li PRE, 256 @@ -332,6 +361,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ld r15, 272(SP) ld r14, 280(SP) ld r13, 288(SP) + addi r11, SP, 304 #else lwz r31, 144(SP) lwz r30, 148(SP) @@ -352,10 +382,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
lwz r15, 208(SP) lwz r14, 212(SP) lwz r13, 216(SP) + addi r11, SP, 224 #endif + lvx v20, r11, r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11, 0 addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/dtrsm_kernel_LT_16x4_power8.S b/kernel/power/dtrsm_kernel_LT_16x4_power8.S index fdfc5ac70..7a4a30390 100644 --- a/kernel/power/dtrsm_kernel_LT_16x4_power8.S +++ b/kernel/power/dtrsm_kernel_LT_16x4_power8.S @@ -48,8 +48,9 @@ #ifdef __64BIT__ #define STACKSIZE 320 -#define ALPHA 296(SP) -#define FZERO 304(SP) +#define STACKSIZE 520 +#define ALPHA 296+200(SP) +#define FZERO 304+200(SP) #else #define STACKSIZE 240 #define ALPHA 224(SP) @@ -112,6 +113,8 @@ #define o48 r30 #define T1 r31 +#define VECSAVE r11 + #include "dtrsm_macros_LT_16x4_power8.S" #ifndef NEEDPARAM @@ -163,6 +166,7 @@ std r17, 256(SP) std r16, 264(SP) std r15, 272(SP) + addi r11,SP,288 #else stw r31, 144(SP) stw r30, 148(SP) @@ -178,7 +182,32 @@ stw r20, 188(SP) stw r19, 192(SP) stw r18, 196(SP) + addi r11,SP,208 #endif + stvx v20, r11,r0 +addi r11,r11,16 + stvx v21, r11,r0 +addi r11,r11,16 + stvx v22, r11,r0 +addi r11,r11,16 + stvx v23, r11,r0 +addi r11,r11,16 + stvx v24, r11,r0 +addi r11,r11,16 + stvx v25, r11,r0 +addi r11,r11,16 + stvx v26, r11,r0 +addi r11,r11,16 + stvx v27, r11,r0 +addi r11,r11,16 + stvx v28, r11,r0 +addi r11,r11,16 + stvx v29, r11,r0 +addi r11,r11,16 + stvx v30, r11,r0 +addi r11,r11,16 + stvx v31, r11,r0 +li r11,0 #if defined(_AIX) || defined(__APPLE__) @@ -269,6 +298,7 @@ L999: ld r17, 256(SP) ld r16, 264(SP) ld r15, 272(SP) + addi r11,SP,288 #else lwz r31, 144(SP) lwz r30, 148(SP) @@ 
-284,10 +314,35 @@ L999: lwz r20, 188(SP) lwz r19, 192(SP) lwz r18, 196(SP) + addi r11,SP,208 #endif + lvx v20, r11,r3 +addi r11,r11,16 + lvx v21, r11,r3 +addi r11,r11,16 + lvx v22, r11,r3 +addi r11,r11,16 + lvx v23, r11,r3 +addi r11,r11,16 + lvx v24, r11,r3 +addi r11,r11,16 + lvx v25, r11,r3 +addi r11,r11,16 + lvx v26, r11,r3 +addi r11,r11,16 + lvx v27, r11,r3 +addi r11,r11,16 + lvx v28, r11,r3 +addi r11,r11,16 + lvx v29, r11,r3 +addi r11,r11,16 + lvx v30, r11,r3 +addi r11,r11,16 + lvx v31, r11,r3 +li r11,0 + addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/sgemm_kernel_16x8_power8.S b/kernel/power/sgemm_kernel_16x8_power8.S index e169eb970..c72b00cf6 100644 --- a/kernel/power/sgemm_kernel_16x8_power8.S +++ b/kernel/power/sgemm_kernel_16x8_power8.S @@ -83,12 +83,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef __64BIT__ #define STACKSIZE 32752 -#define ALPHA_SP 296(SP) -#define FZERO 304(SP) +#define ALPHA_SP 296+192(SP) +#define FZERO 304+192(SP) #else -#define STACKSIZE 240 -#define ALPHA_SP 224(SP) -#define FZERO 232(SP) +#define STACKSIZE 440 +#define ALPHA_SP 224+200(SP) +#define FZERO 232+200(SP) #endif #define M r3 @@ -132,6 +132,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define o0 0 +#define VECSAVE r11 + #define FRAMEPOINTER r12 #define BBUFFER r14 @@ -211,6 +213,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) + addi r11, SP, 288 #else stw r31, 144(SP) stw r30, 148(SP) @@ -230,7 +233,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
stw r16, 204(SP) stw r15, 208(SP) stw r14, 212(SP) + addi r11, SP, 224 #endif + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11,0 + // stfd f1, ALPHA_SP // stw r0, FZERO @@ -281,7 +310,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. li T1, -4096 and BBUFFER, BBUFFER, T1 - addi T1, SP, 300 + addi T1, SP, 300+192 stxsspx f1, o0 , T1 stxsspx f1, o4 , T1 stxsspx f1, o8 , T1 @@ -339,6 +368,7 @@ L999: ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) + addi r11, SP, 288 #else lwz r31, 144(SP) lwz r30, 148(SP) @@ -358,13 +388,38 @@ L999: lwz r16, 204(SP) lwz r15, 208(SP) lwz r14, 212(SP) + addi r11, SP, 224 #endif + lvx v20, r11, r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11, 0 + addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/sgemm_tcopy_16_power8.S b/kernel/power/sgemm_tcopy_16_power8.S index 764d5b187..8f6b4d8c4 100644 --- a/kernel/power/sgemm_tcopy_16_power8.S +++ b/kernel/power/sgemm_tcopy_16_power8.S @@ -110,8 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "sgemm_tcopy_macros_16_power8.S" -#define STACKSIZE 384 - +#define STACKSIZE 576 PROLOGUE PROFCODE @@ -137,6 +136,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) + addi r11 ,SP, 288 + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11, 0 cmpwi cr0, M, 0 ble- L999 @@ -203,9 +227,33 @@ L999: ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) + addi r11, SP, 288 + lvx v20, r11, r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11, 0 addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/sgemm_tcopy_8_power8.S b/kernel/power/sgemm_tcopy_8_power8.S index 2bbd6e696..98185432a 100644 --- a/kernel/power/sgemm_tcopy_8_power8.S +++ b/kernel/power/sgemm_tcopy_8_power8.S @@ -110,8 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "sgemm_tcopy_macros_8_power8.S" -#define STACKSIZE 384 - +#define STACKSIZE 576 PROLOGUE PROFCODE @@ -137,6 +136,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) + addi r11, SP, 288 + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11, 0 cmpwi cr0, M, 0 ble- L999 @@ -198,9 +222,33 @@ L999: ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) + addi r11,SP,288 + lvx v20, r11, r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11, 0 addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/strmm_kernel_16x8_power8.S b/kernel/power/strmm_kernel_16x8_power8.S index f756d5d92..f9b8a0bb8 100644 --- a/kernel/power/strmm_kernel_16x8_power8.S +++ b/kernel/power/strmm_kernel_16x8_power8.S @@ -83,8 +83,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef __64BIT__ #define STACKSIZE 340 -#define ALPHA_SP 296(SP) -#define FZERO 304(SP) +#define STACKSIZE 540 +#define ALPHA_SP 296+200(SP) +#define FZERO 304+200(SP) #else #define STACKSIZE 240 #define ALPHA_SP 224(SP) @@ -132,6 +133,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define o0 0 +#define VECSAVE r11 + #define TBUFFER r13 #define o12 r14 #define o4 r15 @@ -207,6 +210,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
std r15, 272(SP) std r14, 280(SP) std r13, 288(SP) + addi r11, SP, 304 #else stw r31, 144(SP) stw r30, 148(SP) @@ -226,8 +230,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stw r16, 204(SP) stw r15, 208(SP) stw r14, 212(SP) - stw r13, 216(SP) + stw r13, 216(SP) + addi r11, SP, 224 #endif + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11, 0 + // stfd f1, ALPHA_SP // stw r0, FZERO @@ -271,16 +301,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cmpwi cr0, K, 0 ble L999_H1 - li PRE, 256 + li PRE, 256 li o4 , 4 li o8 , 8 li o12, 12 li o16, 16 li o32, 32 li o48, 48 - addi TBUFFER, SP, 320 + addi TBUFFER, SP, 320+200 - addi T1, SP, 300 + addi T1, SP, 300+200 stxsspx f1, o0 , T1 stxsspx f1, o4 , T1 stxsspx f1, o8 , T1 @@ -339,6 +369,7 @@ L999: ld r15, 272(SP) ld r14, 280(SP) ld r13, 288(SP) + addi r11, SP, 304 #else lwz r31, 144(SP) lwz r30, 148(SP) @@ -359,10 +390,34 @@ L999: lwz r15, 208(SP) lwz r14, 212(SP) lwz r13, 216(SP) + addi r11, SP, 224 #endif + lvx v20, r11, r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11, 0 addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/zgemm_kernel_8x2_power8.S 
b/kernel/power/zgemm_kernel_8x2_power8.S index 02c94a88a..5526b91c9 100644 --- a/kernel/power/zgemm_kernel_8x2_power8.S +++ b/kernel/power/zgemm_kernel_8x2_power8.S @@ -117,15 +117,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #ifdef __64BIT__ -#define STACKSIZE 32000 -#define ALPHA_R_SP 296(SP) -#define ALPHA_I_SP 304(SP) -#define FZERO 312(SP) +#define STACKSIZE 32192 +#define ALPHA_R_SP 296+192(SP) +#define ALPHA_I_SP 304+192(SP) +#define FZERO 312+192(SP) #else -#define STACKSIZE 256 -#define ALPHA_R_SP 224(SP) -#define ALPHA_I_SP 232(SP) -#define FZERO 240(SP) +#define STACKSIZE 460 +#define ALPHA_R_SP 224+204(SP) +#define ALPHA_I_SP 232+204(SP) +#define FZERO 240+204(SP) #endif #define M r3 @@ -168,6 +168,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define alpha_r vs30 #define alpha_i vs31 +#define VECSAVE r11 #define FRAMEPOINTER r12 @@ -245,6 +246,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) + addi r11, SP, 288 #else stw r31, 144(SP) stw r30, 148(SP) @@ -263,7 +265,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stw r17, 200(SP) stw r16, 204(SP) stw r15, 208(SP) + addi r11, SP, 224 #endif + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11,0 stfd f1, ALPHA_R_SP stfd f2, ALPHA_I_SP @@ -332,9 +359,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
and BBUFFER, BBUFFER, T1 #ifdef __64BIT__ - addi ALPHA, SP, 296 + addi ALPHA, SP, 296+192 #else - addi ALPHA, SP, 224 + addi ALPHA, SP, 224+192+12 #endif lxsdx alpha_r, 0, ALPHA @@ -389,6 +416,7 @@ L999: ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) + addi r11, SP, 288 #else lwz r31, 144(SP) lwz r30, 148(SP) @@ -407,13 +435,37 @@ L999: lwz r17, 200(SP) lwz r16, 204(SP) lwz r15, 208(SP) + addi r11, SP, 224 #endif + lvx v20, r11, r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11, 0 addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/zgemm_tcopy_8_power8.S b/kernel/power/zgemm_tcopy_8_power8.S index 1f3f35419..2841a9921 100644 --- a/kernel/power/zgemm_tcopy_8_power8.S +++ b/kernel/power/zgemm_tcopy_8_power8.S @@ -110,6 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "zgemm_tcopy_macros_8_power8.S" #define STACKSIZE 384 +#define STACKSIZE 576 PROLOGUE @@ -136,6 +137,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) + addi r11, SP ,288 + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11 ,16 + stvx v31, r11, r0 + li r11,0 cmpwi cr0, M, 0 ble- L999 @@ -196,9 +222,33 @@ L999: ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) + addi r11, SP, 288 + lvx v20, r11,r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11,0 addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/ztrmm_kernel_8x2_power8.S b/kernel/power/ztrmm_kernel_8x2_power8.S index 0cfe613d5..c1415138c 100644 --- a/kernel/power/ztrmm_kernel_8x2_power8.S +++ b/kernel/power/ztrmm_kernel_8x2_power8.S @@ -1,3 +1,4 @@ + /*************************************************************************** Copyright (c) 2013-2016, The OpenBLAS Project All rights reserved. @@ -82,15 +83,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif #ifdef __64BIT__ -#define STACKSIZE 320 -#define ALPHA_R_SP 296(SP) -#define ALPHA_I_SP 304(SP) -#define FZERO 312(SP) +#define STACKSIZE 528 /* multiple of 16: stvx/lvx ignore the low 4 address bits, so SP must stay quadword-aligned */ +#define ALPHA_R_SP 296+200(SP) +#define ALPHA_I_SP 304+200(SP) +#define FZERO 312+200(SP) #else -#define STACKSIZE 256 -#define ALPHA_R_SP 224(SP) -#define ALPHA_I_SP 232(SP) -#define FZERO 240(SP) +#define STACKSIZE 464 /* multiple of 16, same reason as the 64-bit frame */ +#define ALPHA_R_SP 224+196(SP) +#define ALPHA_I_SP 232+196(SP) +#define FZERO 240+196(SP) #endif #define M r3 @@ -133,6 +134,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define alpha_r vs30 #define alpha_i vs31 +#define VECSAVE r11 + #define KKK r13 #define K1 r14 #define L r15 @@ -204,6 +207,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r15, 272(SP) std r14, 280(SP) std r13, 288(SP) + addi VECSAVE, SP, 304 /* start of the v20-v31 save area: first 16-byte multiple above the r13 slot */ #else stw r31, 144(SP) stw r30, 148(SP) @@ -224,7 +228,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stw r15, 208(SP) stw r14, 212(SP) stw r13, 216(SP) + addi VECSAVE, SP, 224 /* start of the v20-v31 save area (32-bit layout) */ #endif + stvx v20, 0, VECSAVE /* RA = 0: the address is VECSAVE alone, independent of r0 */ + addi VECSAVE, VECSAVE, 16 + stvx v21, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + stvx v22, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + stvx v23, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + stvx v24, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + stvx v25, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + stvx v26, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + stvx v27, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + stvx v28, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + stvx v29, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + stvx v30, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + stvx v31, 0, VECSAVE + li VECSAVE, 0 stfd f1, ALPHA_R_SP stfd f2, ALPHA_I_SP @@ -289,9 +318,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. li o48 , 48 #ifdef __64BIT__ - addi ALPHA, SP, 296 + addi ALPHA, SP, 296+200 #else - addi ALPHA, SP, 224 + addi ALPHA, SP, 224+196 #endif lxsdx alpha_r, 0, ALPHA @@ -347,6 +376,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
ld r15, 272(SP) ld r14, 280(SP) ld r13, 288(SP) + addi VECSAVE, SP, 304 /* same base offset the prologue stored v20-v31 at (64-bit layout) */ #else lwz r31, 144(SP) lwz r30, 148(SP) @@ -367,10 +397,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. lwz r15, 208(SP) lwz r14, 212(SP) lwz r13, 216(SP) + addi VECSAVE, SP, 224 /* same base offset the prologue stored v20-v31 at (32-bit layout) */ #endif + lvx v20, 0, VECSAVE /* RA = 0: the address is VECSAVE alone, independent of r3 */ + addi VECSAVE, VECSAVE, 16 + lvx v21, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + lvx v22, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + lvx v23, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + lvx v24, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + lvx v25, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + lvx v26, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + lvx v27, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + lvx v28, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + lvx v29, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + lvx v30, 0, VECSAVE + addi VECSAVE, VECSAVE, 16 + lvx v31, 0, VECSAVE + li VECSAVE, 0 addi SP, SP, STACKSIZE - blr EPILOGUE