Merge pull request #1317 from martin-frbg/power8-asm
Save and restore VSX registers
This commit is contained in:
commit
1eb43cccad
|
@ -82,15 +82,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
#ifdef __64BIT__
|
||||
#define STACKSIZE 32000
|
||||
#define ALPHA_R_SP 296(SP)
|
||||
#define ALPHA_I_SP 304(SP)
|
||||
#define FZERO 312(SP)
|
||||
#define STACKSIZE 32192 /* must be a multiple of 16: stvx/lvx drop the low 4 address bits, so a misaligned SP makes the vector save area overlap the GPR save slots */
|
||||
#define ALPHA_R_SP 296+196(SP)
|
||||
#define ALPHA_I_SP 304+196(SP)
|
||||
#define FZERO 312+196(SP)
|
||||
#else
|
||||
#define STACKSIZE 256
|
||||
#define ALPHA_R_SP 224(SP)
|
||||
#define ALPHA_I_SP 232(SP)
|
||||
#define FZERO 240(SP)
|
||||
#define STACKSIZE 456
|
||||
#define ALPHA_R_SP 224+200(SP)
|
||||
#define ALPHA_I_SP 232+200(SP)
|
||||
#define FZERO 240+200(SP)
|
||||
#endif
|
||||
|
||||
#define M r3
|
||||
|
@ -138,6 +138,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define FRAMEPOINTER r12
|
||||
|
||||
#define VECSAVE r11
|
||||
|
||||
#define BBUFFER r14
|
||||
#define L r15
|
||||
#define o12 r16
|
||||
|
@ -167,6 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
addi SP, SP, -STACKSIZE
|
||||
addi SP, SP, -STACKSIZE
|
||||
addi SP, SP, -STACKSIZE
|
||||
|
||||
li r0, 0
|
||||
|
||||
stfd f14, 0(SP)
|
||||
|
@ -211,6 +214,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
std r16, 264(SP)
|
||||
std r15, 272(SP)
|
||||
std r14, 280(SP)
|
||||
addi r11, SP, 288
|
||||
#else
|
||||
stw r31, 144(SP)
|
||||
stw r30, 148(SP)
|
||||
|
@ -230,7 +234,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
stw r16, 204(SP)
|
||||
stw r15, 208(SP)
|
||||
stw r14, 212(SP)
|
||||
addi r11, SP, 224
|
||||
#endif
|
||||
stvx v20, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v21, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v22, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v23, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v24, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v25, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v26, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v27, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v28, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v29, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v30, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v31, r11, r0
|
||||
li r11, 0
|
||||
|
||||
stfs f1, ALPHA_R_SP
|
||||
stfs f2, ALPHA_I_SP
|
||||
|
@ -301,9 +330,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
|
||||
#ifdef __64BIT__
|
||||
addi T1 , SP, 296
|
||||
addi T1 , SP, 296+196
|
||||
#else
|
||||
addi T1 , SP, 224
|
||||
addi T1 , SP, 224+200
|
||||
#endif
|
||||
|
||||
stxsspx vs1, 0, T1
|
||||
|
@ -375,6 +404,7 @@ L999:
|
|||
ld r16, 264(SP)
|
||||
ld r15, 272(SP)
|
||||
ld r14, 280(SP)
|
||||
addi r11, SP, 288
|
||||
#else
|
||||
lwz r31, 144(SP)
|
||||
lwz r30, 148(SP)
|
||||
|
@ -394,7 +424,32 @@ L999:
|
|||
lwz r16, 204(SP)
|
||||
lwz r15, 208(SP)
|
||||
lwz r14, 212(SP)
|
||||
addi r11, SP, 224 // r11 = start of the vector save area, same base the prologue uses before its stvx sequence
|
||||
#endif
|
||||
lvx v20, r11, r0
|
||||
addi r11, r11, 16
|
||||
lvx v21, r11, r0
|
||||
addi r11, r11, 16
|
||||
lvx v22, r11, r0
|
||||
addi r11, r11, 16
|
||||
lvx v23, r11, r0
|
||||
addi r11, r11, 16
|
||||
lvx v24, r11, r0
|
||||
addi r11, r11, 16
|
||||
lvx v25, r11, r0
|
||||
addi r11, r11, 16
|
||||
lvx v26, r11, r0
|
||||
addi r11, r11, 16
|
||||
lvx v27, r11, r0
|
||||
addi r11, r11, 16
|
||||
lvx v28, r11, r0
|
||||
addi r11, r11, 16
|
||||
lvx v29, r11, r0
|
||||
addi r11, r11, 16
|
||||
lvx v30, r11, r0
|
||||
addi r11, r11, 16
|
||||
lvx v31, r11, r0
|
||||
li r11, 0
|
||||
|
||||
addi SP, SP, STACKSIZE
|
||||
addi SP, SP, STACKSIZE
|
||||
|
@ -404,4 +459,4 @@ L999:
|
|||
blr
|
||||
|
||||
EPILOGUE
|
||||
#endif
|
||||
#endif^
|
||||
|
|
|
@ -88,6 +88,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define J r12
|
||||
|
||||
|
||||
#define PREA r14
|
||||
#define PREB r15
|
||||
#define BO r16
|
||||
|
@ -109,7 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "cgemm_tcopy_macros_8_power8.S"
|
||||
|
||||
#define STACKSIZE 384
|
||||
#define STACKSIZE 576
|
||||
|
||||
|
||||
PROLOGUE
|
||||
|
@ -136,6 +137,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
std r16, 264(SP)
|
||||
std r15, 272(SP)
|
||||
std r14, 280(SP)
|
||||
addi r11, SP, 288
|
||||
stvx v20, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v21, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v22, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v23, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v24, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v25, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v26, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v27, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v28, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v29, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v30, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v31, r11, r0
|
||||
li r11, 0
|
||||
|
||||
cmpwi cr0, M, 0
|
||||
ble- L999
|
||||
|
@ -197,9 +223,33 @@ L999:
|
|||
ld r16, 264(SP)
|
||||
ld r15, 272(SP)
|
||||
ld r14, 280(SP)
|
||||
addi r11, SP, 288
|
||||
lvx v20, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v21, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v22, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v23, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v24, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v25, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v26, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v27, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v28, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v29, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v30, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v31, r11, r3
|
||||
li r11, 0
|
||||
|
||||
addi SP, SP, STACKSIZE
|
||||
|
||||
blr
|
||||
EPILOGUE
|
||||
|
||||
|
|
|
@ -83,13 +83,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#ifdef __64BIT__
|
||||
#define STACKSIZE 400
|
||||
#define ALPHA_R_SP 304(SP)
|
||||
#define ALPHA_I_SP 312(SP)
|
||||
#define STACKSIZE 592
|
||||
#define ALPHA_R_SP 304+192(SP)
|
||||
#define ALPHA_I_SP 312+192(SP)
|
||||
#else
|
||||
#define STACKSIZE 256
|
||||
#define ALPHA_R_SP 224(SP)
|
||||
#define ALPHA_I_SP 232(SP)
|
||||
#define FZERO 240(SP)
|
||||
#define STACKSIZE 452
|
||||
#define ALPHA_R_SP 224+196(SP)
|
||||
#define ALPHA_I_SP 232+196(SP)
|
||||
#define FZERO 240+196(SP)
|
||||
#endif
|
||||
|
||||
#define M r3
|
||||
|
@ -135,6 +137,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define alpha_sr vs30
|
||||
#define alpha_si vs31
|
||||
|
||||
#define VECSAVE r11
|
||||
|
||||
#define o12 r12
|
||||
#define KKK r13
|
||||
#define K1 r14
|
||||
|
@ -208,6 +212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
std r14, 280(SP)
|
||||
std r13, 288(SP)
|
||||
std r12, 296(SP)
|
||||
addi r11, SP, 304
|
||||
#else
|
||||
stw r31, 144(SP)
|
||||
stw r30, 148(SP)
|
||||
|
@ -228,7 +233,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
stw r15, 208(SP)
|
||||
stw r14, 212(SP)
|
||||
stw r13, 216(SP)
|
||||
addi r11, SP, 224
|
||||
#endif
|
||||
stvx v20, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v21, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v22, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v23, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v24, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v25, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v26, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v27, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v28, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v29, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v30, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v31, r11, r0
|
||||
li r11, 0
|
||||
|
||||
stfs f1, ALPHA_R_SP
|
||||
stfs f2, ALPHA_I_SP
|
||||
|
@ -295,9 +325,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
|
||||
#ifdef __64BIT__
|
||||
addi T1, SP, 304
|
||||
addi T1, SP, 304+192
|
||||
#else
|
||||
addi T1, SP, 224
|
||||
addi T1, SP, 224+196
|
||||
#endif
|
||||
|
||||
lxsspx alpha_dr, 0, T1
|
||||
|
@ -369,6 +399,7 @@ L999:
|
|||
ld r14, 280(SP)
|
||||
ld r13, 288(SP)
|
||||
ld r12, 296(SP)
|
||||
addi r11, SP, 304
|
||||
#else
|
||||
lwz r31, 144(SP)
|
||||
lwz r30, 148(SP)
|
||||
|
@ -389,10 +420,34 @@ L999:
|
|||
lwz r15, 208(SP)
|
||||
lwz r14, 212(SP)
|
||||
lwz r13, 216(SP)
|
||||
addi r11, SP, 224
|
||||
#endif
|
||||
lvx v20, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v21, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v22, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v23, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v24, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v25, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v26, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v27, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v28, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v29, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v30, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v31, r11, r3
|
||||
li r11, 0
|
||||
|
||||
addi SP, SP, STACKSIZE
|
||||
|
||||
blr
|
||||
|
||||
EPILOGUE
|
||||
|
|
|
@ -83,12 +83,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#ifdef __64BIT__
|
||||
#define STACKSIZE 320
|
||||
#define ALPHA_SP 296(SP)
|
||||
#define FZERO 304(SP)
|
||||
#define STACKSIZE 512
|
||||
#define ALPHA_SP 296+192(SP)
|
||||
#define FZERO 304+192(SP)
|
||||
#else
|
||||
#define STACKSIZE 240
|
||||
#define ALPHA_SP 224(SP)
|
||||
#define FZERO 232(SP)
|
||||
#define STACKSIZE 440
|
||||
#define ALPHA_SP 224+200(SP)
|
||||
#define FZERO 232+200(SP)
|
||||
#endif
|
||||
|
||||
#define M r3
|
||||
|
@ -210,6 +212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
std r16, 264(SP)
|
||||
std r15, 272(SP)
|
||||
std r14, 280(SP)
|
||||
addi r11,SP,288
|
||||
#else
|
||||
stw r31, 144(SP)
|
||||
stw r30, 148(SP)
|
||||
|
@ -229,7 +232,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
stw r16, 204(SP)
|
||||
stw r15, 208(SP)
|
||||
stw r14, 212(SP)
|
||||
addi r11,SP,224
|
||||
#endif
|
||||
stvx v20, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v21, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v22, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v23, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v24, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v25, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v26, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v27, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v28, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v29, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v30, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v31, r11,r0
|
||||
li r11,0
|
||||
|
||||
stfd f1, ALPHA_SP
|
||||
stw r0, FZERO
|
||||
|
@ -269,9 +297,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
ble .L999_H1
|
||||
|
||||
#ifdef __64BIT__
|
||||
addi T1, SP, 296
|
||||
addi T1, SP, 296+192
|
||||
#else
|
||||
addi T1, SP, 224
|
||||
addi T1, SP, 224+200
|
||||
#endif
|
||||
|
||||
li PRE, 384
|
||||
|
@ -334,6 +362,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
ld r16, 264(SP)
|
||||
ld r15, 272(SP)
|
||||
ld r14, 280(SP)
|
||||
addi r11,SP,288
|
||||
#else
|
||||
lwz r31, 144(SP)
|
||||
lwz r30, 148(SP)
|
||||
|
@ -353,10 +382,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
lwz r16, 204(SP)
|
||||
lwz r15, 208(SP)
|
||||
lwz r14, 212(SP)
|
||||
addi r11,SP,224
|
||||
#endif
|
||||
lvx v20, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v21, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v22, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v23, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v24, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v25, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v26, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v27, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v28, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v29, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v30, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v31, r11,r3
|
||||
li r11,0
|
||||
|
||||
addi SP, SP, STACKSIZE
|
||||
|
||||
blr
|
||||
|
||||
EPILOGUE
|
||||
|
|
|
@ -110,12 +110,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "dgemm_ncopy_macros_4_power8.S"
|
||||
|
||||
#define STACKSIZE 384
|
||||
|
||||
#define STACKSIZE 576
|
||||
|
||||
PROLOGUE
|
||||
PROFCODE
|
||||
|
||||
addi SP, SP, -STACKSIZE
|
||||
//addi SP, SP, -208
|
||||
li r0, 0
|
||||
|
||||
stfd f14, 0(SP)
|
||||
|
@ -157,6 +158,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
std r15, 272(SP)
|
||||
std r14, 280(SP)
|
||||
|
||||
addi r11,SP,288
|
||||
stvx v20, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v21, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v22, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v23, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v24, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v25, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v26, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v27, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v28, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v29, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v30, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v31, r11,r0
|
||||
li r11,0
|
||||
|
||||
cmpwi cr0, M, 0
|
||||
ble- L999
|
||||
cmpwi cr0, N, 0
|
||||
|
@ -164,8 +191,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
slwi LDA, LDA, BASE_SHIFT
|
||||
|
||||
li PREA, 384
|
||||
li PREB, 384
|
||||
//li PREA, 384
|
||||
//li PREB, 384
|
||||
li PREA, 576
|
||||
li PREB, 576
|
||||
|
||||
|
||||
li o8, 8
|
||||
li o16, 16
|
||||
|
@ -219,9 +249,34 @@ L999:
|
|||
ld r16, 264(SP)
|
||||
ld r15, 272(SP)
|
||||
ld r14, 280(SP)
|
||||
addi r11,SP,288
|
||||
lvx v20, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v21, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v22, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v23, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v24, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v25, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v26, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v27, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v28, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v29, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v30, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v31, r11,r3
|
||||
li r11,0
|
||||
|
||||
addi SP, SP, STACKSIZE
|
||||
|
||||
//addi SP, SP, 208
|
||||
blr
|
||||
EPILOGUE
|
||||
|
||||
|
|
|
@ -110,12 +110,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "dgemm_tcopy_macros_16_power8.S"
|
||||
|
||||
#define STACKSIZE 384
|
||||
#define STACKSIZE 576
|
||||
|
||||
|
||||
PROLOGUE
|
||||
PROFCODE
|
||||
|
||||
addi SP, SP, -STACKSIZE
|
||||
//addi SP, SP, -208
|
||||
|
||||
li r0, 0
|
||||
|
||||
std r31, 144(SP)
|
||||
|
@ -136,6 +139,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
std r16, 264(SP)
|
||||
std r15, 272(SP)
|
||||
std r14, 280(SP)
|
||||
addi r11,SP,288
|
||||
stvx v20, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v21, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v22, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v23, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v24, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v25, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v26, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v27, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v28, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v29, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v30, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v31, r11,r0
|
||||
li r11,0
|
||||
|
||||
cmpwi cr0, M, 0
|
||||
ble- L999
|
||||
|
@ -170,7 +198,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
add B2, B2, B
|
||||
add B1, B1, B
|
||||
|
||||
li PREA, 384
|
||||
//li PREA, 384
|
||||
li PREA, 576
|
||||
addi PREB, M16, 128
|
||||
|
||||
li o8, 8
|
||||
|
@ -202,9 +231,34 @@ L999:
|
|||
ld r16, 264(SP)
|
||||
ld r15, 272(SP)
|
||||
ld r14, 280(SP)
|
||||
addi r11,SP,288
|
||||
lvx v20, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v21, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v22, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v23, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v24, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v25, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v26, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v27, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v28, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v29, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v30, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v31, r11,r3
|
||||
li r11,0
|
||||
|
||||
addi SP, SP, STACKSIZE
|
||||
|
||||
//addi SP, SP, 208
|
||||
blr
|
||||
EPILOGUE
|
||||
|
||||
|
|
|
@ -83,12 +83,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#ifdef __64BIT__
|
||||
#define STACKSIZE 320
|
||||
#define ALPHA_SP 296(SP)
|
||||
#define FZERO 304(SP)
|
||||
#define STACKSIZE 520
|
||||
#define ALPHA_SP 296+200(SP)
|
||||
#define FZERO 304+200(SP)
|
||||
#else
|
||||
#define STACKSIZE 240
|
||||
#define ALPHA_SP 224(SP)
|
||||
#define FZERO 232(SP)
|
||||
#define STACKSIZE 436
|
||||
#define ALPHA_SP 224+196(SP)
|
||||
#define FZERO 232+196(SP)
|
||||
#endif
|
||||
|
||||
#define M r3
|
||||
|
@ -152,6 +153,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define PRE r30
|
||||
#define T2 r31
|
||||
|
||||
#define VECSAVE r11
|
||||
|
||||
#include "dtrmm_macros_16x4_power8.S"
|
||||
|
||||
|
||||
|
@ -206,6 +209,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
std r15, 272(SP)
|
||||
std r14, 280(SP)
|
||||
std r13, 288(SP)
|
||||
addi r11, SP, 304
|
||||
#else
|
||||
stw r31, 144(SP)
|
||||
stw r30, 148(SP)
|
||||
|
@ -226,7 +230,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
stw r15, 208(SP)
|
||||
stw r14, 212(SP)
|
||||
stw r13, 216(SP)
|
||||
addi r11, SP, 224 // base must be SP: with r0 as RA, addi uses the literal 0 and r11 would be the absolute address 224
|
||||
#endif
|
||||
stvx v20, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v21, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v22, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v23, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v24, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v25, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v26, r11, r0
|
||||
addi r11 ,r11, 16
|
||||
stvx v27, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v28, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v29, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v30, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v31, r11, r0
|
||||
li r11,0
|
||||
|
||||
stw r31, 144(SP)
|
||||
|
||||
stfd f1, ALPHA_SP
|
||||
stw r0, FZERO
|
||||
|
@ -270,9 +301,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
ble .L999_H1
|
||||
|
||||
#ifdef __64BIT__
|
||||
addi ALPHA, SP, 296
|
||||
addi ALPHA, SP, 296+200
|
||||
#else
|
||||
addi ALPHA, SP, 224
|
||||
addi ALPHA, SP, 224+196
|
||||
#endif
|
||||
|
||||
li PRE, 256
|
||||
|
@ -332,6 +363,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
ld r15, 272(SP)
|
||||
ld r14, 280(SP)
|
||||
ld r13, 288(SP)
|
||||
addi r11, SP, 304
|
||||
#else
|
||||
lwz r31, 144(SP)
|
||||
lwz r30, 148(SP)
|
||||
|
@ -352,10 +384,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
lwz r15, 208(SP)
|
||||
lwz r14, 212(SP)
|
||||
lwz r13, 216(SP)
|
||||
addi r11, SP, 224
|
||||
#endif
|
||||
lvx v20, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v21, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v22, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v23, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v24, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v25, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v26, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v27, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v28, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v29, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v30, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v31, r11, r3
|
||||
li r11, 0
|
||||
|
||||
addi SP, SP, STACKSIZE
|
||||
|
||||
blr
|
||||
|
||||
EPILOGUE
|
||||
|
|
|
@ -48,8 +48,9 @@
|
|||
|
||||
#ifdef __64BIT__
|
||||
#define STACKSIZE 320
|
||||
#define ALPHA 296(SP)
|
||||
#define FZERO 304(SP)
|
||||
#define STACKSIZE 520
|
||||
#define ALPHA 296+200(SP)
|
||||
#define FZERO 304+200(SP)
|
||||
#else
|
||||
#define STACKSIZE 240
|
||||
#define ALPHA 224(SP)
|
||||
|
@ -112,6 +113,8 @@
|
|||
#define o48 r30
|
||||
#define T1 r31
|
||||
|
||||
#define VECSAVE r11
|
||||
|
||||
#include "dtrsm_macros_LT_16x4_power8.S"
|
||||
|
||||
#ifndef NEEDPARAM
|
||||
|
@ -163,6 +166,7 @@
|
|||
std r17, 256(SP)
|
||||
std r16, 264(SP)
|
||||
std r15, 272(SP)
|
||||
addi r11,SP,288
|
||||
#else
|
||||
stw r31, 144(SP)
|
||||
stw r30, 148(SP)
|
||||
|
@ -178,7 +182,32 @@
|
|||
stw r20, 188(SP)
|
||||
stw r19, 192(SP)
|
||||
stw r18, 196(SP)
|
||||
addi r11,SP,208
|
||||
#endif
|
||||
stvx v20, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v21, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v22, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v23, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v24, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v25, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v26, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v27, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v28, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v29, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v30, r11,r0
|
||||
addi r11,r11,16
|
||||
stvx v31, r11,r0
|
||||
li r11,0
|
||||
|
||||
|
||||
#if defined(_AIX) || defined(__APPLE__)
|
||||
|
@ -269,6 +298,7 @@ L999:
|
|||
ld r17, 256(SP)
|
||||
ld r16, 264(SP)
|
||||
ld r15, 272(SP)
|
||||
addi r11,SP,288
|
||||
#else
|
||||
lwz r31, 144(SP)
|
||||
lwz r30, 148(SP)
|
||||
|
@ -284,10 +314,35 @@ L999:
|
|||
lwz r20, 188(SP)
|
||||
lwz r19, 192(SP)
|
||||
lwz r18, 196(SP)
|
||||
addi r11,SP,208
|
||||
#endif
|
||||
lvx v20, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v21, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v22, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v23, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v24, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v25, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v26, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v27, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v28, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v29, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v30, r11,r3
|
||||
addi r11,r11,16
|
||||
lvx v31, r11,r3
|
||||
li r11,0
|
||||
|
||||
|
||||
addi SP, SP, STACKSIZE
|
||||
|
||||
blr
|
||||
|
||||
EPILOGUE
|
||||
|
|
|
@ -83,12 +83,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#ifdef __64BIT__
|
||||
#define STACKSIZE 32752
|
||||
#define ALPHA_SP 296(SP)
|
||||
#define FZERO 304(SP)
|
||||
#define ALPHA_SP 296+192(SP)
|
||||
#define FZERO 304+192(SP)
|
||||
#else
|
||||
#define STACKSIZE 240
|
||||
#define ALPHA_SP 224(SP)
|
||||
#define FZERO 232(SP)
|
||||
#define STACKSIZE 440
|
||||
#define ALPHA_SP 224+200(SP)
|
||||
#define FZERO 232+200(SP)
|
||||
#endif
|
||||
|
||||
#define M r3
|
||||
|
@ -132,6 +132,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define o0 0
|
||||
|
||||
#define VECSAVE r11
|
||||
|
||||
#define FRAMEPOINTER r12
|
||||
|
||||
#define BBUFFER r14
|
||||
|
@ -211,6 +213,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
std r16, 264(SP)
|
||||
std r15, 272(SP)
|
||||
std r14, 280(SP)
|
||||
addi r11, SP, 288
|
||||
#else
|
||||
stw r31, 144(SP)
|
||||
stw r30, 148(SP)
|
||||
|
@ -230,7 +233,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
stw r16, 204(SP)
|
||||
stw r15, 208(SP)
|
||||
stw r14, 212(SP)
|
||||
addi r11, SP, 224
|
||||
#endif
|
||||
stvx v20, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v21, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v22, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v23, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v24, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v25, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v26, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v27, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v28, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v29, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v30, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v31, r11, r0
|
||||
li r11,0
|
||||
|
||||
|
||||
// stfd f1, ALPHA_SP
|
||||
// stw r0, FZERO
|
||||
|
@ -281,7 +310,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
li T1, -4096
|
||||
and BBUFFER, BBUFFER, T1
|
||||
|
||||
addi T1, SP, 300
|
||||
addi T1, SP, 300+192
|
||||
stxsspx f1, o0 , T1
|
||||
stxsspx f1, o4 , T1
|
||||
stxsspx f1, o8 , T1
|
||||
|
@ -339,6 +368,7 @@ L999:
|
|||
ld r16, 264(SP)
|
||||
ld r15, 272(SP)
|
||||
ld r14, 280(SP)
|
||||
addi r11, SP, 288
|
||||
#else
|
||||
lwz r31, 144(SP)
|
||||
lwz r30, 148(SP)
|
||||
|
@ -358,13 +388,38 @@ L999:
|
|||
lwz r16, 204(SP)
|
||||
lwz r15, 208(SP)
|
||||
lwz r14, 212(SP)
|
||||
addi r11, SP, 224
|
||||
#endif
|
||||
lvx v20, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v21, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v22, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v23, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v24, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v25, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v26, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v27, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v28, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v29, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v30, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v31, r11, r3
|
||||
li r11, 0
|
||||
|
||||
|
||||
addi SP, SP, STACKSIZE
|
||||
addi SP, SP, STACKSIZE
|
||||
addi SP, SP, STACKSIZE
|
||||
addi SP, SP, STACKSIZE
|
||||
|
||||
blr
|
||||
|
||||
EPILOGUE
|
||||
|
|
|
@ -110,8 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "sgemm_tcopy_macros_16_power8.S"
|
||||
|
||||
#define STACKSIZE 384
|
||||
|
||||
#define STACKSIZE 576
|
||||
|
||||
PROLOGUE
|
||||
PROFCODE
|
||||
|
@ -137,6 +136,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
std r16, 264(SP)
|
||||
std r15, 272(SP)
|
||||
std r14, 280(SP)
|
||||
addi r11 ,SP, 288
|
||||
stvx v20, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v21, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v22, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v23, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v24, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v25, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v26, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v27, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v28, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v29, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v30, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v31, r11, r0
|
||||
li r11, 0
|
||||
|
||||
cmpwi cr0, M, 0
|
||||
ble- L999
|
||||
|
@ -203,9 +227,33 @@ L999:
|
|||
ld r16, 264(SP)
|
||||
ld r15, 272(SP)
|
||||
ld r14, 280(SP)
|
||||
addi r11, SP, 288
|
||||
lvx v20, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v21, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v22, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v23, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v24, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v25, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v26, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v27, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v28, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v29, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v30, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v31, r11, r3
|
||||
li r11, 0
|
||||
|
||||
addi SP, SP, STACKSIZE
|
||||
|
||||
blr
|
||||
EPILOGUE
|
||||
|
||||
|
|
|
@ -110,8 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "sgemm_tcopy_macros_8_power8.S"
|
||||
|
||||
#define STACKSIZE 384
|
||||
|
||||
#define STACKSIZE 576
|
||||
|
||||
PROLOGUE
|
||||
PROFCODE
|
||||
|
@ -137,6 +136,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
std r16, 264(SP)
|
||||
std r15, 272(SP)
|
||||
std r14, 280(SP)
|
||||
addi r11, SP, 288
|
||||
stvx v20, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v21, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v22, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v23, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v24, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v25, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v26, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v27, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v28, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v29, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v30, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v31, r11, r0
|
||||
li r11, 0
|
||||
|
||||
cmpwi cr0, M, 0
|
||||
ble- L999
|
||||
|
@ -198,9 +222,33 @@ L999:
|
|||
ld r16, 264(SP)
|
||||
ld r15, 272(SP)
|
||||
ld r14, 280(SP)
|
||||
addi r11,SP,288
|
||||
lvx v20, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v21, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v22, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v23, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v24, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v25, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v26, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v27, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v28, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v29, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v30, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v31, r11, r3
|
||||
li r11, 0
|
||||
|
||||
addi SP, SP, STACKSIZE
|
||||
|
||||
blr
|
||||
EPILOGUE
|
||||
|
||||
|
|
|
@ -83,8 +83,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#ifdef __64BIT__
|
||||
#define STACKSIZE 340
|
||||
#define ALPHA_SP 296(SP)
|
||||
#define FZERO 304(SP)
|
||||
#define STACKSIZE 540
|
||||
#define ALPHA_SP 296+200(SP)
|
||||
#define FZERO 304+200(SP)
|
||||
#else
|
||||
#define STACKSIZE 240
|
||||
#define ALPHA_SP 224(SP)
|
||||
|
@ -132,6 +133,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define o0 0
|
||||
|
||||
#define VECSAVE r11
|
||||
|
||||
#define TBUFFER r13
|
||||
#define o12 r14
|
||||
#define o4 r15
|
||||
|
@ -207,6 +210,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
std r15, 272(SP)
|
||||
std r14, 280(SP)
|
||||
std r13, 288(SP)
|
||||
addi r11, SP, 304
|
||||
#else
|
||||
stw r31, 144(SP)
|
||||
stw r30, 148(SP)
|
||||
|
@ -227,7 +231,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
stw r15, 208(SP)
|
||||
stw r14, 212(SP)
|
||||
stw r13, 216(SP)
|
||||
addi r11, SP, 224
|
||||
#endif
|
||||
stvx v20, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v21, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v22, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v23, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v24, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v25, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v26, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v27, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v28, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v29, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v30, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v31, r11, r0
|
||||
li r11, 0
|
||||
|
||||
|
||||
// stfd f1, ALPHA_SP
|
||||
// stw r0, FZERO
|
||||
|
@ -278,9 +308,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
li o16, 16
|
||||
li o32, 32
|
||||
li o48, 48
|
||||
addi TBUFFER, SP, 320
|
||||
addi TBUFFER, SP, 320+200
|
||||
|
||||
addi T1, SP, 300
|
||||
addi T1, SP, 300+200
|
||||
stxsspx f1, o0 , T1
|
||||
stxsspx f1, o4 , T1
|
||||
stxsspx f1, o8 , T1
|
||||
|
@ -339,6 +369,7 @@ L999:
|
|||
ld r15, 272(SP)
|
||||
ld r14, 280(SP)
|
||||
ld r13, 288(SP)
|
||||
addi r11, SP, 304
|
||||
#else
|
||||
lwz r31, 144(SP)
|
||||
lwz r30, 148(SP)
|
||||
|
@ -359,10 +390,34 @@ L999:
|
|||
lwz r15, 208(SP)
|
||||
lwz r14, 212(SP)
|
||||
lwz r13, 216(SP)
|
||||
addi r11, SP, 224
|
||||
#endif
|
||||
lvx v20, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v21, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v22, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v23, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v24, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v25, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v26, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v27, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v28, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v29, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v30, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v31, r11, r3
|
||||
li r11, 0
|
||||
|
||||
addi SP, SP, STACKSIZE
|
||||
|
||||
blr
|
||||
|
||||
EPILOGUE
|
||||
|
|
|
@ -117,15 +117,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
#ifdef __64BIT__
|
||||
#define STACKSIZE 32000
|
||||
#define ALPHA_R_SP 296(SP)
|
||||
#define ALPHA_I_SP 304(SP)
|
||||
#define FZERO 312(SP)
|
||||
#define STACKSIZE 32192
|
||||
#define ALPHA_R_SP 296+192(SP)
|
||||
#define ALPHA_I_SP 304+192(SP)
|
||||
#define FZERO 312+192(SP)
|
||||
#else
|
||||
#define STACKSIZE 256
|
||||
#define ALPHA_R_SP 224(SP)
|
||||
#define ALPHA_I_SP 232(SP)
|
||||
#define FZERO 240(SP)
|
||||
#define STACKSIZE 460
|
||||
#define ALPHA_R_SP 224+204(SP)
|
||||
#define ALPHA_I_SP 232+204(SP)
|
||||
#define FZERO 240+204(SP)
|
||||
#endif
|
||||
|
||||
#define M r3
|
||||
|
@ -168,6 +168,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define alpha_r vs30
|
||||
#define alpha_i vs31
|
||||
|
||||
#define VECSAVE r11
|
||||
|
||||
#define FRAMEPOINTER r12
|
||||
|
||||
|
@ -245,6 +246,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
std r16, 264(SP)
|
||||
std r15, 272(SP)
|
||||
std r14, 280(SP)
|
||||
addi r11, SP, 288
|
||||
#else
|
||||
stw r31, 144(SP)
|
||||
stw r30, 148(SP)
|
||||
|
@ -263,7 +265,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
stw r17, 200(SP)
|
||||
stw r16, 204(SP)
|
||||
stw r15, 208(SP)
|
||||
addi r11, SP, 224
|
||||
#endif
|
||||
stvx v20, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v21, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v22, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v23, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v24, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v25, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v26, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v27, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v28, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v29, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v30, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v31, r11, r0
|
||||
li r11,0
|
||||
|
||||
stfd f1, ALPHA_R_SP
|
||||
stfd f2, ALPHA_I_SP
|
||||
|
@ -332,9 +359,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
and BBUFFER, BBUFFER, T1
|
||||
|
||||
#ifdef __64BIT__
|
||||
addi ALPHA, SP, 296
|
||||
addi ALPHA, SP, 296+192
|
||||
#else
|
||||
addi ALPHA, SP, 224
|
||||
addi ALPHA, SP, 224+192+12
|
||||
#endif
|
||||
|
||||
lxsdx alpha_r, 0, ALPHA
|
||||
|
@ -389,6 +416,7 @@ L999:
|
|||
ld r16, 264(SP)
|
||||
ld r15, 272(SP)
|
||||
ld r14, 280(SP)
|
||||
addi r11, SP, 288
|
||||
#else
|
||||
lwz r31, 144(SP)
|
||||
lwz r30, 148(SP)
|
||||
|
@ -407,13 +435,37 @@ L999:
|
|||
lwz r17, 200(SP)
|
||||
lwz r16, 204(SP)
|
||||
lwz r15, 208(SP)
|
||||
addi r11, SP, 224
|
||||
#endif
|
||||
lvx v20, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v21, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v22, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v23, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v24, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v25, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v26, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v27, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v28, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v29, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v30, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v31, r11, r3
|
||||
li r11, 0
|
||||
|
||||
addi SP, SP, STACKSIZE
|
||||
addi SP, SP, STACKSIZE
|
||||
addi SP, SP, STACKSIZE
|
||||
addi SP, SP, STACKSIZE
|
||||
|
||||
blr
|
||||
|
||||
EPILOGUE
|
||||
|
|
|
@ -110,6 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "zgemm_tcopy_macros_8_power8.S"
|
||||
|
||||
#define STACKSIZE 384
|
||||
#define STACKSIZE 576
|
||||
|
||||
|
||||
PROLOGUE
|
||||
|
@ -136,6 +137,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
std r16, 264(SP)
|
||||
std r15, 272(SP)
|
||||
std r14, 280(SP)
|
||||
addi r11, SP ,288
|
||||
stvx v20, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v21, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v22, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v23, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v24, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v25, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v26, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v27, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v28, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v29, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v30, r11, r0
|
||||
addi r11, r11 ,16
|
||||
stvx v31, r11, r0
|
||||
li r11,0
|
||||
|
||||
cmpwi cr0, M, 0
|
||||
ble- L999
|
||||
|
@ -196,9 +222,33 @@ L999:
|
|||
ld r16, 264(SP)
|
||||
ld r15, 272(SP)
|
||||
ld r14, 280(SP)
|
||||
addi r11, SP, 288
|
||||
lvx v20, r11,r3
|
||||
addi r11, r11, 16
|
||||
lvx v21, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v22, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v23, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v24, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v25, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v26, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v27, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v28, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v29, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v30, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v31, r11, r3
|
||||
li r11,0
|
||||
|
||||
addi SP, SP, STACKSIZE
|
||||
|
||||
blr
|
||||
EPILOGUE
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013-2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
@ -82,15 +83,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
#ifdef __64BIT__
|
||||
#define STACKSIZE 320
|
||||
#define ALPHA_R_SP 296(SP)
|
||||
#define ALPHA_I_SP 304(SP)
|
||||
#define FZERO 312(SP)
|
||||
#define STACKSIZE 520
|
||||
#define ALPHA_R_SP 296+200(SP)
|
||||
#define ALPHA_I_SP 304+200(SP)
|
||||
#define FZERO 312+200(SP)
|
||||
#else
|
||||
#define STACKSIZE 256
|
||||
#define ALPHA_R_SP 224(SP)
|
||||
#define ALPHA_I_SP 232(SP)
|
||||
#define FZERO 240(SP)
|
||||
#define STACKSIZE 452
|
||||
#define ALPHA_R_SP 224+196(SP)
|
||||
#define ALPHA_I_SP 232+196(SP)
|
||||
#define FZERO 240+196(SP)
|
||||
#endif
|
||||
|
||||
#define M r3
|
||||
|
@ -133,6 +134,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define alpha_r vs30
|
||||
#define alpha_i vs31
|
||||
|
||||
#define VECSAVE r11
|
||||
|
||||
#define KKK r13
|
||||
#define K1 r14
|
||||
#define L r15
|
||||
|
@ -204,6 +207,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
std r15, 272(SP)
|
||||
std r14, 280(SP)
|
||||
std r13, 288(SP)
|
||||
addi r11, SP, 304
|
||||
#else
|
||||
stw r31, 144(SP)
|
||||
stw r30, 148(SP)
|
||||
|
@ -224,7 +228,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
stw r15, 208(SP)
|
||||
stw r14, 212(SP)
|
||||
stw r13, 216(SP)
|
||||
addi r11, SP, 224
|
||||
#endif
|
||||
stvx v20, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v21, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v22, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v23, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v24, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v25, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v26, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v27, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v28, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v29, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v30, r11, r0
|
||||
addi r11, r11, 16
|
||||
stvx v31, r11, r0
|
||||
li r11, 0
|
||||
|
||||
stfd f1, ALPHA_R_SP
|
||||
stfd f2, ALPHA_I_SP
|
||||
|
@ -289,9 +318,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
li o48 , 48
|
||||
|
||||
#ifdef __64BIT__
|
||||
addi ALPHA, SP, 296
|
||||
addi ALPHA, SP, 296+200
|
||||
#else
|
||||
addi ALPHA, SP, 224
|
||||
addi ALPHA, SP, 224+196
|
||||
#endif
|
||||
|
||||
lxsdx alpha_r, 0, ALPHA
|
||||
|
@ -347,6 +376,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
ld r15, 272(SP)
|
||||
ld r14, 280(SP)
|
||||
ld r13, 288(SP)
|
||||
addi r11, SP, 304
|
||||
#else
|
||||
lwz r31, 144(SP)
|
||||
lwz r30, 148(SP)
|
||||
|
@ -367,10 +397,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
lwz r15, 208(SP)
|
||||
lwz r14, 212(SP)
|
||||
lwz r13, 216(SP)
|
||||
addi r11, SP, 224
|
||||
#endif
|
||||
lvx v20, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v21, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v22, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v23, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v24, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v25, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v26, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v27, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v28, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v29, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v30, r11, r3
|
||||
addi r11, r11, 16
|
||||
lvx v31, r11, r3
|
||||
li r11, 0
|
||||
|
||||
addi SP, SP, STACKSIZE
|
||||
|
||||
blr
|
||||
|
||||
EPILOGUE
|
||||
|
|
Loading…
Reference in New Issue