Merge pull request #1317 from martin-frbg/power8-asm
Save and restore VSX registers
This commit is contained in:
commit
1eb43cccad
|
@ -82,15 +82,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
#define STACKSIZE 32000
|
#define STACKSIZE 32196
|
||||||
#define ALPHA_R_SP 296(SP)
|
#define ALPHA_R_SP 296+196(SP)
|
||||||
#define ALPHA_I_SP 304(SP)
|
#define ALPHA_I_SP 304+196(SP)
|
||||||
#define FZERO 312(SP)
|
#define FZERO 312+196(SP)
|
||||||
#else
|
#else
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 456
|
||||||
#define ALPHA_R_SP 224(SP)
|
#define ALPHA_R_SP 224+200(SP)
|
||||||
#define ALPHA_I_SP 232(SP)
|
#define ALPHA_I_SP 232+200(SP)
|
||||||
#define FZERO 240(SP)
|
#define FZERO 240+200(SP)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define M r3
|
#define M r3
|
||||||
|
@ -138,6 +138,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define FRAMEPOINTER r12
|
#define FRAMEPOINTER r12
|
||||||
|
|
||||||
|
#define VECSAVE r11
|
||||||
|
|
||||||
#define BBUFFER r14
|
#define BBUFFER r14
|
||||||
#define L r15
|
#define L r15
|
||||||
#define o12 r16
|
#define o12 r16
|
||||||
|
@ -167,6 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
addi SP, SP, -STACKSIZE
|
addi SP, SP, -STACKSIZE
|
||||||
addi SP, SP, -STACKSIZE
|
addi SP, SP, -STACKSIZE
|
||||||
addi SP, SP, -STACKSIZE
|
addi SP, SP, -STACKSIZE
|
||||||
|
|
||||||
li r0, 0
|
li r0, 0
|
||||||
|
|
||||||
stfd f14, 0(SP)
|
stfd f14, 0(SP)
|
||||||
|
@ -211,6 +214,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
std r16, 264(SP)
|
std r16, 264(SP)
|
||||||
std r15, 272(SP)
|
std r15, 272(SP)
|
||||||
std r14, 280(SP)
|
std r14, 280(SP)
|
||||||
|
addi r11, SP, 288
|
||||||
#else
|
#else
|
||||||
stw r31, 144(SP)
|
stw r31, 144(SP)
|
||||||
stw r30, 148(SP)
|
stw r30, 148(SP)
|
||||||
|
@ -230,7 +234,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
stw r16, 204(SP)
|
stw r16, 204(SP)
|
||||||
stw r15, 208(SP)
|
stw r15, 208(SP)
|
||||||
stw r14, 212(SP)
|
stw r14, 212(SP)
|
||||||
|
addi r11, SP, 224
|
||||||
#endif
|
#endif
|
||||||
|
stvx v20, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v21, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v22, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v23, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v24, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v25, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v26, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v27, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v28, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v29, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v30, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v31, r11, r0
|
||||||
|
li r11, 0
|
||||||
|
|
||||||
stfs f1, ALPHA_R_SP
|
stfs f1, ALPHA_R_SP
|
||||||
stfs f2, ALPHA_I_SP
|
stfs f2, ALPHA_I_SP
|
||||||
|
@ -301,9 +330,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
addi T1 , SP, 296
|
addi T1 , SP, 296+196
|
||||||
#else
|
#else
|
||||||
addi T1 , SP, 224
|
addi T1 , SP, 224+200
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
stxsspx vs1, 0, T1
|
stxsspx vs1, 0, T1
|
||||||
|
@ -375,6 +404,7 @@ L999:
|
||||||
ld r16, 264(SP)
|
ld r16, 264(SP)
|
||||||
ld r15, 272(SP)
|
ld r15, 272(SP)
|
||||||
ld r14, 280(SP)
|
ld r14, 280(SP)
|
||||||
|
addi r11, SP, 288
|
||||||
#else
|
#else
|
||||||
lwz r31, 144(SP)
|
lwz r31, 144(SP)
|
||||||
lwz r30, 148(SP)
|
lwz r30, 148(SP)
|
||||||
|
@ -394,7 +424,32 @@ L999:
|
||||||
lwz r16, 204(SP)
|
lwz r16, 204(SP)
|
||||||
lwz r15, 208(SP)
|
lwz r15, 208(SP)
|
||||||
lwz r14, 212(SP)
|
lwz r14, 212(SP)
|
||||||
|
addi r11, 224
|
||||||
#endif
|
#endif
|
||||||
|
lvx v20, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v21, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v22, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v23, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v24, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v25, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v26, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v27, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v28, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v29, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v30, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v31, r11, r0
|
||||||
|
li r11, 0
|
||||||
|
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
|
@ -404,4 +459,4 @@ L999:
|
||||||
blr
|
blr
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -88,6 +88,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define J r12
|
#define J r12
|
||||||
|
|
||||||
|
|
||||||
#define PREA r14
|
#define PREA r14
|
||||||
#define PREB r15
|
#define PREB r15
|
||||||
#define BO r16
|
#define BO r16
|
||||||
|
@ -109,7 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "cgemm_tcopy_macros_8_power8.S"
|
#include "cgemm_tcopy_macros_8_power8.S"
|
||||||
|
|
||||||
#define STACKSIZE 384
|
#define STACKSIZE 576
|
||||||
|
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
|
@ -136,6 +137,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
std r16, 264(SP)
|
std r16, 264(SP)
|
||||||
std r15, 272(SP)
|
std r15, 272(SP)
|
||||||
std r14, 280(SP)
|
std r14, 280(SP)
|
||||||
|
addi r11, SP, 288
|
||||||
|
stvx v20, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v21, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v22, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v23, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v24, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v25, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v26, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v27, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v28, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v29, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v30, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v31, r11, r0
|
||||||
|
li r11, 0
|
||||||
|
|
||||||
cmpwi cr0, M, 0
|
cmpwi cr0, M, 0
|
||||||
ble- L999
|
ble- L999
|
||||||
|
@ -197,9 +223,33 @@ L999:
|
||||||
ld r16, 264(SP)
|
ld r16, 264(SP)
|
||||||
ld r15, 272(SP)
|
ld r15, 272(SP)
|
||||||
ld r14, 280(SP)
|
ld r14, 280(SP)
|
||||||
|
addi r11, SP, 288
|
||||||
|
lvx v20, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v21, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v22, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v23, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v24, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v25, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v26, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v27, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v28, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v29, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v30, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v31, r11, r3
|
||||||
|
li r11, 0
|
||||||
|
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
|
|
||||||
blr
|
blr
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
||||||
|
|
|
@ -83,13 +83,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
#define STACKSIZE 400
|
#define STACKSIZE 400
|
||||||
#define ALPHA_R_SP 304(SP)
|
#define STACKSIZE 592
|
||||||
#define ALPHA_I_SP 312(SP)
|
#define ALPHA_R_SP 304+192(SP)
|
||||||
|
#define ALPHA_I_SP 312+192(SP)
|
||||||
#else
|
#else
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 256
|
||||||
#define ALPHA_R_SP 224(SP)
|
#define STACKSIZE 452
|
||||||
#define ALPHA_I_SP 232(SP)
|
#define ALPHA_R_SP 224+196(SP)
|
||||||
#define FZERO 240(SP)
|
#define ALPHA_I_SP 232+196(SP)
|
||||||
|
#define FZERO 240+196(SP)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define M r3
|
#define M r3
|
||||||
|
@ -135,6 +137,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define alpha_sr vs30
|
#define alpha_sr vs30
|
||||||
#define alpha_si vs31
|
#define alpha_si vs31
|
||||||
|
|
||||||
|
#define VECSAVE r11
|
||||||
|
|
||||||
#define o12 r12
|
#define o12 r12
|
||||||
#define KKK r13
|
#define KKK r13
|
||||||
#define K1 r14
|
#define K1 r14
|
||||||
|
@ -208,6 +212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
std r14, 280(SP)
|
std r14, 280(SP)
|
||||||
std r13, 288(SP)
|
std r13, 288(SP)
|
||||||
std r12, 296(SP)
|
std r12, 296(SP)
|
||||||
|
addi r11, SP, 304
|
||||||
#else
|
#else
|
||||||
stw r31, 144(SP)
|
stw r31, 144(SP)
|
||||||
stw r30, 148(SP)
|
stw r30, 148(SP)
|
||||||
|
@ -228,7 +233,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
stw r15, 208(SP)
|
stw r15, 208(SP)
|
||||||
stw r14, 212(SP)
|
stw r14, 212(SP)
|
||||||
stw r13, 216(SP)
|
stw r13, 216(SP)
|
||||||
|
addi r11, SP, 224
|
||||||
#endif
|
#endif
|
||||||
|
stvx v20, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v21, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v22, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v23, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v24, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v25, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v26, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v27, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v28, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v29, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v30, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v31, r11, r0
|
||||||
|
li r11, 0
|
||||||
|
|
||||||
stfs f1, ALPHA_R_SP
|
stfs f1, ALPHA_R_SP
|
||||||
stfs f2, ALPHA_I_SP
|
stfs f2, ALPHA_I_SP
|
||||||
|
@ -295,9 +325,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
addi T1, SP, 304
|
addi T1, SP, 304+192
|
||||||
#else
|
#else
|
||||||
addi T1, SP, 224
|
addi T1, SP, 224+196
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
lxsspx alpha_dr, 0, T1
|
lxsspx alpha_dr, 0, T1
|
||||||
|
@ -369,6 +399,7 @@ L999:
|
||||||
ld r14, 280(SP)
|
ld r14, 280(SP)
|
||||||
ld r13, 288(SP)
|
ld r13, 288(SP)
|
||||||
ld r12, 296(SP)
|
ld r12, 296(SP)
|
||||||
|
addi r11, SP, 304
|
||||||
#else
|
#else
|
||||||
lwz r31, 144(SP)
|
lwz r31, 144(SP)
|
||||||
lwz r30, 148(SP)
|
lwz r30, 148(SP)
|
||||||
|
@ -389,10 +420,34 @@ L999:
|
||||||
lwz r15, 208(SP)
|
lwz r15, 208(SP)
|
||||||
lwz r14, 212(SP)
|
lwz r14, 212(SP)
|
||||||
lwz r13, 216(SP)
|
lwz r13, 216(SP)
|
||||||
|
addi r11, SP, 224
|
||||||
#endif
|
#endif
|
||||||
|
lvx v20, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v21, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v22, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v23, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v24, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v25, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v26, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v27, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v28, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v29, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v30, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v31, r11, r3
|
||||||
|
li r11, 0
|
||||||
|
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
|
|
||||||
blr
|
blr
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
|
@ -83,12 +83,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
#define STACKSIZE 320
|
#define STACKSIZE 320
|
||||||
#define ALPHA_SP 296(SP)
|
#define STACKSIZE 512
|
||||||
#define FZERO 304(SP)
|
#define ALPHA_SP 296+192(SP)
|
||||||
|
#define FZERO 304+192(SP)
|
||||||
#else
|
#else
|
||||||
#define STACKSIZE 240
|
#define STACKSIZE 240
|
||||||
#define ALPHA_SP 224(SP)
|
#define STACKSIZE 440
|
||||||
#define FZERO 232(SP)
|
#define ALPHA_SP 224+200(SP)
|
||||||
|
#define FZERO 232+200(SP)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define M r3
|
#define M r3
|
||||||
|
@ -210,6 +212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
std r16, 264(SP)
|
std r16, 264(SP)
|
||||||
std r15, 272(SP)
|
std r15, 272(SP)
|
||||||
std r14, 280(SP)
|
std r14, 280(SP)
|
||||||
|
addi r11,SP,288
|
||||||
#else
|
#else
|
||||||
stw r31, 144(SP)
|
stw r31, 144(SP)
|
||||||
stw r30, 148(SP)
|
stw r30, 148(SP)
|
||||||
|
@ -229,7 +232,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
stw r16, 204(SP)
|
stw r16, 204(SP)
|
||||||
stw r15, 208(SP)
|
stw r15, 208(SP)
|
||||||
stw r14, 212(SP)
|
stw r14, 212(SP)
|
||||||
|
addi r11,SP,224
|
||||||
#endif
|
#endif
|
||||||
|
stvx v20, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v21, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v22, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v23, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v24, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v25, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v26, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v27, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v28, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v29, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v30, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v31, r11,r0
|
||||||
|
li r11,0
|
||||||
|
|
||||||
stfd f1, ALPHA_SP
|
stfd f1, ALPHA_SP
|
||||||
stw r0, FZERO
|
stw r0, FZERO
|
||||||
|
@ -269,9 +297,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
ble .L999_H1
|
ble .L999_H1
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
addi T1, SP, 296
|
addi T1, SP, 296+192
|
||||||
#else
|
#else
|
||||||
addi T1, SP, 224
|
addi T1, SP, 224+200
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
li PRE, 384
|
li PRE, 384
|
||||||
|
@ -334,6 +362,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
ld r16, 264(SP)
|
ld r16, 264(SP)
|
||||||
ld r15, 272(SP)
|
ld r15, 272(SP)
|
||||||
ld r14, 280(SP)
|
ld r14, 280(SP)
|
||||||
|
addi r11,SP,288
|
||||||
#else
|
#else
|
||||||
lwz r31, 144(SP)
|
lwz r31, 144(SP)
|
||||||
lwz r30, 148(SP)
|
lwz r30, 148(SP)
|
||||||
|
@ -353,10 +382,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
lwz r16, 204(SP)
|
lwz r16, 204(SP)
|
||||||
lwz r15, 208(SP)
|
lwz r15, 208(SP)
|
||||||
lwz r14, 212(SP)
|
lwz r14, 212(SP)
|
||||||
|
addi r11,SP,224
|
||||||
#endif
|
#endif
|
||||||
|
lvx v20, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v21, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v22, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v23, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v24, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v25, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v26, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v27, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v28, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v29, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v30, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v31, r11,r3
|
||||||
|
li r11,0
|
||||||
|
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
|
|
||||||
blr
|
blr
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
|
@ -110,12 +110,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "dgemm_ncopy_macros_4_power8.S"
|
#include "dgemm_ncopy_macros_4_power8.S"
|
||||||
|
|
||||||
#define STACKSIZE 384
|
#define STACKSIZE 384
|
||||||
|
#define STACKSIZE 576
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
PROFCODE
|
PROFCODE
|
||||||
|
|
||||||
addi SP, SP, -STACKSIZE
|
addi SP, SP, -STACKSIZE
|
||||||
|
//addi SP, SP, -208
|
||||||
li r0, 0
|
li r0, 0
|
||||||
|
|
||||||
stfd f14, 0(SP)
|
stfd f14, 0(SP)
|
||||||
|
@ -157,6 +158,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
std r15, 272(SP)
|
std r15, 272(SP)
|
||||||
std r14, 280(SP)
|
std r14, 280(SP)
|
||||||
|
|
||||||
|
addi r11,SP,288
|
||||||
|
stvx v20, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v21, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v22, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v23, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v24, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v25, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v26, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v27, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v28, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v29, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v30, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v31, r11,r0
|
||||||
|
li r11,0
|
||||||
|
|
||||||
cmpwi cr0, M, 0
|
cmpwi cr0, M, 0
|
||||||
ble- L999
|
ble- L999
|
||||||
cmpwi cr0, N, 0
|
cmpwi cr0, N, 0
|
||||||
|
@ -164,8 +191,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
slwi LDA, LDA, BASE_SHIFT
|
slwi LDA, LDA, BASE_SHIFT
|
||||||
|
|
||||||
li PREA, 384
|
//li PREA, 384
|
||||||
li PREB, 384
|
//li PREB, 384
|
||||||
|
li PREA, 576
|
||||||
|
li PREB, 576
|
||||||
|
|
||||||
|
|
||||||
li o8, 8
|
li o8, 8
|
||||||
li o16, 16
|
li o16, 16
|
||||||
|
@ -219,9 +249,34 @@ L999:
|
||||||
ld r16, 264(SP)
|
ld r16, 264(SP)
|
||||||
ld r15, 272(SP)
|
ld r15, 272(SP)
|
||||||
ld r14, 280(SP)
|
ld r14, 280(SP)
|
||||||
|
addi r11,SP,288
|
||||||
|
lvx v20, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v21, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v22, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v23, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v24, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v25, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v26, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v27, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v28, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v29, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v30, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v31, r11,r3
|
||||||
|
li r11,0
|
||||||
|
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
|
//addi SP, SP, 208
|
||||||
blr
|
blr
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
||||||
|
|
|
@ -110,12 +110,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "dgemm_tcopy_macros_16_power8.S"
|
#include "dgemm_tcopy_macros_16_power8.S"
|
||||||
|
|
||||||
#define STACKSIZE 384
|
#define STACKSIZE 384
|
||||||
|
#define STACKSIZE 576
|
||||||
|
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
PROFCODE
|
PROFCODE
|
||||||
|
|
||||||
addi SP, SP, -STACKSIZE
|
addi SP, SP, -STACKSIZE
|
||||||
|
//addi SP, SP, -208
|
||||||
|
|
||||||
li r0, 0
|
li r0, 0
|
||||||
|
|
||||||
std r31, 144(SP)
|
std r31, 144(SP)
|
||||||
|
@ -136,6 +139,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
std r16, 264(SP)
|
std r16, 264(SP)
|
||||||
std r15, 272(SP)
|
std r15, 272(SP)
|
||||||
std r14, 280(SP)
|
std r14, 280(SP)
|
||||||
|
addi r11,SP,288
|
||||||
|
stvx v20, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v21, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v22, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v23, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v24, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v25, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v26, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v27, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v28, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v29, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v30, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v31, r11,r0
|
||||||
|
li r11,0
|
||||||
|
|
||||||
cmpwi cr0, M, 0
|
cmpwi cr0, M, 0
|
||||||
ble- L999
|
ble- L999
|
||||||
|
@ -170,7 +198,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add B2, B2, B
|
add B2, B2, B
|
||||||
add B1, B1, B
|
add B1, B1, B
|
||||||
|
|
||||||
li PREA, 384
|
//li PREA, 384
|
||||||
|
li PREA, 576
|
||||||
addi PREB, M16, 128
|
addi PREB, M16, 128
|
||||||
|
|
||||||
li o8, 8
|
li o8, 8
|
||||||
|
@ -202,9 +231,34 @@ L999:
|
||||||
ld r16, 264(SP)
|
ld r16, 264(SP)
|
||||||
ld r15, 272(SP)
|
ld r15, 272(SP)
|
||||||
ld r14, 280(SP)
|
ld r14, 280(SP)
|
||||||
|
addi r11,SP,288
|
||||||
|
lvx v20, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v21, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v22, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v23, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v24, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v25, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v26, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v27, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v28, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v29, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v30, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v31, r11,r3
|
||||||
|
li r11,0
|
||||||
|
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
|
//addi SP, SP, 208
|
||||||
blr
|
blr
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
||||||
|
|
|
@ -83,12 +83,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
#define STACKSIZE 320
|
#define STACKSIZE 320
|
||||||
#define ALPHA_SP 296(SP)
|
#define STACKSIZE 520
|
||||||
#define FZERO 304(SP)
|
#define ALPHA_SP 296+200(SP)
|
||||||
|
#define FZERO 304+200(SP)
|
||||||
#else
|
#else
|
||||||
#define STACKSIZE 240
|
#define STACKSIZE 436
|
||||||
#define ALPHA_SP 224(SP)
|
#define ALPHA_SP 224+196(SP)
|
||||||
#define FZERO 232(SP)
|
#define FZERO 232+196(SP)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define M r3
|
#define M r3
|
||||||
|
@ -152,6 +153,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define PRE r30
|
#define PRE r30
|
||||||
#define T2 r31
|
#define T2 r31
|
||||||
|
|
||||||
|
#define VECSAVE r11
|
||||||
|
|
||||||
#include "dtrmm_macros_16x4_power8.S"
|
#include "dtrmm_macros_16x4_power8.S"
|
||||||
|
|
||||||
|
|
||||||
|
@ -206,6 +209,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
std r15, 272(SP)
|
std r15, 272(SP)
|
||||||
std r14, 280(SP)
|
std r14, 280(SP)
|
||||||
std r13, 288(SP)
|
std r13, 288(SP)
|
||||||
|
addi r11, SP, 304
|
||||||
#else
|
#else
|
||||||
stw r31, 144(SP)
|
stw r31, 144(SP)
|
||||||
stw r30, 148(SP)
|
stw r30, 148(SP)
|
||||||
|
@ -226,7 +230,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
stw r15, 208(SP)
|
stw r15, 208(SP)
|
||||||
stw r14, 212(SP)
|
stw r14, 212(SP)
|
||||||
stw r13, 216(SP)
|
stw r13, 216(SP)
|
||||||
|
addi r11, r0, 224
|
||||||
#endif
|
#endif
|
||||||
|
stvx v20, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v21, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v22, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v23, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v24, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v25, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v26, r11, r0
|
||||||
|
addi r11 ,r11, 16
|
||||||
|
stvx v27, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v28, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v29, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v30, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v31, r11, r0
|
||||||
|
li r11,0
|
||||||
|
|
||||||
|
stw r31, 144(SP)
|
||||||
|
|
||||||
stfd f1, ALPHA_SP
|
stfd f1, ALPHA_SP
|
||||||
stw r0, FZERO
|
stw r0, FZERO
|
||||||
|
@ -270,9 +301,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
ble .L999_H1
|
ble .L999_H1
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
addi ALPHA, SP, 296
|
addi ALPHA, SP, 296+200
|
||||||
#else
|
#else
|
||||||
addi ALPHA, SP, 224
|
addi ALPHA, SP, 224+196
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
li PRE, 256
|
li PRE, 256
|
||||||
|
@ -332,6 +363,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
ld r15, 272(SP)
|
ld r15, 272(SP)
|
||||||
ld r14, 280(SP)
|
ld r14, 280(SP)
|
||||||
ld r13, 288(SP)
|
ld r13, 288(SP)
|
||||||
|
addi r11, SP, 304
|
||||||
#else
|
#else
|
||||||
lwz r31, 144(SP)
|
lwz r31, 144(SP)
|
||||||
lwz r30, 148(SP)
|
lwz r30, 148(SP)
|
||||||
|
@ -352,10 +384,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
lwz r15, 208(SP)
|
lwz r15, 208(SP)
|
||||||
lwz r14, 212(SP)
|
lwz r14, 212(SP)
|
||||||
lwz r13, 216(SP)
|
lwz r13, 216(SP)
|
||||||
|
addi r11, SP, 224
|
||||||
#endif
|
#endif
|
||||||
|
lvx v20, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v21, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v22, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v23, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v24, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v25, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v26, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v27, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v28, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v29, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v30, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v31, r11, r3
|
||||||
|
li r11, 0
|
||||||
|
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
|
|
||||||
blr
|
blr
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
|
@ -48,8 +48,9 @@
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
#define STACKSIZE 320
|
#define STACKSIZE 320
|
||||||
#define ALPHA 296(SP)
|
#define STACKSIZE 520
|
||||||
#define FZERO 304(SP)
|
#define ALPHA 296+200(SP)
|
||||||
|
#define FZERO 304+200(SP)
|
||||||
#else
|
#else
|
||||||
#define STACKSIZE 240
|
#define STACKSIZE 240
|
||||||
#define ALPHA 224(SP)
|
#define ALPHA 224(SP)
|
||||||
|
@ -112,6 +113,8 @@
|
||||||
#define o48 r30
|
#define o48 r30
|
||||||
#define T1 r31
|
#define T1 r31
|
||||||
|
|
||||||
|
#define VECSAVE r11
|
||||||
|
|
||||||
#include "dtrsm_macros_LT_16x4_power8.S"
|
#include "dtrsm_macros_LT_16x4_power8.S"
|
||||||
|
|
||||||
#ifndef NEEDPARAM
|
#ifndef NEEDPARAM
|
||||||
|
@ -163,6 +166,7 @@
|
||||||
std r17, 256(SP)
|
std r17, 256(SP)
|
||||||
std r16, 264(SP)
|
std r16, 264(SP)
|
||||||
std r15, 272(SP)
|
std r15, 272(SP)
|
||||||
|
addi r11,SP,288
|
||||||
#else
|
#else
|
||||||
stw r31, 144(SP)
|
stw r31, 144(SP)
|
||||||
stw r30, 148(SP)
|
stw r30, 148(SP)
|
||||||
|
@ -178,7 +182,32 @@
|
||||||
stw r20, 188(SP)
|
stw r20, 188(SP)
|
||||||
stw r19, 192(SP)
|
stw r19, 192(SP)
|
||||||
stw r18, 196(SP)
|
stw r18, 196(SP)
|
||||||
|
addi r11,SP,208
|
||||||
#endif
|
#endif
|
||||||
|
stvx v20, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v21, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v22, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v23, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v24, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v25, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v26, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v27, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v28, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v29, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v30, r11,r0
|
||||||
|
addi r11,r11,16
|
||||||
|
stvx v31, r11,r0
|
||||||
|
li r11,0
|
||||||
|
|
||||||
|
|
||||||
#if defined(_AIX) || defined(__APPLE__)
|
#if defined(_AIX) || defined(__APPLE__)
|
||||||
|
@ -269,6 +298,7 @@ L999:
|
||||||
ld r17, 256(SP)
|
ld r17, 256(SP)
|
||||||
ld r16, 264(SP)
|
ld r16, 264(SP)
|
||||||
ld r15, 272(SP)
|
ld r15, 272(SP)
|
||||||
|
addi r11,SP,288
|
||||||
#else
|
#else
|
||||||
lwz r31, 144(SP)
|
lwz r31, 144(SP)
|
||||||
lwz r30, 148(SP)
|
lwz r30, 148(SP)
|
||||||
|
@ -284,10 +314,35 @@ L999:
|
||||||
lwz r20, 188(SP)
|
lwz r20, 188(SP)
|
||||||
lwz r19, 192(SP)
|
lwz r19, 192(SP)
|
||||||
lwz r18, 196(SP)
|
lwz r18, 196(SP)
|
||||||
|
addi r11,SP,208
|
||||||
#endif
|
#endif
|
||||||
|
lvx v20, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v21, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v22, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v23, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v24, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v25, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v26, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v27, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v28, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v29, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v30, r11,r3
|
||||||
|
addi r11,r11,16
|
||||||
|
lvx v31, r11,r3
|
||||||
|
li r11,0
|
||||||
|
|
||||||
|
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
|
|
||||||
blr
|
blr
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
|
@ -83,12 +83,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
#define STACKSIZE 32752
|
#define STACKSIZE 32752
|
||||||
#define ALPHA_SP 296(SP)
|
#define ALPHA_SP 296+192(SP)
|
||||||
#define FZERO 304(SP)
|
#define FZERO 304+192(SP)
|
||||||
#else
|
#else
|
||||||
#define STACKSIZE 240
|
#define STACKSIZE 440
|
||||||
#define ALPHA_SP 224(SP)
|
#define ALPHA_SP 224+200(SP)
|
||||||
#define FZERO 232(SP)
|
#define FZERO 232+200(SP)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define M r3
|
#define M r3
|
||||||
|
@ -132,6 +132,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define o0 0
|
#define o0 0
|
||||||
|
|
||||||
|
#define VECSAVE r11
|
||||||
|
|
||||||
#define FRAMEPOINTER r12
|
#define FRAMEPOINTER r12
|
||||||
|
|
||||||
#define BBUFFER r14
|
#define BBUFFER r14
|
||||||
|
@ -211,6 +213,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
std r16, 264(SP)
|
std r16, 264(SP)
|
||||||
std r15, 272(SP)
|
std r15, 272(SP)
|
||||||
std r14, 280(SP)
|
std r14, 280(SP)
|
||||||
|
addi r11, SP, 288
|
||||||
#else
|
#else
|
||||||
stw r31, 144(SP)
|
stw r31, 144(SP)
|
||||||
stw r30, 148(SP)
|
stw r30, 148(SP)
|
||||||
|
@ -230,7 +233,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
stw r16, 204(SP)
|
stw r16, 204(SP)
|
||||||
stw r15, 208(SP)
|
stw r15, 208(SP)
|
||||||
stw r14, 212(SP)
|
stw r14, 212(SP)
|
||||||
|
addi r11, SP, 224
|
||||||
#endif
|
#endif
|
||||||
|
stvx v20, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v21, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v22, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v23, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v24, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v25, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v26, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v27, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v28, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v29, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v30, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v31, r11, r0
|
||||||
|
li r11,0
|
||||||
|
|
||||||
|
|
||||||
// stfd f1, ALPHA_SP
|
// stfd f1, ALPHA_SP
|
||||||
// stw r0, FZERO
|
// stw r0, FZERO
|
||||||
|
@ -281,7 +310,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
li T1, -4096
|
li T1, -4096
|
||||||
and BBUFFER, BBUFFER, T1
|
and BBUFFER, BBUFFER, T1
|
||||||
|
|
||||||
addi T1, SP, 300
|
addi T1, SP, 300+192
|
||||||
stxsspx f1, o0 , T1
|
stxsspx f1, o0 , T1
|
||||||
stxsspx f1, o4 , T1
|
stxsspx f1, o4 , T1
|
||||||
stxsspx f1, o8 , T1
|
stxsspx f1, o8 , T1
|
||||||
|
@ -339,6 +368,7 @@ L999:
|
||||||
ld r16, 264(SP)
|
ld r16, 264(SP)
|
||||||
ld r15, 272(SP)
|
ld r15, 272(SP)
|
||||||
ld r14, 280(SP)
|
ld r14, 280(SP)
|
||||||
|
addi r11, SP, 288
|
||||||
#else
|
#else
|
||||||
lwz r31, 144(SP)
|
lwz r31, 144(SP)
|
||||||
lwz r30, 148(SP)
|
lwz r30, 148(SP)
|
||||||
|
@ -358,13 +388,38 @@ L999:
|
||||||
lwz r16, 204(SP)
|
lwz r16, 204(SP)
|
||||||
lwz r15, 208(SP)
|
lwz r15, 208(SP)
|
||||||
lwz r14, 212(SP)
|
lwz r14, 212(SP)
|
||||||
|
addi r11, SP, 224
|
||||||
#endif
|
#endif
|
||||||
|
lvx v20, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v21, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v22, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v23, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v24, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v25, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v26, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v27, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v28, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v29, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v30, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v31, r11, r3
|
||||||
|
li r11, 0
|
||||||
|
|
||||||
|
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
|
|
||||||
blr
|
blr
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
|
@ -110,8 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "sgemm_tcopy_macros_16_power8.S"
|
#include "sgemm_tcopy_macros_16_power8.S"
|
||||||
|
|
||||||
#define STACKSIZE 384
|
#define STACKSIZE 576
|
||||||
|
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
PROFCODE
|
PROFCODE
|
||||||
|
@ -137,6 +136,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
std r16, 264(SP)
|
std r16, 264(SP)
|
||||||
std r15, 272(SP)
|
std r15, 272(SP)
|
||||||
std r14, 280(SP)
|
std r14, 280(SP)
|
||||||
|
addi r11 ,SP, 288
|
||||||
|
stvx v20, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v21, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v22, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v23, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v24, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v25, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v26, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v27, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v28, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v29, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v30, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v31, r11, r0
|
||||||
|
li r11, 0
|
||||||
|
|
||||||
cmpwi cr0, M, 0
|
cmpwi cr0, M, 0
|
||||||
ble- L999
|
ble- L999
|
||||||
|
@ -203,9 +227,33 @@ L999:
|
||||||
ld r16, 264(SP)
|
ld r16, 264(SP)
|
||||||
ld r15, 272(SP)
|
ld r15, 272(SP)
|
||||||
ld r14, 280(SP)
|
ld r14, 280(SP)
|
||||||
|
addi r11, SP, 288
|
||||||
|
lvx v20, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v21, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v22, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v23, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v24, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v25, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v26, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v27, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v28, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v29, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v30, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v31, r11, r3
|
||||||
|
li r11, 0
|
||||||
|
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
|
|
||||||
blr
|
blr
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
||||||
|
|
|
@ -110,8 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "sgemm_tcopy_macros_8_power8.S"
|
#include "sgemm_tcopy_macros_8_power8.S"
|
||||||
|
|
||||||
#define STACKSIZE 384
|
#define STACKSIZE 576
|
||||||
|
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
PROFCODE
|
PROFCODE
|
||||||
|
@ -137,6 +136,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
std r16, 264(SP)
|
std r16, 264(SP)
|
||||||
std r15, 272(SP)
|
std r15, 272(SP)
|
||||||
std r14, 280(SP)
|
std r14, 280(SP)
|
||||||
|
addi r11, SP, 288
|
||||||
|
stvx v20, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v21, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v22, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v23, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v24, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v25, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v26, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v27, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v28, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v29, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v30, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v31, r11, r0
|
||||||
|
li r11, 0
|
||||||
|
|
||||||
cmpwi cr0, M, 0
|
cmpwi cr0, M, 0
|
||||||
ble- L999
|
ble- L999
|
||||||
|
@ -198,9 +222,33 @@ L999:
|
||||||
ld r16, 264(SP)
|
ld r16, 264(SP)
|
||||||
ld r15, 272(SP)
|
ld r15, 272(SP)
|
||||||
ld r14, 280(SP)
|
ld r14, 280(SP)
|
||||||
|
addi r11,SP,288
|
||||||
|
lvx v20, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v21, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v22, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v23, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v24, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v25, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v26, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v27, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v28, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v29, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v30, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v31, r11, r3
|
||||||
|
li r11, 0
|
||||||
|
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
|
|
||||||
blr
|
blr
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
||||||
|
|
|
@ -83,8 +83,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
#define STACKSIZE 340
|
#define STACKSIZE 340
|
||||||
#define ALPHA_SP 296(SP)
|
#define STACKSIZE 540
|
||||||
#define FZERO 304(SP)
|
#define ALPHA_SP 296+200(SP)
|
||||||
|
#define FZERO 304+200(SP)
|
||||||
#else
|
#else
|
||||||
#define STACKSIZE 240
|
#define STACKSIZE 240
|
||||||
#define ALPHA_SP 224(SP)
|
#define ALPHA_SP 224(SP)
|
||||||
|
@ -132,6 +133,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define o0 0
|
#define o0 0
|
||||||
|
|
||||||
|
#define VECSAVE r11
|
||||||
|
|
||||||
#define TBUFFER r13
|
#define TBUFFER r13
|
||||||
#define o12 r14
|
#define o12 r14
|
||||||
#define o4 r15
|
#define o4 r15
|
||||||
|
@ -207,6 +210,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
std r15, 272(SP)
|
std r15, 272(SP)
|
||||||
std r14, 280(SP)
|
std r14, 280(SP)
|
||||||
std r13, 288(SP)
|
std r13, 288(SP)
|
||||||
|
addi r11, SP, 304
|
||||||
#else
|
#else
|
||||||
stw r31, 144(SP)
|
stw r31, 144(SP)
|
||||||
stw r30, 148(SP)
|
stw r30, 148(SP)
|
||||||
|
@ -226,8 +230,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
stw r16, 204(SP)
|
stw r16, 204(SP)
|
||||||
stw r15, 208(SP)
|
stw r15, 208(SP)
|
||||||
stw r14, 212(SP)
|
stw r14, 212(SP)
|
||||||
stw r13, 216(SP)
|
stw r13, 216(SP)
|
||||||
|
addi r11, SP, 224
|
||||||
#endif
|
#endif
|
||||||
|
stvx v20, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v21, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v22, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v23, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v24, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v25, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v26, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v27, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v28, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v29, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v30, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v31, r11, r0
|
||||||
|
li r11, 0
|
||||||
|
|
||||||
|
|
||||||
// stfd f1, ALPHA_SP
|
// stfd f1, ALPHA_SP
|
||||||
// stw r0, FZERO
|
// stw r0, FZERO
|
||||||
|
@ -278,9 +308,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
li o16, 16
|
li o16, 16
|
||||||
li o32, 32
|
li o32, 32
|
||||||
li o48, 48
|
li o48, 48
|
||||||
addi TBUFFER, SP, 320
|
addi TBUFFER, SP, 320+200
|
||||||
|
|
||||||
addi T1, SP, 300
|
addi T1, SP, 300+200
|
||||||
stxsspx f1, o0 , T1
|
stxsspx f1, o0 , T1
|
||||||
stxsspx f1, o4 , T1
|
stxsspx f1, o4 , T1
|
||||||
stxsspx f1, o8 , T1
|
stxsspx f1, o8 , T1
|
||||||
|
@ -339,6 +369,7 @@ L999:
|
||||||
ld r15, 272(SP)
|
ld r15, 272(SP)
|
||||||
ld r14, 280(SP)
|
ld r14, 280(SP)
|
||||||
ld r13, 288(SP)
|
ld r13, 288(SP)
|
||||||
|
addi r11, SP, 304
|
||||||
#else
|
#else
|
||||||
lwz r31, 144(SP)
|
lwz r31, 144(SP)
|
||||||
lwz r30, 148(SP)
|
lwz r30, 148(SP)
|
||||||
|
@ -359,10 +390,34 @@ L999:
|
||||||
lwz r15, 208(SP)
|
lwz r15, 208(SP)
|
||||||
lwz r14, 212(SP)
|
lwz r14, 212(SP)
|
||||||
lwz r13, 216(SP)
|
lwz r13, 216(SP)
|
||||||
|
addi r11, SP, 224
|
||||||
#endif
|
#endif
|
||||||
|
lvx v20, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v21, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v22, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v23, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v24, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v25, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v26, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v27, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v28, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v29, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v30, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v31, r11, r3
|
||||||
|
li r11, 0
|
||||||
|
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
|
|
||||||
blr
|
blr
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
|
@ -117,15 +117,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
#define STACKSIZE 32000
|
#define STACKSIZE 32192
|
||||||
#define ALPHA_R_SP 296(SP)
|
#define ALPHA_R_SP 296+192(SP)
|
||||||
#define ALPHA_I_SP 304(SP)
|
#define ALPHA_I_SP 304+192(SP)
|
||||||
#define FZERO 312(SP)
|
#define FZERO 312+192(SP)
|
||||||
#else
|
#else
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 460
|
||||||
#define ALPHA_R_SP 224(SP)
|
#define ALPHA_R_SP 224+204(SP)
|
||||||
#define ALPHA_I_SP 232(SP)
|
#define ALPHA_I_SP 232+204(SP)
|
||||||
#define FZERO 240(SP)
|
#define FZERO 240+204(SP)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define M r3
|
#define M r3
|
||||||
|
@ -168,6 +168,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define alpha_r vs30
|
#define alpha_r vs30
|
||||||
#define alpha_i vs31
|
#define alpha_i vs31
|
||||||
|
|
||||||
|
#define VECSAVE r11
|
||||||
|
|
||||||
#define FRAMEPOINTER r12
|
#define FRAMEPOINTER r12
|
||||||
|
|
||||||
|
@ -245,6 +246,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
std r16, 264(SP)
|
std r16, 264(SP)
|
||||||
std r15, 272(SP)
|
std r15, 272(SP)
|
||||||
std r14, 280(SP)
|
std r14, 280(SP)
|
||||||
|
addi r11, SP, 288
|
||||||
#else
|
#else
|
||||||
stw r31, 144(SP)
|
stw r31, 144(SP)
|
||||||
stw r30, 148(SP)
|
stw r30, 148(SP)
|
||||||
|
@ -263,7 +265,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
stw r17, 200(SP)
|
stw r17, 200(SP)
|
||||||
stw r16, 204(SP)
|
stw r16, 204(SP)
|
||||||
stw r15, 208(SP)
|
stw r15, 208(SP)
|
||||||
|
addi r11, SP, 224
|
||||||
#endif
|
#endif
|
||||||
|
stvx v20, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v21, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v22, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v23, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v24, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v25, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v26, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v27, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v28, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v29, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v30, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v31, r11, r0
|
||||||
|
li r11,0
|
||||||
|
|
||||||
stfd f1, ALPHA_R_SP
|
stfd f1, ALPHA_R_SP
|
||||||
stfd f2, ALPHA_I_SP
|
stfd f2, ALPHA_I_SP
|
||||||
|
@ -332,9 +359,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
and BBUFFER, BBUFFER, T1
|
and BBUFFER, BBUFFER, T1
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
addi ALPHA, SP, 296
|
addi ALPHA, SP, 296+192
|
||||||
#else
|
#else
|
||||||
addi ALPHA, SP, 224
|
addi ALPHA, SP, 224+192+12
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
lxsdx alpha_r, 0, ALPHA
|
lxsdx alpha_r, 0, ALPHA
|
||||||
|
@ -389,6 +416,7 @@ L999:
|
||||||
ld r16, 264(SP)
|
ld r16, 264(SP)
|
||||||
ld r15, 272(SP)
|
ld r15, 272(SP)
|
||||||
ld r14, 280(SP)
|
ld r14, 280(SP)
|
||||||
|
addi r11, SP, 288
|
||||||
#else
|
#else
|
||||||
lwz r31, 144(SP)
|
lwz r31, 144(SP)
|
||||||
lwz r30, 148(SP)
|
lwz r30, 148(SP)
|
||||||
|
@ -407,13 +435,37 @@ L999:
|
||||||
lwz r17, 200(SP)
|
lwz r17, 200(SP)
|
||||||
lwz r16, 204(SP)
|
lwz r16, 204(SP)
|
||||||
lwz r15, 208(SP)
|
lwz r15, 208(SP)
|
||||||
|
addi r11, SP, 224
|
||||||
#endif
|
#endif
|
||||||
|
lvx v20, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v21, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v22, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v23, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v24, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v25, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v26, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v27, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v28, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v29, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v30, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v31, r11, r3
|
||||||
|
li r11, 0
|
||||||
|
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
|
|
||||||
blr
|
blr
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
|
@ -110,6 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "zgemm_tcopy_macros_8_power8.S"
|
#include "zgemm_tcopy_macros_8_power8.S"
|
||||||
|
|
||||||
#define STACKSIZE 384
|
#define STACKSIZE 384
|
||||||
|
#define STACKSIZE 576
|
||||||
|
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
|
@ -136,6 +137,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
std r16, 264(SP)
|
std r16, 264(SP)
|
||||||
std r15, 272(SP)
|
std r15, 272(SP)
|
||||||
std r14, 280(SP)
|
std r14, 280(SP)
|
||||||
|
addi r11, SP ,288
|
||||||
|
stvx v20, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v21, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v22, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v23, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v24, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v25, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v26, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v27, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v28, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v29, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v30, r11, r0
|
||||||
|
addi r11, r11 ,16
|
||||||
|
stvx v31, r11, r0
|
||||||
|
li r11,0
|
||||||
|
|
||||||
cmpwi cr0, M, 0
|
cmpwi cr0, M, 0
|
||||||
ble- L999
|
ble- L999
|
||||||
|
@ -196,9 +222,33 @@ L999:
|
||||||
ld r16, 264(SP)
|
ld r16, 264(SP)
|
||||||
ld r15, 272(SP)
|
ld r15, 272(SP)
|
||||||
ld r14, 280(SP)
|
ld r14, 280(SP)
|
||||||
|
addi r11, SP, 288
|
||||||
|
lvx v20, r11,r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v21, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v22, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v23, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v24, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v25, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v26, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v27, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v28, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v29, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v30, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v31, r11, r3
|
||||||
|
li r11,0
|
||||||
|
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
|
|
||||||
blr
|
blr
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
/***************************************************************************
|
/***************************************************************************
|
||||||
Copyright (c) 2013-2016, The OpenBLAS Project
|
Copyright (c) 2013-2016, The OpenBLAS Project
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
@ -82,15 +83,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
#define STACKSIZE 320
|
#define STACKSIZE 520
|
||||||
#define ALPHA_R_SP 296(SP)
|
#define ALPHA_R_SP 296+200(SP)
|
||||||
#define ALPHA_I_SP 304(SP)
|
#define ALPHA_I_SP 304+200(SP)
|
||||||
#define FZERO 312(SP)
|
#define FZERO 312+200(SP)
|
||||||
#else
|
#else
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 452
|
||||||
#define ALPHA_R_SP 224(SP)
|
#define ALPHA_R_SP 224+196(SP)
|
||||||
#define ALPHA_I_SP 232(SP)
|
#define ALPHA_I_SP 232+196(SP)
|
||||||
#define FZERO 240(SP)
|
#define FZERO 240+196(SP)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define M r3
|
#define M r3
|
||||||
|
@ -133,6 +134,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define alpha_r vs30
|
#define alpha_r vs30
|
||||||
#define alpha_i vs31
|
#define alpha_i vs31
|
||||||
|
|
||||||
|
#define VECSAVE r11
|
||||||
|
|
||||||
#define KKK r13
|
#define KKK r13
|
||||||
#define K1 r14
|
#define K1 r14
|
||||||
#define L r15
|
#define L r15
|
||||||
|
@ -204,6 +207,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
std r15, 272(SP)
|
std r15, 272(SP)
|
||||||
std r14, 280(SP)
|
std r14, 280(SP)
|
||||||
std r13, 288(SP)
|
std r13, 288(SP)
|
||||||
|
addi r11, SP, 304
|
||||||
#else
|
#else
|
||||||
stw r31, 144(SP)
|
stw r31, 144(SP)
|
||||||
stw r30, 148(SP)
|
stw r30, 148(SP)
|
||||||
|
@ -224,7 +228,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
stw r15, 208(SP)
|
stw r15, 208(SP)
|
||||||
stw r14, 212(SP)
|
stw r14, 212(SP)
|
||||||
stw r13, 216(SP)
|
stw r13, 216(SP)
|
||||||
|
addi r11, SP, 224
|
||||||
#endif
|
#endif
|
||||||
|
stvx v20, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v21, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v22, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v23, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v24, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v25, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v26, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v27, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v28, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v29, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v30, r11, r0
|
||||||
|
addi r11, r11, 16
|
||||||
|
stvx v31, r11, r0
|
||||||
|
li r11, 0
|
||||||
|
|
||||||
stfd f1, ALPHA_R_SP
|
stfd f1, ALPHA_R_SP
|
||||||
stfd f2, ALPHA_I_SP
|
stfd f2, ALPHA_I_SP
|
||||||
|
@ -289,9 +318,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
li o48 , 48
|
li o48 , 48
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
addi ALPHA, SP, 296
|
addi ALPHA, SP, 296+200
|
||||||
#else
|
#else
|
||||||
addi ALPHA, SP, 224
|
addi ALPHA, SP, 224+196
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
lxsdx alpha_r, 0, ALPHA
|
lxsdx alpha_r, 0, ALPHA
|
||||||
|
@ -347,6 +376,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
ld r15, 272(SP)
|
ld r15, 272(SP)
|
||||||
ld r14, 280(SP)
|
ld r14, 280(SP)
|
||||||
ld r13, 288(SP)
|
ld r13, 288(SP)
|
||||||
|
addi r11, SP, 304
|
||||||
#else
|
#else
|
||||||
lwz r31, 144(SP)
|
lwz r31, 144(SP)
|
||||||
lwz r30, 148(SP)
|
lwz r30, 148(SP)
|
||||||
|
@ -367,10 +397,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
lwz r15, 208(SP)
|
lwz r15, 208(SP)
|
||||||
lwz r14, 212(SP)
|
lwz r14, 212(SP)
|
||||||
lwz r13, 216(SP)
|
lwz r13, 216(SP)
|
||||||
|
addi r11, SP, 224
|
||||||
#endif
|
#endif
|
||||||
|
lvx v20, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v21, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v22, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v23, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v24, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v25, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v26, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v27, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v28, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v29, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v30, r11, r3
|
||||||
|
addi r11, r11, 16
|
||||||
|
lvx v31, r11, r3
|
||||||
|
li r11, 0
|
||||||
|
|
||||||
addi SP, SP, STACKSIZE
|
addi SP, SP, STACKSIZE
|
||||||
|
|
||||||
blr
|
blr
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
Loading…
Reference in New Issue