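Fix stack-pointer bug for strmm: address the stack frame through $sp instead of $fp (dropping the $fp save/restore), reduce STACKSIZE from 192 to 160, and load OFFSET with LDARG from 160($sp) in the prologue.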
This commit is contained in:
parent
a15bc95824
commit
23e182ca7c
|
@ -3,7 +3,7 @@
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#define FETCH ld
|
#define FETCH ld
|
||||||
#define STACKSIZE 192
|
#define STACKSIZE 160
|
||||||
#define gsLQC1(base,fq,ft,offset) .word(0x32<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq)
|
#define gsLQC1(base,fq,ft,offset) .word(0x32<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq)
|
||||||
#define gsSQC1(base,fq,ft,offset) .word(0x3A<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq)
|
#define gsSQC1(base,fq,ft,offset) .word(0x3A<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq)
|
||||||
|
|
||||||
|
@ -127,7 +127,7 @@
|
||||||
# .ent gemm
|
# .ent gemm
|
||||||
# .type gemm, @function
|
# .type gemm, @function
|
||||||
#gemm:
|
#gemm:
|
||||||
# .frame $fp,STACKSIZE,$31 # vars= 48, regs= 1/0, args= 0, gp= 0
|
# .frame $sp,STACKSIZE,$31 # vars= 48, regs= 1/0, args= 0, gp= 0
|
||||||
# .mask 0x40000000,-8
|
# .mask 0x40000000,-8
|
||||||
# .fmask 0x00000000,0
|
# .fmask 0x00000000,0
|
||||||
# .set noreorder
|
# .set noreorder
|
||||||
|
@ -137,34 +137,34 @@
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
|
|
||||||
daddiu $sp,$sp,-STACKSIZE
|
daddiu $sp,$sp,-STACKSIZE
|
||||||
sd $fp,184($sp)
|
|
||||||
move $fp,$sp
|
|
||||||
|
|
||||||
sd $16, 0($fp)
|
sd $16, 0($sp)
|
||||||
sd $17, 8($fp)
|
sd $17, 8($sp)
|
||||||
sd $18, 16($fp)
|
sd $18, 16($sp)
|
||||||
sd $19, 24($fp)
|
sd $19, 24($sp)
|
||||||
sd $20, 32($fp)
|
sd $20, 32($sp)
|
||||||
sd $21, 40($fp)
|
sd $21, 40($sp)
|
||||||
sd $22, 48($fp)
|
sd $22, 48($sp)
|
||||||
|
|
||||||
ST $f24, 56($fp)
|
ST $f24, 56($sp)
|
||||||
ST $f25, 64($fp)
|
ST $f25, 64($sp)
|
||||||
ST $f26, 72($fp)
|
ST $f26, 72($sp)
|
||||||
ST $f27, 80($fp)
|
ST $f27, 80($sp)
|
||||||
ST $f28, 88($fp)
|
ST $f28, 88($sp)
|
||||||
|
|
||||||
#if defined(TRMMKERNEL)
|
#if defined(TRMMKERNEL)
|
||||||
sd $23, 96($fp)
|
sd $23, 96($sp)
|
||||||
sd $24, 104($fp)
|
sd $24, 104($sp)
|
||||||
sd $25, 112($fp)
|
sd $25, 112($sp)
|
||||||
|
|
||||||
|
LDARG OFFSET, 160($sp)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef __64BIT__
|
#ifndef __64BIT__
|
||||||
ST $f20,120($fp)
|
ST $f20,120($sp)
|
||||||
ST $f21,128($fp)
|
ST $f21,128($sp)
|
||||||
ST $f22,136($fp)
|
ST $f22,136($sp)
|
||||||
ST $f23,144($fp)
|
ST $f23,144($sp)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
.align 4
|
.align 4
|
||||||
|
@ -172,16 +172,12 @@
|
||||||
dsra J, N, 2 # NR=4
|
dsra J, N, 2 # NR=4
|
||||||
dsll LDC, LDC, BASE_SHIFT# LDC*SIZE
|
dsll LDC, LDC, BASE_SHIFT# LDC*SIZE
|
||||||
|
|
||||||
#if defined(TRMMKERNEL)
|
|
||||||
LD OFFSET, 192($fp)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(TRMMKERNEL) && !defined(LEFT)
|
#if defined(TRMMKERNEL) && !defined(LEFT)
|
||||||
neg KK, OFFSET
|
neg KK, OFFSET
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
blez J, .L2
|
blez J, .L2
|
||||||
ST ALPHA, 152($fp)
|
ST ALPHA, 152($sp)
|
||||||
|
|
||||||
.L48:
|
.L48:
|
||||||
dsra I, M, 3 # MR=8
|
dsra I, M, 3 # MR=8
|
||||||
|
@ -4670,7 +4666,7 @@
|
||||||
andi L, TEMP, 1
|
andi L, TEMP, 1
|
||||||
#endif
|
#endif
|
||||||
blez L, .L480
|
blez L, .L480
|
||||||
LD ALPHA, 152($fp)
|
LD ALPHA, 152($sp)
|
||||||
|
|
||||||
MADPS C11, C11, A1, B1
|
MADPS C11, C11, A1, B1
|
||||||
MADPS C21, C21, A2, B1
|
MADPS C21, C21, A2, B1
|
||||||
|
@ -5273,7 +5269,7 @@
|
||||||
andi L, TEMP, 1
|
andi L, TEMP, 1
|
||||||
#endif
|
#endif
|
||||||
blez L, .L440
|
blez L, .L440
|
||||||
LD ALPHA, 152($fp)
|
LD ALPHA, 152($sp)
|
||||||
|
|
||||||
MADPS C11, C11, A1, B1
|
MADPS C11, C11, A1, B1
|
||||||
MADPS C21, C21, A2, B1
|
MADPS C21, C21, A2, B1
|
||||||
|
@ -5653,7 +5649,7 @@
|
||||||
andi L, TEMP, 1
|
andi L, TEMP, 1
|
||||||
#endif
|
#endif
|
||||||
blez L, .L420
|
blez L, .L420
|
||||||
LD ALPHA, 152($fp)
|
LD ALPHA, 152($sp)
|
||||||
|
|
||||||
MADPS C11, C11, A1, B1
|
MADPS C11, C11, A1, B1
|
||||||
MADPS C12, C12, A1, B2
|
MADPS C12, C12, A1, B2
|
||||||
|
@ -5968,7 +5964,7 @@
|
||||||
andi L, TEMP, 1
|
andi L, TEMP, 1
|
||||||
#endif
|
#endif
|
||||||
blez L, .L410
|
blez L, .L410
|
||||||
LD ALPHA, 152($fp)
|
LD ALPHA, 152($sp)
|
||||||
|
|
||||||
MADD C11, C11, A1, B1
|
MADD C11, C11, A1, B1
|
||||||
MADD C12, C12, A1, B2
|
MADD C12, C12, A1, B2
|
||||||
|
@ -6258,7 +6254,7 @@
|
||||||
andi L, TEMP, 1
|
andi L, TEMP, 1
|
||||||
#endif
|
#endif
|
||||||
blez L, .L280
|
blez L, .L280
|
||||||
LD ALPHA, 152($fp)
|
LD ALPHA, 152($sp)
|
||||||
|
|
||||||
MADD C13, C13, A5, B1
|
MADD C13, C13, A5, B1
|
||||||
MADD C23, C23, A6, B1
|
MADD C23, C23, A6, B1
|
||||||
|
@ -6574,7 +6570,7 @@
|
||||||
andi L, TEMP, 1
|
andi L, TEMP, 1
|
||||||
#endif
|
#endif
|
||||||
blez L, .L240
|
blez L, .L240
|
||||||
LD ALPHA, 152($fp)
|
LD ALPHA, 152($sp)
|
||||||
|
|
||||||
MADD C11, C11, A1, B1
|
MADD C11, C11, A1, B1
|
||||||
MADD C21, C21, A2, B1
|
MADD C21, C21, A2, B1
|
||||||
|
@ -6784,7 +6780,7 @@
|
||||||
andi L, TEMP, 1
|
andi L, TEMP, 1
|
||||||
#endif
|
#endif
|
||||||
blez L, .L220
|
blez L, .L220
|
||||||
LD ALPHA, 152($fp)
|
LD ALPHA, 152($sp)
|
||||||
|
|
||||||
MADD C11, C11, A1, B1
|
MADD C11, C11, A1, B1
|
||||||
MADD C21, C21, A2, B1
|
MADD C21, C21, A2, B1
|
||||||
|
@ -6953,7 +6949,7 @@
|
||||||
andi L, TEMP, 1
|
andi L, TEMP, 1
|
||||||
#endif
|
#endif
|
||||||
blez L, .L210
|
blez L, .L210
|
||||||
LD ALPHA, 152($fp)
|
LD ALPHA, 152($sp)
|
||||||
|
|
||||||
MADD C11, C11, A1, B1
|
MADD C11, C11, A1, B1
|
||||||
MADD C12, C12, A1, B2
|
MADD C12, C12, A1, B2
|
||||||
|
@ -7204,7 +7200,7 @@
|
||||||
andi L, TEMP, 1
|
andi L, TEMP, 1
|
||||||
#endif
|
#endif
|
||||||
blez L, .L180
|
blez L, .L180
|
||||||
LD ALPHA, 152($fp)
|
LD ALPHA, 152($sp)
|
||||||
|
|
||||||
MADD C13, C13, A5, B1
|
MADD C13, C13, A5, B1
|
||||||
MADD C23, C23, A6, B1
|
MADD C23, C23, A6, B1
|
||||||
|
@ -7435,7 +7431,7 @@
|
||||||
andi L, TEMP, 1
|
andi L, TEMP, 1
|
||||||
#endif
|
#endif
|
||||||
blez L, .L140
|
blez L, .L140
|
||||||
LD ALPHA, 152($fp)
|
LD ALPHA, 152($sp)
|
||||||
|
|
||||||
MADD C11, C11, A1, B1
|
MADD C11, C11, A1, B1
|
||||||
MADD C21, C21, A2, B1
|
MADD C21, C21, A2, B1
|
||||||
|
@ -7597,7 +7593,7 @@
|
||||||
andi L, TEMP, 1
|
andi L, TEMP, 1
|
||||||
#endif
|
#endif
|
||||||
blez L, .L120
|
blez L, .L120
|
||||||
LD ALPHA, 152($fp)
|
LD ALPHA, 152($sp)
|
||||||
|
|
||||||
MADD C11, C11, A1, B1
|
MADD C11, C11, A1, B1
|
||||||
MADD C21, C21, A2, B1
|
MADD C21, C21, A2, B1
|
||||||
|
@ -7730,7 +7726,7 @@
|
||||||
andi L, TEMP, 1
|
andi L, TEMP, 1
|
||||||
#endif
|
#endif
|
||||||
blez L, .L110
|
blez L, .L110
|
||||||
LD ALPHA, 152($fp)
|
LD ALPHA, 152($sp)
|
||||||
|
|
||||||
MADD C11, C11, A1, B1
|
MADD C11, C11, A1, B1
|
||||||
daddiu AO, AO, 1 * SIZE
|
daddiu AO, AO, 1 * SIZE
|
||||||
|
@ -7762,35 +7758,33 @@
|
||||||
NOP
|
NOP
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
ld $16, 0($fp)
|
ld $16, 0($sp)
|
||||||
ld $17, 8($fp)
|
ld $17, 8($sp)
|
||||||
ld $18, 16($fp)
|
ld $18, 16($sp)
|
||||||
ld $19, 24($fp)
|
ld $19, 24($sp)
|
||||||
ld $20, 32($fp)
|
ld $20, 32($sp)
|
||||||
ld $21, 40($fp)
|
ld $21, 40($sp)
|
||||||
ld $22, 48($fp)
|
ld $22, 48($sp)
|
||||||
|
|
||||||
LD $f24, 56($fp)
|
LD $f24, 56($sp)
|
||||||
LD $f25, 64($fp)
|
LD $f25, 64($sp)
|
||||||
LD $f26, 72($fp)
|
LD $f26, 72($sp)
|
||||||
LD $f27, 80($fp)
|
LD $f27, 80($sp)
|
||||||
LD $f28, 88($fp)
|
LD $f28, 88($sp)
|
||||||
|
|
||||||
#if defined(TRMMKERNEL)
|
#if defined(TRMMKERNEL)
|
||||||
ld $23, 96($fp)
|
ld $23, 96($sp)
|
||||||
ld $24, 104($fp)
|
ld $24, 104($sp)
|
||||||
ld $25, 112($fp)
|
ld $25, 112($sp)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef __64BIT__
|
#ifndef __64BIT__
|
||||||
LD $f20,120($fp)
|
LD $f20,120($sp)
|
||||||
LD $f21,128($fp)
|
LD $f21,128($sp)
|
||||||
LD $f22,136($fp)
|
LD $f22,136($sp)
|
||||||
LD $f23,144($fp)
|
LD $f23,144($sp)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
move $sp,$fp
|
|
||||||
ld $fp,184($sp)
|
|
||||||
daddiu $sp,$sp,STACKSIZE
|
daddiu $sp,$sp,STACKSIZE
|
||||||
j $31
|
j $31
|
||||||
nop
|
nop
|
||||||
|
|
Loading…
Reference in New Issue