Fixed internal buffer overflow bug in (s/d/c/z)gemv on x86.
This commit is contained in:
parent
947457fb7c
commit
6a72840945
|
@ -101,10 +101,10 @@
|
||||||
#define Y 36 + STACKSIZE+ARGS(%esp)
|
#define Y 36 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_INCY 40 + STACKSIZE+ARGS(%esp)
|
#define STACK_INCY 40 + STACKSIZE+ARGS(%esp)
|
||||||
#define BUFFER 44 + STACKSIZE+ARGS(%esp)
|
#define BUFFER 44 + STACKSIZE+ARGS(%esp)
|
||||||
|
|
||||||
#define MMM 0+ARGS(%esp)
|
#define MMM 0+ARGS(%esp)
|
||||||
#define YY 4+ARGS(%esp)
|
#define YY 4+ARGS(%esp)
|
||||||
#define AA 8+ARGS(%esp)
|
#define AA 8+ARGS(%esp)
|
||||||
#define LDAX 12+ARGS(%esp)
|
|
||||||
|
|
||||||
#define I %eax
|
#define I %eax
|
||||||
#define J %ebx
|
#define J %ebx
|
||||||
|
@ -153,8 +153,8 @@
|
||||||
|
|
||||||
movl YY,J
|
movl YY,J
|
||||||
movl J,Y
|
movl J,Y
|
||||||
movl STACK_LDA, LDA
|
|
||||||
|
|
||||||
|
movl STACK_LDA, LDA
|
||||||
movl STACK_X, X
|
movl STACK_X, X
|
||||||
movl STACK_INCX, INCX
|
movl STACK_INCX, INCX
|
||||||
|
|
||||||
|
@ -688,9 +688,9 @@
|
||||||
movl M,J
|
movl M,J
|
||||||
leal (,J,SIZE),%eax
|
leal (,J,SIZE),%eax
|
||||||
addl %eax,AA
|
addl %eax,AA
|
||||||
movl YY,J
|
movl STACK_INCY,INCY
|
||||||
addl %eax,J
|
imull INCY,%eax
|
||||||
movl J,YY
|
addl %eax,YY
|
||||||
jmp .L0t
|
jmp .L0t
|
||||||
ALIGN_4
|
ALIGN_4
|
||||||
|
|
||||||
|
|
|
@ -714,9 +714,9 @@
|
||||||
movl M,J
|
movl M,J
|
||||||
leal (,J,SIZE),%eax
|
leal (,J,SIZE),%eax
|
||||||
addl %eax,AA
|
addl %eax,AA
|
||||||
movl YY,J
|
movl STACK_INCY,INCY
|
||||||
addl %eax,J
|
imull INCY,%eax
|
||||||
movl J,YY
|
addl %eax,YY
|
||||||
jmp .L0t
|
jmp .L0t
|
||||||
ALIGN_4
|
ALIGN_4
|
||||||
|
|
||||||
|
|
|
@ -102,11 +102,9 @@
|
||||||
#define STACK_INCY 40 + STACKSIZE+ARGS(%esp)
|
#define STACK_INCY 40 + STACKSIZE+ARGS(%esp)
|
||||||
#define BUFFER 44 + STACKSIZE+ARGS(%esp)
|
#define BUFFER 44 + STACKSIZE+ARGS(%esp)
|
||||||
|
|
||||||
#define MMM 0+STACKSIZE(%esp)
|
#define MMM 0+ARGS(%esp)
|
||||||
#define NN 4+STACKSIZE(%esp)
|
#define AA 4+ARGS(%esp)
|
||||||
#define AA 8+STACKSIZE(%esp)
|
#define XX 8+ARGS(%esp)
|
||||||
#define LDAX 12+STACKSIZE(%esp)
|
|
||||||
#define XX 16+STACKSIZE(%esp)
|
|
||||||
|
|
||||||
#define I %eax
|
#define I %eax
|
||||||
#define J %ebx
|
#define J %ebx
|
||||||
|
@ -129,12 +127,8 @@
|
||||||
|
|
||||||
PROFCODE
|
PROFCODE
|
||||||
|
|
||||||
movl STACK_LDA, LDA
|
|
||||||
movl LDA,LDAX # backup LDA
|
|
||||||
movl STACK_X, X
|
movl STACK_X, X
|
||||||
movl X,XX
|
movl X,XX
|
||||||
movl N,J
|
|
||||||
movl J,NN # backup N
|
|
||||||
movl A,J
|
movl A,J
|
||||||
movl J,AA # backup A
|
movl J,AA # backup A
|
||||||
movl M,J
|
movl M,J
|
||||||
|
@ -144,7 +138,6 @@
|
||||||
addl $1,J
|
addl $1,J
|
||||||
sall $22,J # J=2^22 floats; J*sizeof(float)=2^24 bytes=buffer size(16MB)
|
sall $22,J # J=2^22 floats; J*sizeof(float)=2^24 bytes=buffer size(16MB)
|
||||||
subl $8, J # Don't use last 8 float in the buffer.
|
subl $8, J # Don't use last 8 float in the buffer.
|
||||||
# Now, split M by block J
|
|
||||||
subl J,MMM # MMM=MMM-J
|
subl J,MMM # MMM=MMM-J
|
||||||
movl J,M
|
movl J,M
|
||||||
jge .L00t
|
jge .L00t
|
||||||
|
@ -159,13 +152,10 @@
|
||||||
movl AA,%eax
|
movl AA,%eax
|
||||||
movl %eax,A # mov AA to A
|
movl %eax,A # mov AA to A
|
||||||
|
|
||||||
movl NN,%eax
|
movl XX,%eax
|
||||||
movl %eax,N # reset N
|
movl %eax,X
|
||||||
|
|
||||||
|
|
||||||
movl LDAX, LDA # reset LDA
|
|
||||||
movl XX,X
|
|
||||||
|
|
||||||
|
movl STACK_LDA, LDA
|
||||||
movl STACK_INCX, INCX
|
movl STACK_INCX, INCX
|
||||||
movl STACK_INCY, INCY
|
movl STACK_INCY, INCY
|
||||||
|
|
||||||
|
@ -688,9 +678,9 @@
|
||||||
movl M,J
|
movl M,J
|
||||||
leal (,J,SIZE),%eax
|
leal (,J,SIZE),%eax
|
||||||
addl %eax,AA
|
addl %eax,AA
|
||||||
movl XX,J
|
movl STACK_INCX,INCX
|
||||||
addl %eax,J
|
imull INCX,%eax
|
||||||
movl J,XX
|
addl %eax,XX
|
||||||
jmp .L0t
|
jmp .L0t
|
||||||
ALIGN_4
|
ALIGN_4
|
||||||
|
|
||||||
|
|
|
@ -76,7 +76,7 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define STACKSIZE 16
|
#define STACKSIZE 16
|
||||||
#define ARGS 16
|
#define ARGS 20
|
||||||
|
|
||||||
#define M 4 + STACKSIZE+ARGS(%esp)
|
#define M 4 + STACKSIZE+ARGS(%esp)
|
||||||
#define N 8 + STACKSIZE+ARGS(%esp)
|
#define N 8 + STACKSIZE+ARGS(%esp)
|
||||||
|
@ -89,10 +89,9 @@
|
||||||
#define STACK_INCY 44 + STACKSIZE+ARGS(%esp)
|
#define STACK_INCY 44 + STACKSIZE+ARGS(%esp)
|
||||||
#define BUFFER 48 + STACKSIZE+ARGS(%esp)
|
#define BUFFER 48 + STACKSIZE+ARGS(%esp)
|
||||||
|
|
||||||
#define MMM 0+STACKSIZE(%esp)
|
#define MMM 0+ARGS(%esp)
|
||||||
#define AA 4+STACKSIZE(%esp)
|
#define AA 4+ARGS(%esp)
|
||||||
#define LDAX 8+STACKSIZE(%esp)
|
#define XX 8+ARGS(%esp)
|
||||||
#define NN 12+STACKSIZE(%esp)
|
|
||||||
|
|
||||||
#define I %eax
|
#define I %eax
|
||||||
#define J %ebx
|
#define J %ebx
|
||||||
|
@ -117,10 +116,8 @@
|
||||||
PROFCODE
|
PROFCODE
|
||||||
|
|
||||||
|
|
||||||
movl STACK_LDA, LDA
|
movl STACK_X, X
|
||||||
movl LDA,LDAX # backup LDA
|
movl X,XX
|
||||||
movl N,J
|
|
||||||
movl J,NN # backup N
|
|
||||||
movl A,J
|
movl A,J
|
||||||
movl J,AA # backup A
|
movl J,AA # backup A
|
||||||
movl M,J
|
movl M,J
|
||||||
|
@ -130,7 +127,6 @@
|
||||||
addl $1,J
|
addl $1,J
|
||||||
sall $21,J # J=2^21*sizeof(double)=buffer size(16MB)
|
sall $21,J # J=2^21*sizeof(double)=buffer size(16MB)
|
||||||
subl $4, J # Don't use last 4 double in the buffer.
|
subl $4, J # Don't use last 4 double in the buffer.
|
||||||
# Now, split M by block J
|
|
||||||
subl J,MMM # MMM=MMM-J
|
subl J,MMM # MMM=MMM-J
|
||||||
movl J,M
|
movl J,M
|
||||||
jge .L00t
|
jge .L00t
|
||||||
|
@ -142,15 +138,13 @@
|
||||||
movl %eax,M
|
movl %eax,M
|
||||||
|
|
||||||
.L00t:
|
.L00t:
|
||||||
|
movl XX,%eax
|
||||||
|
movl %eax, X
|
||||||
|
|
||||||
movl AA,%eax
|
movl AA,%eax
|
||||||
movl %eax,A # mov AA to A
|
movl %eax,A # mov AA to A
|
||||||
|
|
||||||
movl NN,%eax
|
movl STACK_LDA, LDA
|
||||||
movl %eax,N # reset N
|
|
||||||
|
|
||||||
|
|
||||||
movl LDAX, LDA # reset LDA
|
|
||||||
movl STACK_X, X
|
|
||||||
movl STACK_INCX, INCX
|
movl STACK_INCX, INCX
|
||||||
movl STACK_INCY, INCY
|
movl STACK_INCY, INCY
|
||||||
|
|
||||||
|
@ -605,6 +599,9 @@
|
||||||
movl M,J
|
movl M,J
|
||||||
leal (,J,SIZE),%eax
|
leal (,J,SIZE),%eax
|
||||||
addl %eax,AA
|
addl %eax,AA
|
||||||
|
movl STACK_INCX,INCX
|
||||||
|
imull INCX,%eax
|
||||||
|
addl %eax,XX
|
||||||
jmp .L0t
|
jmp .L0t
|
||||||
ALIGN_4
|
ALIGN_4
|
||||||
|
|
||||||
|
|
|
@ -89,18 +89,23 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define STACKSIZE 16
|
#define STACKSIZE 16
|
||||||
|
#define ARGS 20
|
||||||
|
|
||||||
#define M 4 + STACKSIZE(%esp)
|
#define M 4 + STACKSIZE+ARGS(%esp)
|
||||||
#define N 8 + STACKSIZE(%esp)
|
#define N 8 + STACKSIZE+ARGS(%esp)
|
||||||
#define ALPHA_R 16 + STACKSIZE(%esp)
|
#define ALPHA_R 16 + STACKSIZE+ARGS(%esp)
|
||||||
#define ALPHA_I 20 + STACKSIZE(%esp)
|
#define ALPHA_I 20 + STACKSIZE+ARGS(%esp)
|
||||||
#define A 24 + STACKSIZE(%esp)
|
#define A 24 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_LDA 28 + STACKSIZE(%esp)
|
#define STACK_LDA 28 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_X 32 + STACKSIZE(%esp)
|
#define STACK_X 32 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_INCX 36 + STACKSIZE(%esp)
|
#define STACK_INCX 36 + STACKSIZE+ARGS(%esp)
|
||||||
#define Y 40 + STACKSIZE(%esp)
|
#define Y 40 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_INCY 44 + STACKSIZE(%esp)
|
#define STACK_INCY 44 + STACKSIZE+ARGS(%esp)
|
||||||
#define BUFFER 48 + STACKSIZE(%esp)
|
#define BUFFER 48 + STACKSIZE+ARGS(%esp)
|
||||||
|
|
||||||
|
#define MMM 0+ARGS(%esp)
|
||||||
|
#define YY 4+ARGS(%esp)
|
||||||
|
#define AA 8+ARGS(%esp)
|
||||||
|
|
||||||
#define I %eax
|
#define I %eax
|
||||||
#define J %ebx
|
#define J %ebx
|
||||||
|
@ -123,6 +128,7 @@
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
|
|
||||||
|
subl $ARGS,%esp
|
||||||
pushl %ebp
|
pushl %ebp
|
||||||
pushl %edi
|
pushl %edi
|
||||||
pushl %esi
|
pushl %esi
|
||||||
|
@ -130,6 +136,33 @@
|
||||||
|
|
||||||
PROFCODE
|
PROFCODE
|
||||||
|
|
||||||
|
movl Y,J
|
||||||
|
movl J,YY
|
||||||
|
movl A,J
|
||||||
|
movl J,AA
|
||||||
|
movl M,J
|
||||||
|
movl J,MMM
|
||||||
|
.L0t:
|
||||||
|
xorl J,J
|
||||||
|
addl $1,J
|
||||||
|
sall $20,J
|
||||||
|
subl J,MMM
|
||||||
|
movl J,M
|
||||||
|
jge .L00t
|
||||||
|
ALIGN_3
|
||||||
|
|
||||||
|
movl MMM,%eax
|
||||||
|
addl J,%eax
|
||||||
|
jle .L999x
|
||||||
|
movl %eax,M
|
||||||
|
|
||||||
|
.L00t:
|
||||||
|
movl AA,%eax
|
||||||
|
movl %eax,A
|
||||||
|
|
||||||
|
movl YY,J
|
||||||
|
movl J,Y
|
||||||
|
|
||||||
movl STACK_LDA, LDA
|
movl STACK_LDA, LDA
|
||||||
movl STACK_X, X
|
movl STACK_X, X
|
||||||
movl STACK_INCX, INCX
|
movl STACK_INCX, INCX
|
||||||
|
@ -595,10 +628,21 @@
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
movl M,%eax
|
||||||
|
sall $ZBASE_SHIFT,%eax
|
||||||
|
addl %eax,AA
|
||||||
|
movl STACK_INCY,INCY
|
||||||
|
imull INCY,%eax
|
||||||
|
addl %eax,YY
|
||||||
|
jmp .L0t
|
||||||
|
ALIGN_3
|
||||||
|
|
||||||
|
.L999x:
|
||||||
popl %ebx
|
popl %ebx
|
||||||
popl %esi
|
popl %esi
|
||||||
popl %edi
|
popl %edi
|
||||||
popl %ebp
|
popl %ebp
|
||||||
|
addl $ARGS,%esp
|
||||||
ret
|
ret
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
|
@ -76,18 +76,23 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define STACKSIZE 16
|
#define STACKSIZE 16
|
||||||
|
#define ARGS 16
|
||||||
|
|
||||||
|
#define M 4 + STACKSIZE+ARGS(%esp)
|
||||||
|
#define N 8 + STACKSIZE+ARGS(%esp)
|
||||||
|
#define ALPHA_R 16 + STACKSIZE+ARGS(%esp)
|
||||||
|
#define ALPHA_I 24 + STACKSIZE+ARGS(%esp)
|
||||||
|
#define A 32 + STACKSIZE+ARGS(%esp)
|
||||||
|
#define STACK_LDA 36 + STACKSIZE+ARGS(%esp)
|
||||||
|
#define STACK_X 40 + STACKSIZE+ARGS(%esp)
|
||||||
|
#define STACK_INCX 44 + STACKSIZE+ARGS(%esp)
|
||||||
|
#define Y 48 + STACKSIZE+ARGS(%esp)
|
||||||
|
#define STACK_INCY 52 + STACKSIZE+ARGS(%esp)
|
||||||
|
#define BUFFER 56 + STACKSIZE+ARGS(%esp)
|
||||||
|
#define MMM 0 + ARGS(%esp)
|
||||||
|
#define YY 4 + ARGS(%esp)
|
||||||
|
#define AA 8 + ARGS(%esp)
|
||||||
|
|
||||||
#define M 4 + STACKSIZE(%esp)
|
|
||||||
#define N 8 + STACKSIZE(%esp)
|
|
||||||
#define ALPHA_R 16 + STACKSIZE(%esp)
|
|
||||||
#define ALPHA_I 24 + STACKSIZE(%esp)
|
|
||||||
#define A 32 + STACKSIZE(%esp)
|
|
||||||
#define STACK_LDA 36 + STACKSIZE(%esp)
|
|
||||||
#define STACK_X 40 + STACKSIZE(%esp)
|
|
||||||
#define STACK_INCX 44 + STACKSIZE(%esp)
|
|
||||||
#define Y 48 + STACKSIZE(%esp)
|
|
||||||
#define STACK_INCY 52 + STACKSIZE(%esp)
|
|
||||||
#define BUFFER 56 + STACKSIZE(%esp)
|
|
||||||
|
|
||||||
#define I %eax
|
#define I %eax
|
||||||
#define J %ebx
|
#define J %ebx
|
||||||
|
@ -110,6 +115,7 @@
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
|
|
||||||
|
subl $ARGS,%esp
|
||||||
pushl %ebp
|
pushl %ebp
|
||||||
pushl %edi
|
pushl %edi
|
||||||
pushl %esi
|
pushl %esi
|
||||||
|
@ -117,6 +123,33 @@
|
||||||
|
|
||||||
PROFCODE
|
PROFCODE
|
||||||
|
|
||||||
|
movl Y,J
|
||||||
|
movl J,YY
|
||||||
|
movl A,J
|
||||||
|
movl J,AA
|
||||||
|
movl M,J
|
||||||
|
movl J,MMM
|
||||||
|
.L0t:
|
||||||
|
xorl J,J
|
||||||
|
addl $1,J
|
||||||
|
sall $18,J
|
||||||
|
subl J,MMM
|
||||||
|
movl J,M
|
||||||
|
jge .L00t
|
||||||
|
ALIGN_3
|
||||||
|
|
||||||
|
movl MMM,%eax
|
||||||
|
addl J,%eax
|
||||||
|
jle .L999x
|
||||||
|
movl %eax,M
|
||||||
|
|
||||||
|
.L00t:
|
||||||
|
movl AA,%eax
|
||||||
|
movl %eax,A
|
||||||
|
|
||||||
|
movl YY,J
|
||||||
|
movl J,Y
|
||||||
|
|
||||||
movl STACK_LDA, LDA
|
movl STACK_LDA, LDA
|
||||||
movl STACK_X, X
|
movl STACK_X, X
|
||||||
movl STACK_INCX, INCX
|
movl STACK_INCX, INCX
|
||||||
|
@ -458,10 +491,21 @@
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
movl M,%eax
|
||||||
|
sall $ZBASE_SHIFT,%eax
|
||||||
|
addl %eax,AA
|
||||||
|
movl STACK_INCY,INCY
|
||||||
|
imull INCY,%eax
|
||||||
|
addl %eax,YY
|
||||||
|
jmp .L0t
|
||||||
|
ALIGN_3
|
||||||
|
|
||||||
|
.L999x:
|
||||||
popl %ebx
|
popl %ebx
|
||||||
popl %esi
|
popl %esi
|
||||||
popl %edi
|
popl %edi
|
||||||
popl %ebp
|
popl %ebp
|
||||||
|
addl $ARGS,%esp
|
||||||
ret
|
ret
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
|
@ -89,18 +89,23 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define STACKSIZE 16
|
#define STACKSIZE 16
|
||||||
|
#define ARGS 20
|
||||||
|
|
||||||
#define M 4 + STACKSIZE(%esp)
|
#define M 4 + STACKSIZE+ARGS(%esp)
|
||||||
#define N 8 + STACKSIZE(%esp)
|
#define N 8 + STACKSIZE+ARGS(%esp)
|
||||||
#define ALPHA_R 16 + STACKSIZE(%esp)
|
#define ALPHA_R 16 + STACKSIZE+ARGS(%esp)
|
||||||
#define ALPHA_I 20 + STACKSIZE(%esp)
|
#define ALPHA_I 20 + STACKSIZE+ARGS(%esp)
|
||||||
#define A 24 + STACKSIZE(%esp)
|
#define A 24 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_LDA 28 + STACKSIZE(%esp)
|
#define STACK_LDA 28 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_X 32 + STACKSIZE(%esp)
|
#define STACK_X 32 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_INCX 36 + STACKSIZE(%esp)
|
#define STACK_INCX 36 + STACKSIZE+ARGS(%esp)
|
||||||
#define Y 40 + STACKSIZE(%esp)
|
#define Y 40 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_INCY 44 + STACKSIZE(%esp)
|
#define STACK_INCY 44 + STACKSIZE+ARGS(%esp)
|
||||||
#define BUFFER 48 + STACKSIZE(%esp)
|
#define BUFFER 48 + STACKSIZE+ARGS(%esp)
|
||||||
|
|
||||||
|
#define MMM 0+ARGS(%esp)
|
||||||
|
#define XX 4+ARGS(%esp)
|
||||||
|
#define AA 8+ARGS(%esp)
|
||||||
|
|
||||||
#define I %eax
|
#define I %eax
|
||||||
#define J %ebx
|
#define J %ebx
|
||||||
|
@ -123,6 +128,7 @@
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
|
|
||||||
|
subl $ARGS,%esp
|
||||||
pushl %ebp
|
pushl %ebp
|
||||||
pushl %edi
|
pushl %edi
|
||||||
pushl %esi
|
pushl %esi
|
||||||
|
@ -130,8 +136,35 @@
|
||||||
|
|
||||||
PROFCODE
|
PROFCODE
|
||||||
|
|
||||||
movl STACK_LDA, LDA
|
|
||||||
movl STACK_X, X
|
movl STACK_X, X
|
||||||
|
movl X,XX
|
||||||
|
movl A,J
|
||||||
|
movl J,AA #backup A
|
||||||
|
movl M,J
|
||||||
|
movl J,MMM
|
||||||
|
.L0t:
|
||||||
|
xorl J,J
|
||||||
|
addl $1,J
|
||||||
|
sall $20,J
|
||||||
|
subl $8,J
|
||||||
|
subl J,MMM #MMM-=J
|
||||||
|
movl J,M
|
||||||
|
jge .L00t
|
||||||
|
ALIGN_4
|
||||||
|
|
||||||
|
movl MMM,%eax
|
||||||
|
addl J,%eax
|
||||||
|
jle .L999x
|
||||||
|
movl %eax,M
|
||||||
|
|
||||||
|
.L00t:
|
||||||
|
movl AA,%eax
|
||||||
|
movl %eax,A
|
||||||
|
|
||||||
|
movl XX,%eax
|
||||||
|
movl %eax,X
|
||||||
|
|
||||||
|
movl STACK_LDA,LDA
|
||||||
movl STACK_INCX, INCX
|
movl STACK_INCX, INCX
|
||||||
movl STACK_INCY, INCY
|
movl STACK_INCY, INCY
|
||||||
|
|
||||||
|
@ -513,10 +546,22 @@
|
||||||
ALIGN_4
|
ALIGN_4
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
movl M,%eax
|
||||||
|
sall $ZBASE_SHIFT, %eax
|
||||||
|
addl %eax,AA
|
||||||
|
movl STACK_INCX,INCX
|
||||||
|
imull INCX,%eax
|
||||||
|
addl %eax,XX
|
||||||
|
jmp .L0t
|
||||||
|
ALIGN_4
|
||||||
|
|
||||||
|
.L999x:
|
||||||
popl %ebx
|
popl %ebx
|
||||||
popl %esi
|
popl %esi
|
||||||
popl %edi
|
popl %edi
|
||||||
popl %ebp
|
popl %ebp
|
||||||
|
|
||||||
|
addl $ARGS,%esp
|
||||||
ret
|
ret
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
|
@ -76,18 +76,23 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define STACKSIZE 16
|
#define STACKSIZE 16
|
||||||
|
#define ARGS 20
|
||||||
|
|
||||||
#define M 4 + STACKSIZE(%esp)
|
#define M 4 + STACKSIZE+ARGS(%esp)
|
||||||
#define N 8 + STACKSIZE(%esp)
|
#define N 8 + STACKSIZE+ARGS(%esp)
|
||||||
#define ALPHA_R 16 + STACKSIZE(%esp)
|
#define ALPHA_R 16 + STACKSIZE+ARGS(%esp)
|
||||||
#define ALPHA_I 24 + STACKSIZE(%esp)
|
#define ALPHA_I 24 + STACKSIZE+ARGS(%esp)
|
||||||
#define A 32 + STACKSIZE(%esp)
|
#define A 32 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_LDA 36 + STACKSIZE(%esp)
|
#define STACK_LDA 36 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_X 40 + STACKSIZE(%esp)
|
#define STACK_X 40 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_INCX 44 + STACKSIZE(%esp)
|
#define STACK_INCX 44 + STACKSIZE+ARGS(%esp)
|
||||||
#define Y 48 + STACKSIZE(%esp)
|
#define Y 48 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_INCY 52 + STACKSIZE(%esp)
|
#define STACK_INCY 52 + STACKSIZE+ARGS(%esp)
|
||||||
#define BUFFER 56 + STACKSIZE(%esp)
|
#define BUFFER 56 + STACKSIZE+ARGS(%esp)
|
||||||
|
|
||||||
|
#define MMM 0 + ARGS(%esp)
|
||||||
|
#define AA 4 + ARGS(%esp)
|
||||||
|
#define XX 8 + ARGS(%esp)
|
||||||
|
|
||||||
#define I %eax
|
#define I %eax
|
||||||
#define J %ebx
|
#define J %ebx
|
||||||
|
@ -110,6 +115,7 @@
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
|
|
||||||
|
subl $ARGS,%esp
|
||||||
pushl %ebp
|
pushl %ebp
|
||||||
pushl %edi
|
pushl %edi
|
||||||
pushl %esi
|
pushl %esi
|
||||||
|
@ -117,8 +123,35 @@
|
||||||
|
|
||||||
PROFCODE
|
PROFCODE
|
||||||
|
|
||||||
movl STACK_LDA, LDA
|
|
||||||
movl STACK_X, X
|
movl STACK_X, X
|
||||||
|
movl X, XX
|
||||||
|
movl A,J
|
||||||
|
movl J,AA
|
||||||
|
movl M,J
|
||||||
|
movl J,MMM
|
||||||
|
.L0t:
|
||||||
|
xorl J,J
|
||||||
|
addl $1,J
|
||||||
|
sall $18,J
|
||||||
|
subl $4,J
|
||||||
|
subl J,MMM
|
||||||
|
movl J,M
|
||||||
|
jge .L00t
|
||||||
|
ALIGN_4
|
||||||
|
|
||||||
|
movl MMM,%eax
|
||||||
|
addl J,%eax
|
||||||
|
jle .L999x
|
||||||
|
movl %eax, M
|
||||||
|
|
||||||
|
.L00t:
|
||||||
|
movl XX, %eax
|
||||||
|
movl %eax, X
|
||||||
|
|
||||||
|
movl AA,%eax
|
||||||
|
movl %eax,A
|
||||||
|
|
||||||
|
movl STACK_LDA, LDA
|
||||||
movl STACK_INCX, INCX
|
movl STACK_INCX, INCX
|
||||||
movl STACK_INCY, INCY
|
movl STACK_INCY, INCY
|
||||||
|
|
||||||
|
@ -188,7 +221,7 @@
|
||||||
movl Y, Y1
|
movl Y, Y1
|
||||||
|
|
||||||
movl N, J
|
movl N, J
|
||||||
ALIGN_3
|
ALIGN_4
|
||||||
|
|
||||||
.L11:
|
.L11:
|
||||||
movl BUFFER, X
|
movl BUFFER, X
|
||||||
|
@ -395,10 +428,21 @@
|
||||||
ALIGN_4
|
ALIGN_4
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
movl M,%eax
|
||||||
|
sall $ZBASE_SHIFT,%eax
|
||||||
|
addl %eax,AA
|
||||||
|
movl STACK_INCX,INCX
|
||||||
|
imull INCX,%eax
|
||||||
|
addl %eax,XX
|
||||||
|
jmp .L0t
|
||||||
|
ALIGN_4
|
||||||
|
|
||||||
|
.L999x:
|
||||||
popl %ebx
|
popl %ebx
|
||||||
popl %esi
|
popl %esi
|
||||||
popl %edi
|
popl %edi
|
||||||
popl %ebp
|
popl %ebp
|
||||||
|
addl $ARGS,%esp
|
||||||
ret
|
ret
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
Loading…
Reference in New Issue