Fixed an internal buffer overflow bug in (s/d/c/z)gemv on x86_64.
This commit is contained in:
parent
6a72840945
commit
23965f164c
|
@ -47,7 +47,7 @@
|
|||
|
||||
#ifndef WINDOWS_ABI
|
||||
|
||||
#define STACKSIZE 64
|
||||
#define STACKSIZE 128
|
||||
|
||||
#define OLD_INCX 8 + STACKSIZE(%rsp)
|
||||
#define OLD_Y 16 + STACKSIZE(%rsp)
|
||||
|
@ -55,6 +55,14 @@
|
|||
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
|
||||
#define ALPHA 48 (%rsp)
|
||||
|
||||
#define MMM 64(%rsp)
|
||||
#define NN 72(%rsp)
|
||||
#define AA 80(%rsp)
|
||||
#define XX 88(%rsp)
|
||||
#define LDAX 96(%rsp)
|
||||
#define ALPHAR 104(%rsp)
|
||||
#define ALPHAI 112(%rsp)
|
||||
|
||||
#define M %rdi
|
||||
#define N %rsi
|
||||
#define A %rcx
|
||||
|
@ -66,7 +74,7 @@
|
|||
|
||||
#else
|
||||
|
||||
#define STACKSIZE 256
|
||||
#define STACKSIZE 288
|
||||
|
||||
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
|
||||
#define OLD_A 48 + STACKSIZE(%rsp)
|
||||
|
@ -78,6 +86,14 @@
|
|||
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
|
||||
#define ALPHA 224 (%rsp)
|
||||
|
||||
#define MMM 232(%rsp)
|
||||
#define NN 240(%rsp)
|
||||
#define AA 248(%rsp)
|
||||
#define XX 256(%rsp)
|
||||
#define LDAX 264(%rsp)
|
||||
#define ALPHAR 272(%rsp)
|
||||
#define ALPHAI 280(%rsp)
|
||||
|
||||
#define M %rcx
|
||||
#define N %rdx
|
||||
#define A %r8
|
||||
|
@ -142,9 +158,37 @@
|
|||
movaps %xmm3, %xmm0
|
||||
movss OLD_ALPHA_I, %xmm1
|
||||
#endif
|
||||
movq A, AA
|
||||
movq N, NN
|
||||
movq M, MMM
|
||||
movq LDA, LDAX
|
||||
movq X, XX
|
||||
movq OLD_Y, Y
|
||||
movss %xmm0,ALPHAR
|
||||
movss %xmm1,ALPHAI
|
||||
|
||||
.L0t:
|
||||
xorq I,I
|
||||
addq $1,I
|
||||
salq $20,I
|
||||
subq I,MMM
|
||||
movq I,M
|
||||
movss ALPHAR,%xmm0
|
||||
movss ALPHAI,%xmm1
|
||||
jge .L00t
|
||||
|
||||
movq MMM,M
|
||||
addq I,M
|
||||
jle .L999x
|
||||
|
||||
.L00t:
|
||||
movq AA, A
|
||||
movq NN, N
|
||||
movq LDAX, LDA
|
||||
movq XX, X
|
||||
|
||||
movq OLD_INCX, INCX
|
||||
movq OLD_Y, Y
|
||||
# movq OLD_Y, Y
|
||||
movq OLD_INCY, INCY
|
||||
movq OLD_BUFFER, BUFFER
|
||||
|
||||
|
@ -4274,6 +4318,11 @@
|
|||
ALIGN_3
|
||||
|
||||
.L999:
|
||||
movq M, I
|
||||
salq $ZBASE_SHIFT,I
|
||||
addq I,AA
|
||||
jmp .L0t
|
||||
.L999x:
|
||||
movq 0(%rsp), %rbx
|
||||
movq 8(%rsp), %rbp
|
||||
movq 16(%rsp), %r12
|
||||
|
|
|
@ -47,13 +47,19 @@
|
|||
|
||||
#ifndef WINDOWS_ABI
|
||||
|
||||
#define STACKSIZE 64
|
||||
#define STACKSIZE 128
|
||||
|
||||
#define OLD_INCX 8 + STACKSIZE(%rsp)
|
||||
#define OLD_Y 16 + STACKSIZE(%rsp)
|
||||
#define OLD_INCY 24 + STACKSIZE(%rsp)
|
||||
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
|
||||
#define ALPHA 48 (%rsp)
|
||||
#define MMM 64(%rsp)
|
||||
#define NN 72(%rsp)
|
||||
#define AA 80(%rsp)
|
||||
#define LDAX 88(%rsp)
|
||||
#define ALPHAR 96(%rsp)
|
||||
#define ALPHAI 104(%rsp)
|
||||
|
||||
#define M %rdi
|
||||
#define N %rsi
|
||||
|
@ -66,7 +72,7 @@
|
|||
|
||||
#else
|
||||
|
||||
#define STACKSIZE 256
|
||||
#define STACKSIZE 288
|
||||
|
||||
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
|
||||
#define OLD_A 48 + STACKSIZE(%rsp)
|
||||
|
@ -78,6 +84,13 @@
|
|||
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
|
||||
#define ALPHA 224 (%rsp)
|
||||
|
||||
#define MMM 232(%rsp)
|
||||
#define NN 240(%rsp)
|
||||
#define AA 248(%rsp)
|
||||
#define LDAX 256(%rsp)
|
||||
#define ALPHAR 264(%rsp)
|
||||
#define ALPHAI 272(%rsp)
|
||||
|
||||
#define M %rcx
|
||||
#define N %rdx
|
||||
#define A %r8
|
||||
|
@ -144,6 +157,32 @@
|
|||
movss OLD_ALPHA_I, %xmm1
|
||||
#endif
|
||||
|
||||
movq A, AA
|
||||
movq N, NN
|
||||
movq M, MMM
|
||||
movq LDA, LDAX
|
||||
movss %xmm0,ALPHAR
|
||||
movss %xmm1,ALPHAI
|
||||
|
||||
.L0t:
|
||||
xorq I,I
|
||||
addq $1,I
|
||||
salq $20,I
|
||||
subq I,MMM
|
||||
movq I,M
|
||||
movss ALPHAR,%xmm0
|
||||
movss ALPHAI,%xmm1
|
||||
jge .L00t
|
||||
|
||||
movq MMM,M
|
||||
addq I,M
|
||||
jle .L999x
|
||||
|
||||
.L00t:
|
||||
movq AA, A
|
||||
movq NN, N
|
||||
movq LDAX, LDA
|
||||
|
||||
movq OLD_INCX, INCX
|
||||
movq OLD_Y, Y
|
||||
movq OLD_INCY, INCY
|
||||
|
@ -4350,6 +4389,11 @@
|
|||
ALIGN_3
|
||||
|
||||
.L999:
|
||||
movq M, I
|
||||
salq $ZBASE_SHIFT,I
|
||||
addq I,AA
|
||||
jmp .L0t
|
||||
.L999x:
|
||||
movq 0(%rsp), %rbx
|
||||
movq 8(%rsp), %rbp
|
||||
movq 16(%rsp), %r12
|
||||
|
|
|
@ -47,7 +47,7 @@
|
|||
|
||||
#ifndef WINDOWS_ABI
|
||||
|
||||
#define STACKSIZE 64
|
||||
#define STACKSIZE 128
|
||||
|
||||
#define OLD_M %rdi
|
||||
#define OLD_N %rsi
|
||||
|
@ -59,6 +59,11 @@
|
|||
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
|
||||
#define ALPHA 48 (%rsp)
|
||||
|
||||
#define MMM 56(%rsp)
|
||||
#define NN 64(%rsp)
|
||||
#define AA 72(%rsp)
|
||||
#define LDAX 80(%rsp)
|
||||
#define XX 88(%rsp)
|
||||
#else
|
||||
|
||||
#define STACKSIZE 256
|
||||
|
@ -137,17 +142,42 @@
|
|||
movq OLD_LDA, LDA
|
||||
#endif
|
||||
|
||||
movq STACK_INCX, INCX
|
||||
movq STACK_Y, Y
|
||||
movq STACK_INCY, INCY
|
||||
movq STACK_BUFFER, BUFFER
|
||||
|
||||
#ifndef WINDOWS_ABI
|
||||
movsd %xmm0, ALPHA
|
||||
#else
|
||||
movsd %xmm3, ALPHA
|
||||
#endif
|
||||
|
||||
movq STACK_Y, Y
|
||||
movq A,AA
|
||||
movq N,NN
|
||||
movq M,MMM
|
||||
movq LDA,LDAX
|
||||
movq X,XX
|
||||
|
||||
.L0t:
|
||||
xorq I,I
|
||||
addq $1,I
|
||||
salq $21,I
|
||||
subq I,MMM
|
||||
movq I,M
|
||||
jge .L00t
|
||||
|
||||
movq MMM,M
|
||||
addq I,M
|
||||
jle .L999x
|
||||
|
||||
.L00t:
|
||||
movq XX,X
|
||||
movq AA,A
|
||||
movq NN,N
|
||||
movq LDAX,LDA
|
||||
|
||||
movq STACK_INCX, INCX
|
||||
movq STACK_INCY, INCY
|
||||
movq STACK_BUFFER, BUFFER
|
||||
|
||||
|
||||
leaq -1(INCY), %rax
|
||||
|
||||
leaq (,INCX, SIZE), INCX
|
||||
|
@ -2815,6 +2845,12 @@
|
|||
ALIGN_3
|
||||
|
||||
.L999:
|
||||
leaq (, M, SIZE), %rax
|
||||
addq %rax,AA
|
||||
jmp .L0t
|
||||
ALIGN_4
|
||||
|
||||
.L999x:
|
||||
movq 0(%rsp), %rbx
|
||||
movq 8(%rsp), %rbp
|
||||
movq 16(%rsp), %r12
|
||||
|
|
|
@ -47,7 +47,7 @@
|
|||
|
||||
#ifndef WINDOWS_ABI
|
||||
|
||||
#define STACKSIZE 64
|
||||
#define STACKSIZE 128
|
||||
|
||||
#define OLD_M %rdi
|
||||
#define OLD_N %rsi
|
||||
|
@ -58,10 +58,14 @@
|
|||
#define STACK_INCY 24 + STACKSIZE(%rsp)
|
||||
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
|
||||
#define ALPHA 48 (%rsp)
|
||||
|
||||
#define MMM 56(%rsp)
|
||||
#define NN 64(%rsp)
|
||||
#define AA 72(%rsp)
|
||||
#define LDAX 80(%rsp)
|
||||
#define XX 96(%rsp)
|
||||
#else
|
||||
|
||||
#define STACKSIZE 256
|
||||
#define STACKSIZE 288
|
||||
|
||||
#define OLD_M %rcx
|
||||
#define OLD_N %rdx
|
||||
|
@ -74,6 +78,12 @@
|
|||
#define STACK_BUFFER 88 + STACKSIZE(%rsp)
|
||||
#define ALPHA 224 (%rsp)
|
||||
|
||||
/* Windows-ABI stack slots (offsets from %rsp). The code after the prologue
   stores M, N, A, LDA and X here and reloads them into registers at .L00t
   each time .L999 jumps back to .L0t for the next chunk of rows. */
#define MMM 232(%rsp)
|
||||
#define NN 240(%rsp)
|
||||
#define AA 248(%rsp)
|
||||
#define LDAX 256(%rsp)
|
||||
#define XX 264(%rsp)
|
||||
/* A bare "#define" with no macro name used to follow XX. It lives in the
   WINDOWS_ABI branch, so it was skipped on other targets but is a
   preprocessor error when WINDOWS_ABI is defined; it has been removed. */
|
||||
#endif
|
||||
|
||||
#define LDA %r8
|
||||
|
@ -137,17 +147,41 @@
|
|||
movq OLD_LDA, LDA
|
||||
#endif
|
||||
|
||||
movq STACK_INCX, INCX
|
||||
movq STACK_Y, Y
|
||||
movq STACK_INCY, INCY
|
||||
movq STACK_BUFFER, BUFFER
|
||||
|
||||
#ifndef WINDOWS_ABI
|
||||
movss %xmm0, ALPHA
|
||||
#else
|
||||
movss %xmm3, ALPHA
|
||||
#endif
|
||||
|
||||
|
||||
movq M,MMM
|
||||
movq A,AA
|
||||
movq N,NN
|
||||
movq LDA,LDAX
|
||||
movq X,XX
|
||||
movq STACK_Y, Y
|
||||
.L0t:
|
||||
xorq I,I
|
||||
addq $1,I
|
||||
salq $22,I
|
||||
subq I,MMM
|
||||
movq I,M
|
||||
jge .L00t
|
||||
|
||||
movq MMM,M
|
||||
addq I,M
|
||||
jle .L999x
|
||||
|
||||
.L00t:
|
||||
movq AA,A
|
||||
movq NN,N
|
||||
movq LDAX,LDA
|
||||
movq XX,X
|
||||
|
||||
movq STACK_INCX, INCX
|
||||
movq STACK_INCY, INCY
|
||||
movq STACK_BUFFER, BUFFER
|
||||
|
||||
leaq (,INCX, SIZE), INCX
|
||||
leaq (,INCY, SIZE), INCY
|
||||
leaq (,LDA, SIZE), LDA
|
||||
|
@ -5990,6 +6024,12 @@
|
|||
ALIGN_3
|
||||
|
||||
.L999:
|
||||
leaq (,M,SIZE),%rax
|
||||
addq %rax,AA
|
||||
jmp .L0t
|
||||
ALIGN_4
|
||||
|
||||
.L999x:
|
||||
movq 0(%rsp), %rbx
|
||||
movq 8(%rsp), %rbp
|
||||
movq 16(%rsp), %r12
|
||||
|
|
|
@ -63,7 +63,7 @@
|
|||
|
||||
#else
|
||||
|
||||
#define STACKSIZE 256
|
||||
#define STACKSIZE 288
|
||||
|
||||
#define OLD_M %rcx
|
||||
#define OLD_N %rdx
|
||||
|
@ -74,10 +74,10 @@
|
|||
#define STACK_Y 72 + STACKSIZE(%rsp)
|
||||
#define STACK_INCY 80 + STACKSIZE(%rsp)
|
||||
#define STACK_BUFFER 88 + STACKSIZE(%rsp)
|
||||
#define MMM 216(%rsp)
|
||||
#define NN 224(%rsp)
|
||||
#define AA 232(%rsp)
|
||||
#define LDAX 240(%rsp)
|
||||
#define MMM 232(%rsp)
|
||||
#define NN 240(%rsp)
|
||||
#define AA 248(%rsp)
|
||||
#define LDAX 256(%rsp)
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
|
||||
#ifndef WINDOWS_ABI
|
||||
|
||||
#define STACKSIZE 64
|
||||
#define STACKSIZE 128
|
||||
|
||||
#define OLD_INCX 8 + STACKSIZE(%rsp)
|
||||
#define OLD_Y 16 + STACKSIZE(%rsp)
|
||||
|
@ -51,6 +51,14 @@
|
|||
#define ALPHA_R 48 (%rsp)
|
||||
#define ALPHA_I 56 (%rsp)
|
||||
|
||||
#define MMM 64(%rsp)
|
||||
#define NN 72(%rsp)
|
||||
#define AA 80(%rsp)
|
||||
#define XX 88(%rsp)
|
||||
#define LDAX 96(%rsp)
|
||||
#define ALPHAR 104(%rsp)
|
||||
#define ALPHAI 112(%rsp)
|
||||
|
||||
#define M %rdi
|
||||
#define N %rsi
|
||||
#define A %rcx
|
||||
|
@ -62,7 +70,7 @@
|
|||
|
||||
#else
|
||||
|
||||
#define STACKSIZE 256
|
||||
#define STACKSIZE 288
|
||||
|
||||
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
|
||||
#define OLD_A 48 + STACKSIZE(%rsp)
|
||||
|
@ -75,6 +83,14 @@
|
|||
/* Windows-ABI stack slots, expressed as offsets from %rsp. */
#define ALPHA_R 224 (%rsp)
|
||||
#define ALPHA_I 232 (%rsp)
|
||||
|
||||
/* NOTE(review): MMM below is defined as 232(%rsp), the same slot as ALPHA_I
   above. The code at .L0t keeps the remaining row count in MMM (subq I,MMM),
   so any store to ALPHA_I would overwrite that counter. Whether ALPHA_I is
   written in the part of the file not shown here must be confirmed; if it
   is, the MMM..ALPHAI slots need to move up and STACKSIZE must grow. */
#define MMM 232(%rsp)
|
||||
#define NN 240(%rsp)
|
||||
#define AA 248(%rsp)
|
||||
#define XX 256(%rsp)
|
||||
#define LDAX 264(%rsp)
|
||||
/* ALPHAR / ALPHAI receive %xmm0 / %xmm1 before the .L0t loop and are loaded
   back into those registers at the top of every pass. */
#define ALPHAR 272(%rsp)
|
||||
#define ALPHAI 280(%rsp)
|
||||
|
||||
#define M %rcx
|
||||
#define N %rdx
|
||||
#define A %r8
|
||||
|
@ -136,8 +152,37 @@
|
|||
movsd OLD_ALPHA_I, %xmm1
|
||||
#endif
|
||||
|
||||
movq OLD_INCX, INCX
|
||||
movq A, AA
|
||||
movq N, NN
|
||||
movq M, MMM
|
||||
movq LDA, LDAX
|
||||
movq X, XX
|
||||
movq OLD_Y, Y
|
||||
movsd %xmm0,ALPHAR
|
||||
movsd %xmm1,ALPHAI
|
||||
|
||||
.L0t:
|
||||
xorq I,I
|
||||
addq $1,I
|
||||
salq $18,I
|
||||
subq I,MMM
|
||||
movq I,M
|
||||
movsd ALPHAR,%xmm0
|
||||
movsd ALPHAI,%xmm1
|
||||
jge .L00t
|
||||
|
||||
movq MMM,M
|
||||
addq I,M
|
||||
jle .L999x
|
||||
|
||||
.L00t:
|
||||
movq AA, A
|
||||
movq NN, N
|
||||
movq LDAX, LDA
|
||||
movq XX, X
|
||||
|
||||
movq OLD_INCX, INCX
|
||||
# movq OLD_Y, Y
|
||||
movq OLD_INCY, INCY
|
||||
movq OLD_BUFFER, BUFFER
|
||||
|
||||
|
@ -2673,6 +2718,12 @@
|
|||
ALIGN_3
|
||||
|
||||
.L999:
|
||||
movq M, I
|
||||
salq $ZBASE_SHIFT,I
|
||||
addq I,AA
|
||||
jmp .L0t
|
||||
.L999x:
|
||||
|
||||
movq 0(%rsp), %rbx
|
||||
movq 8(%rsp), %rbp
|
||||
movq 16(%rsp), %r12
|
||||
|
|
|
@ -42,13 +42,20 @@
|
|||
|
||||
#ifndef WINDOWS_ABI
|
||||
|
||||
#define STACKSIZE 64
|
||||
#define STACKSIZE 128
|
||||
|
||||
#define OLD_INCX 8 + STACKSIZE(%rsp)
|
||||
#define OLD_Y 16 + STACKSIZE(%rsp)
|
||||
#define OLD_INCY 24 + STACKSIZE(%rsp)
|
||||
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
|
||||
|
||||
#define MMM 64(%rsp)
|
||||
#define NN 72(%rsp)
|
||||
#define AA 80(%rsp)
|
||||
#define LDAX 88(%rsp)
|
||||
#define ALPHAR 96(%rsp)
|
||||
#define ALPHAI 104(%rsp)
|
||||
|
||||
#define M %rdi
|
||||
#define N %rsi
|
||||
#define A %rcx
|
||||
|
@ -60,7 +67,7 @@
|
|||
|
||||
#else
|
||||
|
||||
#define STACKSIZE 256
|
||||
#define STACKSIZE 288
|
||||
|
||||
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
|
||||
#define OLD_A 48 + STACKSIZE(%rsp)
|
||||
|
@ -71,6 +78,13 @@
|
|||
#define OLD_INCY 88 + STACKSIZE(%rsp)
|
||||
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
|
||||
|
||||
#define MMM 232(%rsp)
|
||||
#define NN 240(%rsp)
|
||||
#define AA 248(%rsp)
|
||||
#define LDAX 256(%rsp)
|
||||
#define ALPHAR 264(%rsp)
|
||||
#define ALPHAI 272(%rsp)
|
||||
|
||||
#define M %rcx
|
||||
#define N %rdx
|
||||
#define A %r8
|
||||
|
@ -135,6 +149,32 @@
|
|||
movsd OLD_ALPHA_I, %xmm1
|
||||
#endif
|
||||
|
||||
movq A, AA
|
||||
movq N, NN
|
||||
movq M, MMM
|
||||
movq LDA, LDAX
|
||||
movsd %xmm0,ALPHAR
|
||||
movsd %xmm1,ALPHAI
|
||||
|
||||
.L0t:
|
||||
xorq I,I
|
||||
addq $1,I
|
||||
salq $19,I
|
||||
subq I,MMM
|
||||
movq I,M
|
||||
movsd ALPHAR,%xmm0
|
||||
movsd ALPHAI,%xmm1
|
||||
jge .L00t
|
||||
|
||||
movq MMM,M
|
||||
addq I,M
|
||||
jle .L999x
|
||||
|
||||
.L00t:
|
||||
movq AA, A
|
||||
movq NN, N
|
||||
movq LDAX, LDA
|
||||
|
||||
movq OLD_INCX, INCX
|
||||
movq OLD_Y, Y
|
||||
movq OLD_INCY, INCY
|
||||
|
@ -2405,6 +2445,12 @@
|
|||
ALIGN_3
|
||||
|
||||
.L999:
|
||||
movq M, I
|
||||
salq $ZBASE_SHIFT,I
|
||||
addq I,AA
|
||||
jmp .L0t
|
||||
.L999x:
|
||||
|
||||
movq 0(%rsp), %rbx
|
||||
movq 8(%rsp), %rbp
|
||||
movq 16(%rsp), %r12
|
||||
|
|
Loading…
Reference in New Issue