Fixed internal buffer overflow bug in (s/d/c/z)gemv on x86_64.

wangqian 2013-05-29 19:48:31 +08:00
parent 6a72840945
commit 23965f164c
7 changed files with 297 additions and 31 deletions
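
All seven kernels receive the same change: instead of pushing the whole M dimension through the fixed-size internal BUFFER in one pass, the new .L0t loop splits M into row blocks (between 1 << 18 and 1 << 22 rows, depending on the kernel), runs one block through the existing kernel body, advances the A pointer (AA) by the rows just processed (the "addq I,AA" at .L999), and jumps back until MMM is exhausted, so a single pass can no longer overrun the buffer. A minimal C-level sketch of that blocking idea follows; it is not OpenBLAS code, and the names (gemv_n_pass, MAX_ROWS_PER_PASS) and the simplified non-transposed, unit-stride case are purely illustrative.

/* Illustrative sketch only: splitting M into bounded row blocks so the
 * per-pass workspace stays within a fixed size, mirroring the new .L0t loop. */
#include <stdio.h>

#define MAX_ROWS_PER_PASS (1L << 20)   /* mirrors the "salq $20,I" immediate */

/* stand-in for one pass of the assembly kernel:
 * y[0..m) += alpha * A(0..m, 0..n) * x, column-major A with leading
 * dimension lda; m is guaranteed to be at most MAX_ROWS_PER_PASS */
static void gemv_n_pass(long m, long n, double alpha,
                        const double *a, long lda,
                        const double *x, double *y)
{
    for (long j = 0; j < n; j++)
        for (long i = 0; i < m; i++)
            y[i] += alpha * a[i + j * lda] * x[j];
}

/* driver corresponding to the new .L0t loop: walk down A (and the y slice
 * here) in row blocks until all M rows have been handled */
static void gemv_n_blocked(long m, long n, double alpha,
                           const double *a, long lda,
                           const double *x, double *y)
{
    long done = 0;
    while (done < m) {
        long chunk = m - done;
        if (chunk > MAX_ROWS_PER_PASS)
            chunk = MAX_ROWS_PER_PASS;
        /* "addq I,AA": advance the A panel by the rows already processed */
        gemv_n_pass(chunk, n, alpha, a + done, lda, x, y + done);
        done += chunk;
    }
}

int main(void)
{
    double a[6] = {1, 2, 3, 4, 5, 6};   /* 3x2, column-major */
    double x[2] = {1, 1};
    double y[3] = {0, 0, 0};
    gemv_n_blocked(3, 2, 1.0, a, 3, x, y);
    printf("%g %g %g\n", y[0], y[1], y[2]);   /* prints 5 7 9 */
    return 0;
}

The per-kernel shift in the diffs below ($22 for single real, $21 for double real, $20 for single complex, $19 and $18 for double complex) appears to shrink the row block as the element size grows, keeping the per-pass workspace bounded in the same way.
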

View File

@@ -47,14 +47,22 @@
#ifndef WINDOWS_ABI
#define STACKSIZE 64
#define STACKSIZE 128
#define OLD_INCX 8 + STACKSIZE(%rsp)
#define OLD_Y 16 + STACKSIZE(%rsp)
#define OLD_INCY 24 + STACKSIZE(%rsp)
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA 48 (%rsp)
#define MMM 64(%rsp)
#define NN 72(%rsp)
#define AA 80(%rsp)
#define XX 88(%rsp)
#define LDAX 96(%rsp)
#define ALPHAR 104(%rsp)
#define ALPHAI 112(%rsp)
#define M %rdi
#define N %rsi
#define A %rcx
@@ -66,7 +74,7 @@
#else
#define STACKSIZE 256
#define STACKSIZE 288
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
#define OLD_A 48 + STACKSIZE(%rsp)
@@ -78,6 +86,14 @@
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
#define ALPHA 224 (%rsp)
#define MMM 232(%rsp)
#define NN 240(%rsp)
#define AA 248(%rsp)
#define XX 256(%rsp)
#define LDAX 264(%rsp)
#define ALPHAR 272(%rsp)
#define ALPHAI 280(%rsp)
#define M %rcx
#define N %rdx
#define A %r8
@@ -142,9 +158,37 @@
movaps %xmm3, %xmm0
movss OLD_ALPHA_I, %xmm1
#endif
movq A, AA
movq N, NN
movq M, MMM
movq LDA, LDAX
movq X, XX
movq OLD_Y, Y
movss %xmm0,ALPHAR
movss %xmm1,ALPHAI
.L0t:
xorq I,I
addq $1,I
salq $20,I
subq I,MMM
movq I,M
movss ALPHAR,%xmm0
movss ALPHAI,%xmm1
jge .L00t
movq MMM,M
addq I,M
jle .L999x
.L00t:
movq AA, A
movq NN, N
movq LDAX, LDA
movq XX, X
movq OLD_INCX, INCX
movq OLD_Y, Y
# movq OLD_Y, Y
movq OLD_INCY, INCY
movq OLD_BUFFER, BUFFER
@@ -4274,6 +4318,11 @@
ALIGN_3
.L999:
movq M, I
salq $ZBASE_SHIFT,I
addq I,AA
jmp .L0t
.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12

View File

@@ -47,13 +47,19 @@
#ifndef WINDOWS_ABI
#define STACKSIZE 64
#define STACKSIZE 128
#define OLD_INCX 8 + STACKSIZE(%rsp)
#define OLD_Y 16 + STACKSIZE(%rsp)
#define OLD_INCY 24 + STACKSIZE(%rsp)
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA 48 (%rsp)
#define MMM 64(%rsp)
#define NN 72(%rsp)
#define AA 80(%rsp)
#define LDAX 88(%rsp)
#define ALPHAR 96(%rsp)
#define ALPHAI 104(%rsp)
#define M %rdi
#define N %rsi
@@ -66,7 +72,7 @@
#else
#define STACKSIZE 256
#define STACKSIZE 288
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
#define OLD_A 48 + STACKSIZE(%rsp)
@@ -78,6 +84,13 @@
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
#define ALPHA 224 (%rsp)
#define MMM 232(%rsp)
#define NN 240(%rsp)
#define AA 248(%rsp)
#define LDAX 256(%rsp)
#define ALPHAR 264(%rsp)
#define ALPHAI 272(%rsp)
#define M %rcx
#define N %rdx
#define A %r8
@@ -144,6 +157,32 @@
movss OLD_ALPHA_I, %xmm1
#endif
movq A, AA
movq N, NN
movq M, MMM
movq LDA, LDAX
movss %xmm0,ALPHAR
movss %xmm1,ALPHAI
.L0t:
xorq I,I
addq $1,I
salq $20,I
subq I,MMM
movq I,M
movss ALPHAR,%xmm0
movss ALPHAI,%xmm1
jge .L00t
movq MMM,M
addq I,M
jle .L999x
.L00t:
movq AA, A
movq NN, N
movq LDAX, LDA
movq OLD_INCX, INCX
movq OLD_Y, Y
movq OLD_INCY, INCY
@@ -4350,6 +4389,11 @@
ALIGN_3
.L999:
movq M, I
salq $ZBASE_SHIFT,I
addq I,AA
jmp .L0t
.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12

View File

@@ -47,7 +47,7 @@
#ifndef WINDOWS_ABI
#define STACKSIZE 64
#define STACKSIZE 128
#define OLD_M %rdi
#define OLD_N %rsi
@@ -59,6 +59,11 @@
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA 48 (%rsp)
#define MMM 56(%rsp)
#define NN 64(%rsp)
#define AA 72(%rsp)
#define LDAX 80(%rsp)
#define XX 88(%rsp)
#else
#define STACKSIZE 256
@@ -137,17 +142,42 @@
movq OLD_LDA, LDA
#endif
movq STACK_INCX, INCX
movq STACK_Y, Y
movq STACK_INCY, INCY
movq STACK_BUFFER, BUFFER
#ifndef WINDOWS_ABI
movsd %xmm0, ALPHA
#else
movsd %xmm3, ALPHA
#endif
movq STACK_Y, Y
movq A,AA
movq N,NN
movq M,MMM
movq LDA,LDAX
movq X,XX
.L0t:
xorq I,I
addq $1,I
salq $21,I
subq I,MMM
movq I,M
jge .L00t
movq MMM,M
addq I,M
jle .L999x
.L00t:
movq XX,X
movq AA,A
movq NN,N
movq LDAX,LDA
movq STACK_INCX, INCX
movq STACK_INCY, INCY
movq STACK_BUFFER, BUFFER
leaq -1(INCY), %rax
leaq (,INCX, SIZE), INCX
@@ -2815,6 +2845,12 @@
ALIGN_3
.L999:
leaq (, M, SIZE), %rax
addq %rax,AA
jmp .L0t
ALIGN_4
.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12

View File

@@ -47,7 +47,7 @@
#ifndef WINDOWS_ABI
#define STACKSIZE 64
#define STACKSIZE 128
#define OLD_M %rdi
#define OLD_N %rsi
@@ -58,10 +58,14 @@
#define STACK_INCY 24 + STACKSIZE(%rsp)
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA 48 (%rsp)
#define MMM 56(%rsp)
#define NN 64(%rsp)
#define AA 72(%rsp)
#define LDAX 80(%rsp)
#define XX 96(%rsp)
#else
#define STACKSIZE 256
#define STACKSIZE 288
#define OLD_M %rcx
#define OLD_N %rdx
@@ -74,6 +78,12 @@
#define STACK_BUFFER 88 + STACKSIZE(%rsp)
#define ALPHA 224 (%rsp)
#define MMM 232(%rsp)
#define NN 240(%rsp)
#define AA 248(%rsp)
#define LDAX 256(%rsp)
#define XX 264(%rsp)
#define
#endif
#define LDA %r8
@@ -137,17 +147,41 @@
movq OLD_LDA, LDA
#endif
movq STACK_INCX, INCX
movq STACK_Y, Y
movq STACK_INCY, INCY
movq STACK_BUFFER, BUFFER
#ifndef WINDOWS_ABI
movss %xmm0, ALPHA
#else
movss %xmm3, ALPHA
#endif
movq M,MMM
movq A,AA
movq N,NN
movq LDA,LDAX
movq X,XX
movq STACK_Y, Y
.L0t:
xorq I,I
addq $1,I
salq $22,I
subq I,MMM
movq I,M
jge .L00t
movq MMM,M
addq I,M
jle .L999x
.L00t:
movq AA,A
movq NN,N
movq LDAX,LDA
movq XX,X
movq STACK_INCX, INCX
movq STACK_INCY, INCY
movq STACK_BUFFER, BUFFER
leaq (,INCX, SIZE), INCX
leaq (,INCY, SIZE), INCY
leaq (,LDA, SIZE), LDA
@@ -5990,6 +6024,12 @@
ALIGN_3
.L999:
leaq (,M,SIZE),%rax
addq %rax,AA
jmp .L0t
ALIGN_4
.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12

View File

@@ -63,7 +63,7 @@
#else
#define STACKSIZE 256
#define STACKSIZE 288
#define OLD_M %rcx
#define OLD_N %rdx
@@ -74,10 +74,10 @@
#define STACK_Y 72 + STACKSIZE(%rsp)
#define STACK_INCY 80 + STACKSIZE(%rsp)
#define STACK_BUFFER 88 + STACKSIZE(%rsp)
#define MMM 216(%rsp)
#define NN 224(%rsp)
#define AA 232(%rsp)
#define LDAX 240(%rsp)
#define MMM 232(%rsp)
#define NN 240(%rsp)
#define AA 248(%rsp)
#define LDAX 256(%rsp)
#endif

View File

@@ -42,7 +42,7 @@
#ifndef WINDOWS_ABI
#define STACKSIZE 64
#define STACKSIZE 128
#define OLD_INCX 8 + STACKSIZE(%rsp)
#define OLD_Y 16 + STACKSIZE(%rsp)
@@ -50,7 +50,15 @@
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA_R 48 (%rsp)
#define ALPHA_I 56 (%rsp)
#define MMM 64(%rsp)
#define NN 72(%rsp)
#define AA 80(%rsp)
#define XX 88(%rsp)
#define LDAX 96(%rsp)
#define ALPHAR 104(%rsp)
#define ALPHAI 112(%rsp)
#define M %rdi
#define N %rsi
#define A %rcx
@@ -62,7 +70,7 @@
#else
#define STACKSIZE 256
#define STACKSIZE 288
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
#define OLD_A 48 + STACKSIZE(%rsp)
@@ -75,6 +83,14 @@
#define ALPHA_R 224 (%rsp)
#define ALPHA_I 232 (%rsp)
#define MMM 232(%rsp)
#define NN 240(%rsp)
#define AA 248(%rsp)
#define XX 256(%rsp)
#define LDAX 264(%rsp)
#define ALPHAR 272(%rsp)
#define ALPHAI 280(%rsp)
#define M %rcx
#define N %rdx
#define A %r8
@@ -136,8 +152,37 @@
movsd OLD_ALPHA_I, %xmm1
#endif
movq OLD_INCX, INCX
movq A, AA
movq N, NN
movq M, MMM
movq LDA, LDAX
movq X, XX
movq OLD_Y, Y
movsd %xmm0,ALPHAR
movsd %xmm1,ALPHAI
.L0t:
xorq I,I
addq $1,I
salq $18,I
subq I,MMM
movq I,M
movsd ALPHAR,%xmm0
movsd ALPHAI,%xmm1
jge .L00t
movq MMM,M
addq I,M
jle .L999x
.L00t:
movq AA, A
movq NN, N
movq LDAX, LDA
movq XX, X
movq OLD_INCX, INCX
# movq OLD_Y, Y
movq OLD_INCY, INCY
movq OLD_BUFFER, BUFFER
@@ -2673,6 +2718,12 @@
ALIGN_3
.L999:
movq M, I
salq $ZBASE_SHIFT,I
addq I,AA
jmp .L0t
.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12

View File

@@ -42,13 +42,20 @@
#ifndef WINDOWS_ABI
#define STACKSIZE 64
#define STACKSIZE 128
#define OLD_INCX 8 + STACKSIZE(%rsp)
#define OLD_Y 16 + STACKSIZE(%rsp)
#define OLD_INCY 24 + STACKSIZE(%rsp)
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
#define MMM 64(%rsp)
#define NN 72(%rsp)
#define AA 80(%rsp)
#define LDAX 88(%rsp)
#define ALPHAR 96(%rsp)
#define ALPHAI 104(%rsp)
#define M %rdi
#define N %rsi
#define A %rcx
@@ -60,7 +67,7 @@
#else
#define STACKSIZE 256
#define STACKSIZE 288
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
#define OLD_A 48 + STACKSIZE(%rsp)
@@ -71,6 +78,13 @@
#define OLD_INCY 88 + STACKSIZE(%rsp)
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
#define MMM 232(%rsp)
#define NN 240(%rsp)
#define AA 248(%rsp)
#define LDAX 256(%rsp)
#define ALPHAR 264(%rsp)
#define ALPHAI 272(%rsp)
#define M %rcx
#define N %rdx
#define A %r8
@@ -135,6 +149,32 @@
movsd OLD_ALPHA_I, %xmm1
#endif
movq A, AA
movq N, NN
movq M, MMM
movq LDA, LDAX
movsd %xmm0,ALPHAR
movsd %xmm1,ALPHAI
.L0t:
xorq I,I
addq $1,I
salq $19,I
subq I,MMM
movq I,M
movsd ALPHAR,%xmm0
movsd ALPHAI,%xmm1
jge .L00t
movq MMM,M
addq I,M
jle .L999x
.L00t:
movq AA, A
movq NN, N
movq LDAX, LDA
movq OLD_INCX, INCX
movq OLD_Y, Y
movq OLD_INCY, INCY
@@ -2405,6 +2445,12 @@
ALIGN_3
.L999:
movq M, I
salq $ZBASE_SHIFT,I
addq I,AA
jmp .L0t
.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12