Fixed internal buffer overflow bug in (s/d/c/z)gemv on x86_64.
This commit is contained in:
parent
6a72840945
commit
23965f164c
|
@ -47,7 +47,7 @@
|
||||||
|
|
||||||
#ifndef WINDOWS_ABI
|
#ifndef WINDOWS_ABI
|
||||||
|
|
||||||
#define STACKSIZE 64
|
#define STACKSIZE 128
|
||||||
|
|
||||||
#define OLD_INCX 8 + STACKSIZE(%rsp)
|
#define OLD_INCX 8 + STACKSIZE(%rsp)
|
||||||
#define OLD_Y 16 + STACKSIZE(%rsp)
|
#define OLD_Y 16 + STACKSIZE(%rsp)
|
||||||
|
@ -55,6 +55,14 @@
|
||||||
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
|
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
|
||||||
#define ALPHA 48 (%rsp)
|
#define ALPHA 48 (%rsp)
|
||||||
|
|
||||||
|
#define MMM 64(%rsp)
|
||||||
|
#define NN 72(%rsp)
|
||||||
|
#define AA 80(%rsp)
|
||||||
|
#define XX 88(%rsp)
|
||||||
|
#define LDAX 96(%rsp)
|
||||||
|
#define ALPHAR 104(%rsp)
|
||||||
|
#define ALPHAI 112(%rsp)
|
||||||
|
|
||||||
#define M %rdi
|
#define M %rdi
|
||||||
#define N %rsi
|
#define N %rsi
|
||||||
#define A %rcx
|
#define A %rcx
|
||||||
|
@ -66,7 +74,7 @@
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 288
|
||||||
|
|
||||||
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
|
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
|
||||||
#define OLD_A 48 + STACKSIZE(%rsp)
|
#define OLD_A 48 + STACKSIZE(%rsp)
|
||||||
|
@ -78,6 +86,14 @@
|
||||||
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
|
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
|
||||||
#define ALPHA 224 (%rsp)
|
#define ALPHA 224 (%rsp)
|
||||||
|
|
||||||
|
#define MMM 232(%rsp)
|
||||||
|
#define NN 240(%rsp)
|
||||||
|
#define AA 248(%rsp)
|
||||||
|
#define XX 256(%rsp)
|
||||||
|
#define LDAX 264(%rsp)
|
||||||
|
#define ALPHAR 272(%rsp)
|
||||||
|
#define ALPHAI 280(%rsp)
|
||||||
|
|
||||||
#define M %rcx
|
#define M %rcx
|
||||||
#define N %rdx
|
#define N %rdx
|
||||||
#define A %r8
|
#define A %r8
|
||||||
|
@ -142,9 +158,37 @@
|
||||||
movaps %xmm3, %xmm0
|
movaps %xmm3, %xmm0
|
||||||
movss OLD_ALPHA_I, %xmm1
|
movss OLD_ALPHA_I, %xmm1
|
||||||
#endif
|
#endif
|
||||||
|
movq A, AA
|
||||||
|
movq N, NN
|
||||||
|
movq M, MMM
|
||||||
|
movq LDA, LDAX
|
||||||
|
movq X, XX
|
||||||
|
movq OLD_Y, Y
|
||||||
|
movss %xmm0,ALPHAR
|
||||||
|
movss %xmm1,ALPHAI
|
||||||
|
|
||||||
|
.L0t:
|
||||||
|
xorq I,I
|
||||||
|
addq $1,I
|
||||||
|
salq $20,I
|
||||||
|
subq I,MMM
|
||||||
|
movq I,M
|
||||||
|
movss ALPHAR,%xmm0
|
||||||
|
movss ALPHAI,%xmm1
|
||||||
|
jge .L00t
|
||||||
|
|
||||||
|
movq MMM,M
|
||||||
|
addq I,M
|
||||||
|
jle .L999x
|
||||||
|
|
||||||
|
.L00t:
|
||||||
|
movq AA, A
|
||||||
|
movq NN, N
|
||||||
|
movq LDAX, LDA
|
||||||
|
movq XX, X
|
||||||
|
|
||||||
movq OLD_INCX, INCX
|
movq OLD_INCX, INCX
|
||||||
movq OLD_Y, Y
|
# movq OLD_Y, Y
|
||||||
movq OLD_INCY, INCY
|
movq OLD_INCY, INCY
|
||||||
movq OLD_BUFFER, BUFFER
|
movq OLD_BUFFER, BUFFER
|
||||||
|
|
||||||
|
@ -4274,6 +4318,11 @@
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
movq M, I
|
||||||
|
salq $ZBASE_SHIFT,I
|
||||||
|
addq I,AA
|
||||||
|
jmp .L0t
|
||||||
|
.L999x:
|
||||||
movq 0(%rsp), %rbx
|
movq 0(%rsp), %rbx
|
||||||
movq 8(%rsp), %rbp
|
movq 8(%rsp), %rbp
|
||||||
movq 16(%rsp), %r12
|
movq 16(%rsp), %r12
|
||||||
|
|
|
@ -47,13 +47,19 @@
|
||||||
|
|
||||||
#ifndef WINDOWS_ABI
|
#ifndef WINDOWS_ABI
|
||||||
|
|
||||||
#define STACKSIZE 64
|
#define STACKSIZE 128
|
||||||
|
|
||||||
#define OLD_INCX 8 + STACKSIZE(%rsp)
|
#define OLD_INCX 8 + STACKSIZE(%rsp)
|
||||||
#define OLD_Y 16 + STACKSIZE(%rsp)
|
#define OLD_Y 16 + STACKSIZE(%rsp)
|
||||||
#define OLD_INCY 24 + STACKSIZE(%rsp)
|
#define OLD_INCY 24 + STACKSIZE(%rsp)
|
||||||
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
|
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
|
||||||
#define ALPHA 48 (%rsp)
|
#define ALPHA 48 (%rsp)
|
||||||
|
#define MMM 64(%rsp)
|
||||||
|
#define NN 72(%rsp)
|
||||||
|
#define AA 80(%rsp)
|
||||||
|
#define LDAX 88(%rsp)
|
||||||
|
#define ALPHAR 96(%rsp)
|
||||||
|
#define ALPHAI 104(%rsp)
|
||||||
|
|
||||||
#define M %rdi
|
#define M %rdi
|
||||||
#define N %rsi
|
#define N %rsi
|
||||||
|
@ -66,7 +72,7 @@
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 288
|
||||||
|
|
||||||
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
|
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
|
||||||
#define OLD_A 48 + STACKSIZE(%rsp)
|
#define OLD_A 48 + STACKSIZE(%rsp)
|
||||||
|
@ -78,6 +84,13 @@
|
||||||
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
|
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
|
||||||
#define ALPHA 224 (%rsp)
|
#define ALPHA 224 (%rsp)
|
||||||
|
|
||||||
|
#define MMM 232(%rsp)
|
||||||
|
#define NN 240(%rsp)
|
||||||
|
#define AA 248(%rsp)
|
||||||
|
#define LDAX 256(%rsp)
|
||||||
|
#define ALPHAR 264(%rsp)
|
||||||
|
#define ALPHAI 272(%rsp)
|
||||||
|
|
||||||
#define M %rcx
|
#define M %rcx
|
||||||
#define N %rdx
|
#define N %rdx
|
||||||
#define A %r8
|
#define A %r8
|
||||||
|
@ -144,6 +157,32 @@
|
||||||
movss OLD_ALPHA_I, %xmm1
|
movss OLD_ALPHA_I, %xmm1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
movq A, AA
|
||||||
|
movq N, NN
|
||||||
|
movq M, MMM
|
||||||
|
movq LDA, LDAX
|
||||||
|
movss %xmm0,ALPHAR
|
||||||
|
movss %xmm1,ALPHAI
|
||||||
|
|
||||||
|
.L0t:
|
||||||
|
xorq I,I
|
||||||
|
addq $1,I
|
||||||
|
salq $20,I
|
||||||
|
subq I,MMM
|
||||||
|
movq I,M
|
||||||
|
movss ALPHAR,%xmm0
|
||||||
|
movss ALPHAI,%xmm1
|
||||||
|
jge .L00t
|
||||||
|
|
||||||
|
movq MMM,M
|
||||||
|
addq I,M
|
||||||
|
jle .L999x
|
||||||
|
|
||||||
|
.L00t:
|
||||||
|
movq AA, A
|
||||||
|
movq NN, N
|
||||||
|
movq LDAX, LDA
|
||||||
|
|
||||||
movq OLD_INCX, INCX
|
movq OLD_INCX, INCX
|
||||||
movq OLD_Y, Y
|
movq OLD_Y, Y
|
||||||
movq OLD_INCY, INCY
|
movq OLD_INCY, INCY
|
||||||
|
@ -4350,6 +4389,11 @@
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
movq M, I
|
||||||
|
salq $ZBASE_SHIFT,I
|
||||||
|
addq I,AA
|
||||||
|
jmp .L0t
|
||||||
|
.L999x:
|
||||||
movq 0(%rsp), %rbx
|
movq 0(%rsp), %rbx
|
||||||
movq 8(%rsp), %rbp
|
movq 8(%rsp), %rbp
|
||||||
movq 16(%rsp), %r12
|
movq 16(%rsp), %r12
|
||||||
|
|
|
@ -47,7 +47,7 @@
|
||||||
|
|
||||||
#ifndef WINDOWS_ABI
|
#ifndef WINDOWS_ABI
|
||||||
|
|
||||||
#define STACKSIZE 64
|
#define STACKSIZE 128
|
||||||
|
|
||||||
#define OLD_M %rdi
|
#define OLD_M %rdi
|
||||||
#define OLD_N %rsi
|
#define OLD_N %rsi
|
||||||
|
@ -59,6 +59,11 @@
|
||||||
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
|
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
|
||||||
#define ALPHA 48 (%rsp)
|
#define ALPHA 48 (%rsp)
|
||||||
|
|
||||||
|
#define MMM 56(%rsp)
|
||||||
|
#define NN 64(%rsp)
|
||||||
|
#define AA 72(%rsp)
|
||||||
|
#define LDAX 80(%rsp)
|
||||||
|
#define XX 88(%rsp)
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 256
|
||||||
|
@ -137,17 +142,42 @@
|
||||||
movq OLD_LDA, LDA
|
movq OLD_LDA, LDA
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
movq STACK_INCX, INCX
|
|
||||||
movq STACK_Y, Y
|
|
||||||
movq STACK_INCY, INCY
|
|
||||||
movq STACK_BUFFER, BUFFER
|
|
||||||
|
|
||||||
#ifndef WINDOWS_ABI
|
#ifndef WINDOWS_ABI
|
||||||
movsd %xmm0, ALPHA
|
movsd %xmm0, ALPHA
|
||||||
#else
|
#else
|
||||||
movsd %xmm3, ALPHA
|
movsd %xmm3, ALPHA
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
movq STACK_Y, Y
|
||||||
|
movq A,AA
|
||||||
|
movq N,NN
|
||||||
|
movq M,MMM
|
||||||
|
movq LDA,LDAX
|
||||||
|
movq X,XX
|
||||||
|
|
||||||
|
.L0t:
|
||||||
|
xorq I,I
|
||||||
|
addq $1,I
|
||||||
|
salq $21,I
|
||||||
|
subq I,MMM
|
||||||
|
movq I,M
|
||||||
|
jge .L00t
|
||||||
|
|
||||||
|
movq MMM,M
|
||||||
|
addq I,M
|
||||||
|
jle .L999x
|
||||||
|
|
||||||
|
.L00t:
|
||||||
|
movq XX,X
|
||||||
|
movq AA,A
|
||||||
|
movq NN,N
|
||||||
|
movq LDAX,LDA
|
||||||
|
|
||||||
|
movq STACK_INCX, INCX
|
||||||
|
movq STACK_INCY, INCY
|
||||||
|
movq STACK_BUFFER, BUFFER
|
||||||
|
|
||||||
|
|
||||||
leaq -1(INCY), %rax
|
leaq -1(INCY), %rax
|
||||||
|
|
||||||
leaq (,INCX, SIZE), INCX
|
leaq (,INCX, SIZE), INCX
|
||||||
|
@ -2815,6 +2845,12 @@
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
leaq (, M, SIZE), %rax
|
||||||
|
addq %rax,AA
|
||||||
|
jmp .L0t
|
||||||
|
ALIGN_4
|
||||||
|
|
||||||
|
.L999x:
|
||||||
movq 0(%rsp), %rbx
|
movq 0(%rsp), %rbx
|
||||||
movq 8(%rsp), %rbp
|
movq 8(%rsp), %rbp
|
||||||
movq 16(%rsp), %r12
|
movq 16(%rsp), %r12
|
||||||
|
|
|
@ -47,7 +47,7 @@
|
||||||
|
|
||||||
#ifndef WINDOWS_ABI
|
#ifndef WINDOWS_ABI
|
||||||
|
|
||||||
#define STACKSIZE 64
|
#define STACKSIZE 128
|
||||||
|
|
||||||
#define OLD_M %rdi
|
#define OLD_M %rdi
|
||||||
#define OLD_N %rsi
|
#define OLD_N %rsi
|
||||||
|
@ -58,10 +58,14 @@
|
||||||
#define STACK_INCY 24 + STACKSIZE(%rsp)
|
#define STACK_INCY 24 + STACKSIZE(%rsp)
|
||||||
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
|
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
|
||||||
#define ALPHA 48 (%rsp)
|
#define ALPHA 48 (%rsp)
|
||||||
|
#define MMM 56(%rsp)
|
||||||
|
#define NN 64(%rsp)
|
||||||
|
#define AA 72(%rsp)
|
||||||
|
#define LDAX 80(%rsp)
|
||||||
|
#define XX 96(%rsp)
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 288
|
||||||
|
|
||||||
#define OLD_M %rcx
|
#define OLD_M %rcx
|
||||||
#define OLD_N %rdx
|
#define OLD_N %rdx
|
||||||
|
@ -74,6 +78,12 @@
|
||||||
#define STACK_BUFFER 88 + STACKSIZE(%rsp)
|
#define STACK_BUFFER 88 + STACKSIZE(%rsp)
|
||||||
#define ALPHA 224 (%rsp)
|
#define ALPHA 224 (%rsp)
|
||||||
|
|
||||||
|
#define MMM 232(%rsp)
|
||||||
|
#define NN 240(%rsp)
|
||||||
|
#define AA 248(%rsp)
|
||||||
|
#define LDAX 256(%rsp)
|
||||||
|
#define XX 264(%rsp)
|
||||||
|
/* A stray "#define" with no macro name was removed here: an empty #define
   is rejected by the C preprocessor once the WINDOWS_ABI branch is active. */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define LDA %r8
|
#define LDA %r8
|
||||||
|
@ -137,17 +147,41 @@
|
||||||
movq OLD_LDA, LDA
|
movq OLD_LDA, LDA
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
movq STACK_INCX, INCX
|
|
||||||
movq STACK_Y, Y
|
|
||||||
movq STACK_INCY, INCY
|
|
||||||
movq STACK_BUFFER, BUFFER
|
|
||||||
|
|
||||||
#ifndef WINDOWS_ABI
|
#ifndef WINDOWS_ABI
|
||||||
movss %xmm0, ALPHA
|
movss %xmm0, ALPHA
|
||||||
#else
|
#else
|
||||||
movss %xmm3, ALPHA
|
movss %xmm3, ALPHA
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
movq M,MMM
|
||||||
|
movq A,AA
|
||||||
|
movq N,NN
|
||||||
|
movq LDA,LDAX
|
||||||
|
movq X,XX
|
||||||
|
movq STACK_Y, Y
|
||||||
|
.L0t:
|
||||||
|
xorq I,I
|
||||||
|
addq $1,I
|
||||||
|
salq $22,I
|
||||||
|
subq I,MMM
|
||||||
|
movq I,M
|
||||||
|
jge .L00t
|
||||||
|
|
||||||
|
movq MMM,M
|
||||||
|
addq I,M
|
||||||
|
jle .L999x
|
||||||
|
|
||||||
|
.L00t:
|
||||||
|
movq AA,A
|
||||||
|
movq NN,N
|
||||||
|
movq LDAX,LDA
|
||||||
|
movq XX,X
|
||||||
|
|
||||||
|
movq STACK_INCX, INCX
|
||||||
|
movq STACK_INCY, INCY
|
||||||
|
movq STACK_BUFFER, BUFFER
|
||||||
|
|
||||||
leaq (,INCX, SIZE), INCX
|
leaq (,INCX, SIZE), INCX
|
||||||
leaq (,INCY, SIZE), INCY
|
leaq (,INCY, SIZE), INCY
|
||||||
leaq (,LDA, SIZE), LDA
|
leaq (,LDA, SIZE), LDA
|
||||||
|
@ -5990,6 +6024,12 @@
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
leaq (,M,SIZE),%rax
|
||||||
|
addq %rax,AA
|
||||||
|
jmp .L0t
|
||||||
|
ALIGN_4
|
||||||
|
|
||||||
|
.L999x:
|
||||||
movq 0(%rsp), %rbx
|
movq 0(%rsp), %rbx
|
||||||
movq 8(%rsp), %rbp
|
movq 8(%rsp), %rbp
|
||||||
movq 16(%rsp), %r12
|
movq 16(%rsp), %r12
|
||||||
|
|
|
@ -63,7 +63,7 @@
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 288
|
||||||
|
|
||||||
#define OLD_M %rcx
|
#define OLD_M %rcx
|
||||||
#define OLD_N %rdx
|
#define OLD_N %rdx
|
||||||
|
@ -74,10 +74,10 @@
|
||||||
#define STACK_Y 72 + STACKSIZE(%rsp)
|
#define STACK_Y 72 + STACKSIZE(%rsp)
|
||||||
#define STACK_INCY 80 + STACKSIZE(%rsp)
|
#define STACK_INCY 80 + STACKSIZE(%rsp)
|
||||||
#define STACK_BUFFER 88 + STACKSIZE(%rsp)
|
#define STACK_BUFFER 88 + STACKSIZE(%rsp)
|
||||||
#define MMM 216(%rsp)
|
#define MMM 232(%rsp)
|
||||||
#define NN 224(%rsp)
|
#define NN 240(%rsp)
|
||||||
#define AA 232(%rsp)
|
#define AA 248(%rsp)
|
||||||
#define LDAX 240(%rsp)
|
#define LDAX 256(%rsp)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,7 @@
|
||||||
|
|
||||||
#ifndef WINDOWS_ABI
|
#ifndef WINDOWS_ABI
|
||||||
|
|
||||||
#define STACKSIZE 64
|
#define STACKSIZE 128
|
||||||
|
|
||||||
#define OLD_INCX 8 + STACKSIZE(%rsp)
|
#define OLD_INCX 8 + STACKSIZE(%rsp)
|
||||||
#define OLD_Y 16 + STACKSIZE(%rsp)
|
#define OLD_Y 16 + STACKSIZE(%rsp)
|
||||||
|
@ -51,6 +51,14 @@
|
||||||
#define ALPHA_R 48 (%rsp)
|
#define ALPHA_R 48 (%rsp)
|
||||||
#define ALPHA_I 56 (%rsp)
|
#define ALPHA_I 56 (%rsp)
|
||||||
|
|
||||||
|
#define MMM 64(%rsp)
|
||||||
|
#define NN 72(%rsp)
|
||||||
|
#define AA 80(%rsp)
|
||||||
|
#define XX 88(%rsp)
|
||||||
|
#define LDAX 96(%rsp)
|
||||||
|
#define ALPHAR 104(%rsp)
|
||||||
|
#define ALPHAI 112(%rsp)
|
||||||
|
|
||||||
#define M %rdi
|
#define M %rdi
|
||||||
#define N %rsi
|
#define N %rsi
|
||||||
#define A %rcx
|
#define A %rcx
|
||||||
|
@ -62,7 +70,7 @@
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 288
|
||||||
|
|
||||||
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
|
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
|
||||||
#define OLD_A 48 + STACKSIZE(%rsp)
|
#define OLD_A 48 + STACKSIZE(%rsp)
|
||||||
|
@ -75,6 +83,14 @@
|
||||||
#define ALPHA_R 224 (%rsp)
|
#define ALPHA_R 224 (%rsp)
|
||||||
#define ALPHA_I 232 (%rsp)
|
#define ALPHA_I 232 (%rsp)
|
||||||
|
|
||||||
|
/* NOTE(review): MMM uses offset 232(%rsp), the same offset as the
   WINDOWS_ABI ALPHA_I slot defined just above, so a store to either
   name overwrites the other. Confirm the two are never live at the
   same time; otherwise MMM and the slots after it need to move, which
   presumably also means enlarging STACKSIZE -- TODO verify. */
#define MMM 232(%rsp)
|
||||||
|
#define NN 240(%rsp)
|
||||||
|
#define AA 248(%rsp)
|
||||||
|
#define XX 256(%rsp)
|
||||||
|
#define LDAX 264(%rsp)
|
||||||
|
#define ALPHAR 272(%rsp)
|
||||||
|
#define ALPHAI 280(%rsp)
|
||||||
|
|
||||||
#define M %rcx
|
#define M %rcx
|
||||||
#define N %rdx
|
#define N %rdx
|
||||||
#define A %r8
|
#define A %r8
|
||||||
|
@ -136,8 +152,37 @@
|
||||||
movsd OLD_ALPHA_I, %xmm1
|
movsd OLD_ALPHA_I, %xmm1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
movq OLD_INCX, INCX
|
movq A, AA
|
||||||
|
movq N, NN
|
||||||
|
movq M, MMM
|
||||||
|
movq LDA, LDAX
|
||||||
|
movq X, XX
|
||||||
movq OLD_Y, Y
|
movq OLD_Y, Y
|
||||||
|
movsd %xmm0,ALPHAR
|
||||||
|
movsd %xmm1,ALPHAI
|
||||||
|
|
||||||
|
.L0t:
|
||||||
|
xorq I,I
|
||||||
|
addq $1,I
|
||||||
|
salq $18,I
|
||||||
|
subq I,MMM
|
||||||
|
movq I,M
|
||||||
|
movsd ALPHAR,%xmm0
|
||||||
|
movsd ALPHAI,%xmm1
|
||||||
|
jge .L00t
|
||||||
|
|
||||||
|
movq MMM,M
|
||||||
|
addq I,M
|
||||||
|
jle .L999x
|
||||||
|
|
||||||
|
.L00t:
|
||||||
|
movq AA, A
|
||||||
|
movq NN, N
|
||||||
|
movq LDAX, LDA
|
||||||
|
movq XX, X
|
||||||
|
|
||||||
|
movq OLD_INCX, INCX
|
||||||
|
# movq OLD_Y, Y
|
||||||
movq OLD_INCY, INCY
|
movq OLD_INCY, INCY
|
||||||
movq OLD_BUFFER, BUFFER
|
movq OLD_BUFFER, BUFFER
|
||||||
|
|
||||||
|
@ -2673,6 +2718,12 @@
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
movq M, I
|
||||||
|
salq $ZBASE_SHIFT,I
|
||||||
|
addq I,AA
|
||||||
|
jmp .L0t
|
||||||
|
.L999x:
|
||||||
|
|
||||||
movq 0(%rsp), %rbx
|
movq 0(%rsp), %rbx
|
||||||
movq 8(%rsp), %rbp
|
movq 8(%rsp), %rbp
|
||||||
movq 16(%rsp), %r12
|
movq 16(%rsp), %r12
|
||||||
|
|
|
@ -42,13 +42,20 @@
|
||||||
|
|
||||||
#ifndef WINDOWS_ABI
|
#ifndef WINDOWS_ABI
|
||||||
|
|
||||||
#define STACKSIZE 64
|
#define STACKSIZE 128
|
||||||
|
|
||||||
#define OLD_INCX 8 + STACKSIZE(%rsp)
|
#define OLD_INCX 8 + STACKSIZE(%rsp)
|
||||||
#define OLD_Y 16 + STACKSIZE(%rsp)
|
#define OLD_Y 16 + STACKSIZE(%rsp)
|
||||||
#define OLD_INCY 24 + STACKSIZE(%rsp)
|
#define OLD_INCY 24 + STACKSIZE(%rsp)
|
||||||
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
|
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
|
||||||
|
|
||||||
|
#define MMM 64(%rsp)
|
||||||
|
#define NN 72(%rsp)
|
||||||
|
#define AA 80(%rsp)
|
||||||
|
#define LDAX 88(%rsp)
|
||||||
|
#define ALPHAR 96(%rsp)
|
||||||
|
#define ALPHAI 104(%rsp)
|
||||||
|
|
||||||
#define M %rdi
|
#define M %rdi
|
||||||
#define N %rsi
|
#define N %rsi
|
||||||
#define A %rcx
|
#define A %rcx
|
||||||
|
@ -60,7 +67,7 @@
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 288
|
||||||
|
|
||||||
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
|
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
|
||||||
#define OLD_A 48 + STACKSIZE(%rsp)
|
#define OLD_A 48 + STACKSIZE(%rsp)
|
||||||
|
@ -71,6 +78,13 @@
|
||||||
#define OLD_INCY 88 + STACKSIZE(%rsp)
|
#define OLD_INCY 88 + STACKSIZE(%rsp)
|
||||||
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
|
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
|
||||||
|
|
||||||
|
#define MMM 232(%rsp)
|
||||||
|
#define NN 240(%rsp)
|
||||||
|
#define AA 248(%rsp)
|
||||||
|
#define LDAX 256(%rsp)
|
||||||
|
#define ALPHAR 264(%rsp)
|
||||||
|
#define ALPHAI 272(%rsp)
|
||||||
|
|
||||||
#define M %rcx
|
#define M %rcx
|
||||||
#define N %rdx
|
#define N %rdx
|
||||||
#define A %r8
|
#define A %r8
|
||||||
|
@ -135,6 +149,32 @@
|
||||||
movsd OLD_ALPHA_I, %xmm1
|
movsd OLD_ALPHA_I, %xmm1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
movq A, AA
|
||||||
|
movq N, NN
|
||||||
|
movq M, MMM
|
||||||
|
movq LDA, LDAX
|
||||||
|
movsd %xmm0,ALPHAR
|
||||||
|
movsd %xmm1,ALPHAI
|
||||||
|
|
||||||
|
.L0t:
|
||||||
|
xorq I,I
|
||||||
|
addq $1,I
|
||||||
|
salq $19,I
|
||||||
|
subq I,MMM
|
||||||
|
movq I,M
|
||||||
|
movsd ALPHAR,%xmm0
|
||||||
|
movsd ALPHAI,%xmm1
|
||||||
|
jge .L00t
|
||||||
|
|
||||||
|
movq MMM,M
|
||||||
|
addq I,M
|
||||||
|
jle .L999x
|
||||||
|
|
||||||
|
.L00t:
|
||||||
|
movq AA, A
|
||||||
|
movq NN, N
|
||||||
|
movq LDAX, LDA
|
||||||
|
|
||||||
movq OLD_INCX, INCX
|
movq OLD_INCX, INCX
|
||||||
movq OLD_Y, Y
|
movq OLD_Y, Y
|
||||||
movq OLD_INCY, INCY
|
movq OLD_INCY, INCY
|
||||||
|
@ -2405,6 +2445,12 @@
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
movq M, I
|
||||||
|
salq $ZBASE_SHIFT,I
|
||||||
|
addq I,AA
|
||||||
|
jmp .L0t
|
||||||
|
.L999x:
|
||||||
|
|
||||||
movq 0(%rsp), %rbx
|
movq 0(%rsp), %rbx
|
||||||
movq 8(%rsp), %rbp
|
movq 8(%rsp), %rbp
|
||||||
movq 16(%rsp), %r12
|
movq 16(%rsp), %r12
|
||||||
|
|
Loading…
Reference in New Issue