Refs #154. Fixed a segmentation fault in dgemv_t that occurred when m is very large.
A very large m overflowed the internal buffer, so vector x is now split into blocks and processed one block at a time. Thanks to @wangqian for this patch.
This commit is contained in:
parent
6caf1bab73
commit
5f0117385e
|
@ -47,7 +47,7 @@
|
||||||
|
|
||||||
#ifndef WINDOWS_ABI
|
#ifndef WINDOWS_ABI
|
||||||
|
|
||||||
#define STACKSIZE 64
|
#define STACKSIZE 128
|
||||||
|
|
||||||
#define OLD_M %rdi
|
#define OLD_M %rdi
|
||||||
#define OLD_N %rsi
|
#define OLD_N %rsi
|
||||||
|
@ -57,7 +57,10 @@
|
||||||
#define STACK_Y 16 + STACKSIZE(%rsp)
|
#define STACK_Y 16 + STACKSIZE(%rsp)
|
||||||
#define STACK_INCY 24 + STACKSIZE(%rsp)
|
#define STACK_INCY 24 + STACKSIZE(%rsp)
|
||||||
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
|
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
|
||||||
|
#define MMM 56(%rsp)
|
||||||
|
#define NN 64(%rsp)
|
||||||
|
#define AA 72(%rsp)
|
||||||
|
#define LDAX 80(%rsp)
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 256
|
||||||
|
@ -132,27 +135,11 @@
|
||||||
movq OLD_LDA, LDA
|
movq OLD_LDA, LDA
|
||||||
movq OLD_X, X
|
movq OLD_X, X
|
||||||
#else
|
#else
|
||||||
movq OLD_M, M
|
movq OLD_M, MMM
|
||||||
movq OLD_N, N
|
movq OLD_N, NN
|
||||||
movq OLD_A, A
|
movq OLD_A, AA
|
||||||
movq OLD_LDA, LDA
|
movq OLD_LDA, LDAX
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
movq STACK_INCX, INCX
|
|
||||||
movq STACK_Y, Y
|
|
||||||
movq STACK_INCY, INCY
|
|
||||||
movq STACK_BUFFER, BUFFER
|
|
||||||
|
|
||||||
leaq -1(INCX), %rax
|
|
||||||
|
|
||||||
leaq (,LDA, SIZE), LDA
|
|
||||||
leaq (,INCX, SIZE), INCX
|
|
||||||
leaq (,INCY, SIZE), INCY
|
|
||||||
|
|
||||||
leaq (LDA, LDA, 2), LDA3
|
|
||||||
|
|
||||||
subq $-16 * SIZE, A
|
|
||||||
|
|
||||||
#ifdef HAVE_SSE3
|
#ifdef HAVE_SSE3
|
||||||
#ifndef WINDOWS_ABI
|
#ifndef WINDOWS_ABI
|
||||||
movddup %xmm0, ALPHA
|
movddup %xmm0, ALPHA
|
||||||
|
@ -168,6 +155,39 @@
|
||||||
unpcklpd ALPHA, ALPHA
|
unpcklpd ALPHA, ALPHA
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
.L0x:
|
||||||
|
xorq M,M
|
||||||
|
addq $1,M
|
||||||
|
salq $22,M
|
||||||
|
subq M,MMM
|
||||||
|
jge .L00
|
||||||
|
|
||||||
|
movq MMM,%rax
|
||||||
|
addq M,%rax
|
||||||
|
jle .L999x
|
||||||
|
movq %rax,M
|
||||||
|
|
||||||
|
.L00:
|
||||||
|
movq LDAX,LDA
|
||||||
|
movq NN,N
|
||||||
|
movq AA,A
|
||||||
|
movq STACK_INCX, INCX
|
||||||
|
movq STACK_Y, Y
|
||||||
|
movq STACK_INCY, INCY
|
||||||
|
movq STACK_BUFFER, BUFFER
|
||||||
|
|
||||||
|
leaq -1(INCX), %rax
|
||||||
|
|
||||||
|
leaq (,LDA, SIZE), LDA
|
||||||
|
leaq (,INCX, SIZE), INCX
|
||||||
|
leaq (,INCY, SIZE), INCY
|
||||||
|
|
||||||
|
leaq (LDA, LDA, 2), LDA3
|
||||||
|
|
||||||
|
subq $-16 * SIZE, A
|
||||||
|
|
||||||
testq M, M
|
testq M, M
|
||||||
jle .L999
|
jle .L999
|
||||||
testq N, N
|
testq N, N
|
||||||
|
@ -854,7 +874,6 @@
|
||||||
|
|
||||||
.L21:
|
.L21:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
subq $4, N
|
subq $4, N
|
||||||
|
|
||||||
leaq 16 * SIZE(BUFFER), X1
|
leaq 16 * SIZE(BUFFER), X1
|
||||||
|
@ -2461,6 +2480,12 @@
|
||||||
ALIGN_4
|
ALIGN_4
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
leaq (, M, SIZE), %rax
|
||||||
|
addq %rax,AA
|
||||||
|
jmp .L0x;
|
||||||
|
ALIGN_4
|
||||||
|
|
||||||
|
.L999x:
|
||||||
movq 0(%rsp), %rbx
|
movq 0(%rsp), %rbx
|
||||||
movq 8(%rsp), %rbp
|
movq 8(%rsp), %rbp
|
||||||
movq 16(%rsp), %r12
|
movq 16(%rsp), %r12
|
||||||
|
|
Loading…
Reference in New Issue