Refs #154. Fixed a segfault in dgemv_t when m is very large.

The kernel overflowed its internal buffer. To avoid this, vector x is now split into blocks (at most 2^22 elements per pass) when m is very large.

Thanks to @wangqian for this patch.
This commit is contained in:
Zhang Xianyi 2012-11-19 22:32:27 +08:00
parent 6caf1bab73
commit 5f0117385e
1 changed files with 48 additions and 23 deletions

View File

@ -47,7 +47,7 @@
#ifndef WINDOWS_ABI
#define STACKSIZE 64
#define STACKSIZE 128
#define OLD_M %rdi
#define OLD_N %rsi
@ -57,7 +57,10 @@
#define STACK_Y 16 + STACKSIZE(%rsp)
#define STACK_INCY 24 + STACKSIZE(%rsp)
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
#define MMM 56(%rsp)
#define NN 64(%rsp)
#define AA 72(%rsp)
#define LDAX 80(%rsp)
#else
#define STACKSIZE 256
@ -132,27 +135,11 @@
movq OLD_LDA, LDA
movq OLD_X, X
#else
movq OLD_M, M
movq OLD_N, N
movq OLD_A, A
movq OLD_LDA, LDA
movq OLD_M, MMM
movq OLD_N, NN
movq OLD_A, AA
movq OLD_LDA, LDAX
#endif
movq STACK_INCX, INCX
movq STACK_Y, Y
movq STACK_INCY, INCY
movq STACK_BUFFER, BUFFER
leaq -1(INCX), %rax
leaq (,LDA, SIZE), LDA
leaq (,INCX, SIZE), INCX
leaq (,INCY, SIZE), INCY
leaq (LDA, LDA, 2), LDA3
subq $-16 * SIZE, A
#ifdef HAVE_SSE3
#ifndef WINDOWS_ABI
movddup %xmm0, ALPHA
@ -168,6 +155,39 @@
unpcklpd ALPHA, ALPHA
#endif
.L0x:
xorq M,M
addq $1,M
salq $22,M
subq M,MMM
jge .L00
movq MMM,%rax
addq M,%rax
jle .L999x
movq %rax,M
.L00:
movq LDAX,LDA
movq NN,N
movq AA,A
movq STACK_INCX, INCX
movq STACK_Y, Y
movq STACK_INCY, INCY
movq STACK_BUFFER, BUFFER
leaq -1(INCX), %rax
leaq (,LDA, SIZE), LDA
leaq (,INCX, SIZE), INCX
leaq (,INCY, SIZE), INCY
leaq (LDA, LDA, 2), LDA3
subq $-16 * SIZE, A
testq M, M
jle .L999
testq N, N
@ -854,7 +874,6 @@
.L21:
#endif
subq $4, N
leaq 16 * SIZE(BUFFER), X1
@ -2461,6 +2480,12 @@
ALIGN_4
.L999:
leaq (, M, SIZE), %rax
addq %rax,AA
jmp .L0x;
ALIGN_4
.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12