From 5f0117385e1d4f986ad75fa66b873b014a7792c2 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Mon, 19 Nov 2012 22:32:27 +0800 Subject: [PATCH] Refs #154. Fixed a SEGFAULT bug of dgemv_t when m is very large. It overflowed the internal buffer. Thus, we split vector x into blocks when m is very large. Thanks to @wangqian for this patch. --- kernel/x86_64/dgemv_t.S | 71 ++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/kernel/x86_64/dgemv_t.S b/kernel/x86_64/dgemv_t.S index 071920723..02601be0a 100644 --- a/kernel/x86_64/dgemv_t.S +++ b/kernel/x86_64/dgemv_t.S @@ -47,7 +47,7 @@ #ifndef WINDOWS_ABI -#define STACKSIZE 64 +#define STACKSIZE 128 #define OLD_M %rdi #define OLD_N %rsi @@ -57,7 +57,10 @@ #define STACK_Y 16 + STACKSIZE(%rsp) #define STACK_INCY 24 + STACKSIZE(%rsp) #define STACK_BUFFER 32 + STACKSIZE(%rsp) - +#define MMM 56(%rsp) +#define NN 64(%rsp) +#define AA 72(%rsp) +#define LDAX 80(%rsp) #else #define STACKSIZE 256 @@ -132,27 +135,11 @@ movq OLD_LDA, LDA movq OLD_X, X #else - movq OLD_M, M - movq OLD_N, N - movq OLD_A, A - movq OLD_LDA, LDA + movq OLD_M, MMM + movq OLD_N, NN + movq OLD_A, AA + movq OLD_LDA, LDAX #endif - - movq STACK_INCX, INCX - movq STACK_Y, Y - movq STACK_INCY, INCY - movq STACK_BUFFER, BUFFER - - leaq -1(INCX), %rax - - leaq (,LDA, SIZE), LDA - leaq (,INCX, SIZE), INCX - leaq (,INCY, SIZE), INCY - - leaq (LDA, LDA, 2), LDA3 - - subq $-16 * SIZE, A - #ifdef HAVE_SSE3 #ifndef WINDOWS_ABI movddup %xmm0, ALPHA @@ -168,6 +155,39 @@ unpcklpd ALPHA, ALPHA #endif + + +.L0x: + xorq M,M + addq $1,M + salq $22,M + subq M,MMM + jge .L00 + + movq MMM,%rax + addq M,%rax + jle .L999x + movq %rax,M + +.L00: + movq LDAX,LDA + movq NN,N + movq AA,A + movq STACK_INCX, INCX + movq STACK_Y, Y + movq STACK_INCY, INCY + movq STACK_BUFFER, BUFFER + + leaq -1(INCX), %rax + + leaq (,LDA, SIZE), LDA + leaq (,INCX, SIZE), INCX + leaq (,INCY, SIZE), INCY + + leaq (LDA, LDA, 2), LDA3 + + subq $-16 * SIZE, A + 
testq M, M jle .L999 testq N, N @@ -854,7 +874,6 @@ .L21: #endif - subq $4, N leaq 16 * SIZE(BUFFER), X1 @@ -2461,6 +2480,12 @@ ALIGN_4 .L999: + leaq (, M, SIZE), %rax + addq %rax,AA + jmp .L0x; + ALIGN_4 + +.L999x: movq 0(%rsp), %rbx movq 8(%rsp), %rbp movq 16(%rsp), %r12