Fixed #7. 1) Disabled multi-threading and 2) modified the kernel code to skip the unrolled loops in the axpy functions when incx==0 or incy==0.
This commit is contained in:
parent
109b86d00e
commit
0cfd29a819
|
@ -81,6 +81,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
|
|||
#ifdef SMP
|
||||
nthreads = num_cpu_avail(1);
|
||||
|
||||
//disable multi-thread when incx==0 or incy==0
|
||||
//In that case, the threads would not be independent of each other.
|
||||
if (incx == 0 || incy == 0)
|
||||
nthreads = 1;
|
||||
|
||||
if (nthreads == 1) {
|
||||
#endif
|
||||
|
||||
|
|
|
@ -83,6 +83,11 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in
|
|||
#ifdef SMP
|
||||
nthreads = num_cpu_avail(1);
|
||||
|
||||
//disable multi-thread when incx==0 or incy==0
|
||||
//In that case, the threads would not be independent of each other.
|
||||
if (incx == 0 || incy == 0)
|
||||
nthreads = 1;
|
||||
|
||||
if (nthreads == 1) {
|
||||
#endif
|
||||
|
||||
|
|
|
@ -1463,6 +1463,12 @@
|
|||
.L50:
|
||||
movq M, %rax
|
||||
movq Y, YY
|
||||
//If incx==0 || incy==0, skip the unrolled loop.
|
||||
cmpq $0, INCX
|
||||
je .L56
|
||||
cmpq $0, INCY
|
||||
je .L56
|
||||
|
||||
sarq $3, %rax
|
||||
jle .L55
|
||||
ALIGN_3
|
||||
|
|
|
@ -805,6 +805,12 @@
|
|||
.L40:
|
||||
movq Y, YY
|
||||
movq M, %rax
|
||||
//If incx==0 || incy==0, skip the unrolled loop.
|
||||
cmpq $0, INCX
|
||||
je .L46
|
||||
cmpq $0, INCY
|
||||
je .L46
|
||||
|
||||
sarq $3, %rax
|
||||
jle .L45
|
||||
ALIGN_3
|
||||
|
|
|
@ -2893,6 +2893,12 @@
|
|||
unpcklps %xmm13, %xmm15
|
||||
#endif
|
||||
|
||||
//If incx==0 || incy==0, skip the unrolled loops and jump to the one-element-per-iteration loop at the end (.L200).
|
||||
cmpq $0, INCX
|
||||
je .L200
|
||||
cmpq $0, INCY
|
||||
je .L200
|
||||
|
||||
movq Y, YY
|
||||
|
||||
movq M, %rax
|
||||
|
@ -3105,8 +3111,42 @@
|
|||
addps %xmm1, %xmm8
|
||||
|
||||
movsd %xmm8, (Y)
|
||||
jmp .L999
|
||||
ALIGN_3
|
||||
|
||||
.L200:
|
||||
movq M, %rax
|
||||
cmpq $0, %rax
|
||||
jle .L999
|
||||
ALIGN_3
|
||||
|
||||
.L201:
|
||||
movsd (X), %xmm0
|
||||
addq INCX, X
|
||||
|
||||
#ifdef HAVE_SSE3
|
||||
movshdup %xmm0, %xmm1
|
||||
movsldup %xmm0, %xmm0
|
||||
#else
|
||||
pshufd $0xf5, %xmm0, %xmm1
|
||||
shufps $0xa0, %xmm0, %xmm0
|
||||
#endif
|
||||
|
||||
mulps %xmm14, %xmm0
|
||||
mulps %xmm15, %xmm1
|
||||
|
||||
movsd (Y), %xmm8
|
||||
|
||||
addps %xmm0, %xmm8
|
||||
addps %xmm1, %xmm8
|
||||
|
||||
movsd %xmm8, (Y)
|
||||
addq INCY, Y
|
||||
|
||||
decq %rax
|
||||
jg .L201
|
||||
ALIGN_3
|
||||
|
||||
.L999:
|
||||
xorq %rax, %rax
|
||||
|
||||
|
|
|
@ -1416,6 +1416,12 @@
|
|||
|
||||
movq Y, YY
|
||||
movq M, %rax
|
||||
//If incx==0 || incy==0, skip the unrolled loops and jump to the one-element-per-iteration loop at the end (.L58).
|
||||
cmpq $0, INCX
|
||||
je .L58
|
||||
cmpq $0, INCY
|
||||
je .L58
|
||||
|
||||
sarq $3, %rax
|
||||
jle .L55
|
||||
|
||||
|
@ -1769,6 +1775,7 @@
|
|||
andq $1, %rax
|
||||
jle .L999
|
||||
|
||||
.L58:
|
||||
MOVDDUP( 0 * SIZE, X, %xmm0)
|
||||
MOVDDUP( 1 * SIZE, X, %xmm1)
|
||||
|
||||
|
@ -1781,6 +1788,9 @@
|
|||
|
||||
movlpd %xmm8, 0 * SIZE(YY)
|
||||
movhpd %xmm8, 1 * SIZE(YY)
|
||||
|
||||
decq %rax
|
||||
jg .L58
|
||||
ALIGN_3
|
||||
|
||||
.L999:
|
||||
|
|
Loading…
Reference in New Issue