Fixed #7. 1) Disabled multi-threading and 2) modified the kernel code to skip the unrolled loop in the axpy function when incx==0 or incy==0.
This commit is contained in:
parent
109b86d00e
commit
0cfd29a819
|
@ -81,6 +81,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
nthreads = num_cpu_avail(1);
|
nthreads = num_cpu_avail(1);
|
||||||
|
|
||||||
|
//Disable multi-threading when incx==0 or incy==0.
|
||||||
|
//In that case, the threads would depend on each other.
|
||||||
|
if (incx == 0 || incy == 0)
|
||||||
|
nthreads = 1;
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -83,6 +83,11 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
nthreads = num_cpu_avail(1);
|
nthreads = num_cpu_avail(1);
|
||||||
|
|
||||||
|
//Disable multi-threading when incx==0 or incy==0.
|
||||||
|
//In that case, the threads would depend on each other.
|
||||||
|
if (incx == 0 || incy == 0)
|
||||||
|
nthreads = 1;
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -1463,6 +1463,12 @@
|
||||||
.L50:
|
.L50:
|
||||||
movq M, %rax
|
movq M, %rax
|
||||||
movq Y, YY
|
movq Y, YY
|
||||||
|
//If incx==0 || incy==0, skip the unrolled loop.
|
||||||
|
cmpq $0, INCX
|
||||||
|
je .L56
|
||||||
|
cmpq $0, INCY
|
||||||
|
je .L56
|
||||||
|
|
||||||
sarq $3, %rax
|
sarq $3, %rax
|
||||||
jle .L55
|
jle .L55
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
|
@ -805,6 +805,12 @@
|
||||||
.L40:
|
.L40:
|
||||||
movq Y, YY
|
movq Y, YY
|
||||||
movq M, %rax
|
movq M, %rax
|
||||||
|
//If incx==0 || incy==0, skip the unrolled loop.
|
||||||
|
cmpq $0, INCX
|
||||||
|
je .L46
|
||||||
|
cmpq $0, INCY
|
||||||
|
je .L46
|
||||||
|
|
||||||
sarq $3, %rax
|
sarq $3, %rax
|
||||||
jle .L45
|
jle .L45
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
|
@ -2893,6 +2893,12 @@
|
||||||
unpcklps %xmm13, %xmm15
|
unpcklps %xmm13, %xmm15
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
//If incx==0 || incy==0, skip the unrolled loop and jump to the end.
|
||||||
|
cmpq $0, INCX
|
||||||
|
je .L200
|
||||||
|
cmpq $0, INCY
|
||||||
|
je .L200
|
||||||
|
|
||||||
movq Y, YY
|
movq Y, YY
|
||||||
|
|
||||||
movq M, %rax
|
movq M, %rax
|
||||||
|
@ -3105,8 +3111,42 @@
|
||||||
addps %xmm1, %xmm8
|
addps %xmm1, %xmm8
|
||||||
|
|
||||||
movsd %xmm8, (Y)
|
movsd %xmm8, (Y)
|
||||||
|
jmp .L999
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
||||||
|
.L200:
|
||||||
|
movq M, %rax
|
||||||
|
cmpq $0, %rax
|
||||||
|
jle .L999
|
||||||
|
ALIGN_3
|
||||||
|
|
||||||
|
.L201:
|
||||||
|
movsd (X), %xmm0
|
||||||
|
addq INCX, X
|
||||||
|
|
||||||
|
#ifdef HAVE_SSE3
|
||||||
|
movshdup %xmm0, %xmm1
|
||||||
|
movsldup %xmm0, %xmm0
|
||||||
|
#else
|
||||||
|
pshufd $0xf5, %xmm0, %xmm1
|
||||||
|
shufps $0xa0, %xmm0, %xmm0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
mulps %xmm14, %xmm0
|
||||||
|
mulps %xmm15, %xmm1
|
||||||
|
|
||||||
|
movsd (Y), %xmm8
|
||||||
|
|
||||||
|
addps %xmm0, %xmm8
|
||||||
|
addps %xmm1, %xmm8
|
||||||
|
|
||||||
|
movsd %xmm8, (Y)
|
||||||
|
addq INCY, Y
|
||||||
|
|
||||||
|
decq %rax
|
||||||
|
jg .L201
|
||||||
|
ALIGN_3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
xorq %rax, %rax
|
xorq %rax, %rax
|
||||||
|
|
||||||
|
|
|
@ -1416,6 +1416,12 @@
|
||||||
|
|
||||||
movq Y, YY
|
movq Y, YY
|
||||||
movq M, %rax
|
movq M, %rax
|
||||||
|
//If incx==0 || incy==0, skip the unrolled loop and jump to the end.
|
||||||
|
cmpq $0, INCX
|
||||||
|
je .L58
|
||||||
|
cmpq $0, INCY
|
||||||
|
je .L58
|
||||||
|
|
||||||
sarq $3, %rax
|
sarq $3, %rax
|
||||||
jle .L55
|
jle .L55
|
||||||
|
|
||||||
|
@ -1769,6 +1775,7 @@
|
||||||
andq $1, %rax
|
andq $1, %rax
|
||||||
jle .L999
|
jle .L999
|
||||||
|
|
||||||
|
.L58:
|
||||||
MOVDDUP( 0 * SIZE, X, %xmm0)
|
MOVDDUP( 0 * SIZE, X, %xmm0)
|
||||||
MOVDDUP( 1 * SIZE, X, %xmm1)
|
MOVDDUP( 1 * SIZE, X, %xmm1)
|
||||||
|
|
||||||
|
@ -1781,6 +1788,9 @@
|
||||||
|
|
||||||
movlpd %xmm8, 0 * SIZE(YY)
|
movlpd %xmm8, 0 * SIZE(YY)
|
||||||
movhpd %xmm8, 1 * SIZE(YY)
|
movhpd %xmm8, 1 * SIZE(YY)
|
||||||
|
|
||||||
|
decq %rax
|
||||||
|
jg .L58
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
|
Loading…
Reference in New Issue