diff --git a/kernel/x86_64/zaxpy_sse2.S b/kernel/x86_64/zaxpy_sse2.S
index a7dd054fb..3776c8910 100644
--- a/kernel/x86_64/zaxpy_sse2.S
+++ b/kernel/x86_64/zaxpy_sse2.S
@@ -1418,10 +1418,10 @@
 	movq	M,  %rax
-//If incx==0 || incy==0, avoid unloop and jump to end.
+//If incx==0 && incy==0, skip the unrolled loop and jump to end.
 	cmpq	$0, INCX
-	je  .L58
+	jne  .L59
 	cmpq	$0, INCY
 	je  .L58
-
+.L59:
 	sarq	$3, %rax
 	jle	.L55
 
diff --git a/utest/test_axpy.c b/utest/test_axpy.c
index 5fd7c1b04..26005e70f 100644
--- a/utest/test_axpy.c
+++ b/utest/test_axpy.c
@@ -74,6 +74,28 @@ CTEST(axpy,zaxpy_inc_0)
 		ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS);
 	}
 }
+
+CTEST(axpy,zaxpy_incx_0)
+{
+	blasint i;
+	blasint N=4,incX=0,incY=1;
+	double a[2]={0.25,0.5};
+	double x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
+	double y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
+	double x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
+	//Expected y: incX=0 reuses x[0]=(1+3i) for every element, so each
+	//complex y[k] grows by a*x[0] = (0.25+0.5i)*(1+3i) = -1.25+1.25i.
+	double y2[]={0.75,5.25,4.75,9.25,0.75,5.25,4.75,9.25};
+
+	//OpenBLAS
+	BLASFUNC(zaxpy)(&N,a,x1,&incX,y1,&incY);
+
+	for(i=0; i<2*N; i++){
+		ASSERT_DBL_NEAR_TOL(x2[i], x1[i], DOUBLE_EPS);
+		ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS);
+	}
+}
+
 #endif
 
 #ifdef BUILD_SINGLE
@@ -116,5 +138,26 @@ CTEST(axpy,caxpy_inc_0)
 		ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS);
 	}
 }
+
+CTEST(axpy,caxpy_incx_0)
+{
+	blasint i;
+	blasint N=4,incX=0,incY=1;
+	float a[2]={0.25,0.5};
+	float x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
+	float y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
+	double x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
+	//Expected y: incX=0 reuses x[0]=(1+3i) for every element, so each
+	//complex y[k] grows by a*x[0] = (0.25+0.5i)*(1+3i) = -1.25+1.25i.
+	double y2[]={0.75,5.25,4.75,9.25,0.75,5.25,4.75,9.25};
+
+	//OpenBLAS
+	BLASFUNC(caxpy)(&N,a,x1,&incX,y1,&incY);
+
+	for(i=0; i<2*N; i++){
+		ASSERT_DBL_NEAR_TOL(x2[i], x1[i], DOUBLE_EPS);
+		ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS);
+	}
+}
 
 #endif