From 4664b57e6ede3c55efea2c6aba01dad59e84e67a Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 4 Aug 2023 12:25:34 +0200 Subject: [PATCH 1/3] use shortcut only when both incx and incy are zero --- kernel/x86_64/zaxpy_sse2.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/x86_64/zaxpy_sse2.S b/kernel/x86_64/zaxpy_sse2.S index a7dd054fb..3776c8910 100644 --- a/kernel/x86_64/zaxpy_sse2.S +++ b/kernel/x86_64/zaxpy_sse2.S @@ -1418,10 +1418,10 @@ movq M, %rax -//If incx==0 || incy==0, avoid unloop and jump to end. +//If incx==0 && incy==0, skip the unrolled loop and jump to end. cmpq $0, INCX - je .L58 + jne .L59 cmpq $0, INCY je .L58 - +.L59: sarq $3, %rax jle .L55 From d64fa286f7c5c911c7a07a5638bc9090c54bafbc Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 4 Aug 2023 12:26:36 +0200 Subject: [PATCH 2/3] add test case for zaxpy with incx=0 incy=1 --- utest/test_axpy.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/utest/test_axpy.c b/utest/test_axpy.c index 5fd7c1b04..33b6cf6ed 100644 --- a/utest/test_axpy.c +++ b/utest/test_axpy.c @@ -74,6 +74,28 @@ CTEST(axpy,zaxpy_inc_0) ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS); } } + +CTEST(axpy,zaxpy_incx_0) +{ + blasint i; + blasint N=4,incX=0,incY=1; + double a[2]={0.25,0.5}; + double x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; + double y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0}; + double x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; + double y2[]={0.75,5.25,4.75,9.25,0.75,5.25,4.75,9.25}; /* expected y1 + a*x1[0]: incX=0 reuses x1[0]=1+3i for every element, a*x1[0] = -1.25+1.25i */ + + //OpenBLAS + BLASFUNC(zaxpy)(&N,a,x1,&incX,y1,&incY); + + for(i=0; i<2*N; i++){ +//fprintf(stderr,"output X %lf\n",x1[i]); +//fprintf(stderr,"output Y %lf\n",y1[i]); + ASSERT_DBL_NEAR_TOL(x2[i], x1[i], DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS); + } +} + #endif #ifdef BUILD_SINGLE From 862d06ab8ae0486a41c138ed51c2cbc75722a906 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 4 Aug 2023 15:28:02 +0200 Subject: [PATCH 3/3] Add INCX=0,INCY=1 test case for CAXPY --- utest/test_axpy.c | 21 +++++++++++++++++++-- 1 file 
changed, 19 insertions(+), 2 deletions(-) diff --git a/utest/test_axpy.c b/utest/test_axpy.c index 33b6cf6ed..26005e70f 100644 --- a/utest/test_axpy.c +++ b/utest/test_axpy.c @@ -89,8 +89,6 @@ CTEST(axpy,zaxpy_incx_0) BLASFUNC(zaxpy)(&N,a,x1,&incX,y1,&incY); for(i=0; i<2*N; i++){ -//fprintf(stderr,"output X %lf\n",x1[i]); -//fprintf(stderr,"output Y %lf\n",y1[i]); ASSERT_DBL_NEAR_TOL(x2[i], x1[i], DOUBLE_EPS); ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS); } @@ -138,5 +136,24 @@ CTEST(axpy,caxpy_inc_0) ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS); } } + +CTEST(axpy,caxpy_incx_0) +{ + blasint i; + blasint N=4,incX=0,incY=1; + float a[2]={0.25,0.5}; + float x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; + float y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0}; + float x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; + float y2[]={0.75,5.25,4.75,9.25,0.75,5.25,4.75,9.25}; /* expected y1 + a*x1[0]: incX=0 reuses x1[0]=1+3i for every element, a*x1[0] = -1.25+1.25i */ + + //OpenBLAS + BLASFUNC(caxpy)(&N,a,x1,&incX,y1,&incY); + + for(i=0; i<2*N; i++){ + ASSERT_DBL_NEAR_TOL(x2[i], x1[i], DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS); + } +} #endif