From f3cebb3ca3c0b3c75c287862462bc996f88095a2 Mon Sep 17 00:00:00 2001 From: gxw Date: Wed, 10 Jul 2024 15:11:12 +0800 Subject: [PATCH 1/5] x86: Fixed numpy CI failure when the target is ZEN. --- interface/scal.c | 4 +- kernel/x86_64/dscal.c | 302 ++++++++++++++++++++---------------------- kernel/x86_64/sscal.c | 269 ++++++++++++++++++------------------- 3 files changed, 279 insertions(+), 296 deletions(-) diff --git a/interface/scal.c b/interface/scal.c index 0a7fee640..c6638a62d 100644 --- a/interface/scal.c +++ b/interface/scal.c @@ -85,7 +85,7 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx){ if (nthreads == 1) { #endif - SCAL_K(n, 0, 0, alpha, x, incx, NULL, 0, NULL, 0); + SCAL_K(n, 0, 0, alpha, x, incx, NULL, 0, NULL, 1); #ifdef SMP } else { @@ -102,7 +102,7 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx){ #else &alpha, #endif - x, incx, NULL, 0, NULL, 0, (int (*)(void))SCAL_K, nthreads); + x, incx, NULL, 0, NULL, 1, (int (*)(void))SCAL_K, nthreads); } #endif diff --git a/kernel/x86_64/dscal.c b/kernel/x86_64/dscal.c index e7182c5ce..641f86f90 100644 --- a/kernel/x86_64/dscal.c +++ b/kernel/x86_64/dscal.c @@ -43,21 +43,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
static void dscal_kernel_8( BLASLONG n, FLOAT *da , FLOAT *x ) { - BLASLONG i; - FLOAT alpha = *da; + BLASLONG i; + FLOAT alpha = *da; - for( i=0; i 0 ) - { - dscal_kernel_inc_8(n1, &da, x, inc_x); - i = n1 * inc_x; - j = n1; - } - - while(j < n) - { - - x[i] *= da; - i += inc_x ; - j++; - - } - - } - - return(0); - } - - BLASLONG n1 = n & -8; - if ( n1 > 0 ) - { -// if ( da == 0.0 ) -// dscal_kernel_8_zero(n1 , &da , x); -// else - dscal_kernel_8(n1 , &da , x); - } - - if ( da == 0.0 ) - { - for ( i=n1 ; i 0 ) + { + dscal_kernel_inc_8(n1, &da, x, inc_x); + i = n1 * inc_x; + j = n1; + } + while(j < n) + { + x[i] *= da; + i += inc_x ; + j++; + } + } + else + { + BLASLONG n1 = n & -8; + if ( n1 > 0) + dscal_kernel_8(n1 , &da , x); + for ( i = n1 ; i < n; i++ ) + x[i] *= da; + } + } + else + { + if ( inc_x != 1 ) + { + if( da == 0.0) + { + BLASLONG n1 = n & -2; + while(j < n1) + { + x[i] = 0.0; + x[i+inc_x] = 0.0; + i += 2 * inc_x ; + j += 2; + } + while(j < n) + { + x[i] = 0.0; + i += inc_x ; + j++; + } + } + else + { + BLASLONG n1 = n & -8; + if ( n1 > 0 ) + { + dscal_kernel_inc_8(n1, &da, x, inc_x); + i = n1 * inc_x; + j = n1; + } + while(j < n) + { + x[i] *= da; + i += inc_x ; + j++; + } + } + } + else + { + if ( da == 0.0 ) + { + BLASLONG n1 = n & -8; + if ( n1 > 0) + dscal_kernel_8_zero(n1, &da, x); + for ( i = n1 ; i < n; i++ ) + x[i] = 0.0; + } + else + { + BLASLONG n1 = n & -8; + if ( n1 > 0) + dscal_kernel_8(n1 , &da , x); + for ( i = n1 ; i < n; i++ ) + x[i] *= da; + } + } + } } - - diff --git a/kernel/x86_64/sscal.c b/kernel/x86_64/sscal.c index a85d20564..6e54f8893 100644 --- a/kernel/x86_64/sscal.c +++ b/kernel/x86_64/sscal.c @@ -39,21 +39,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
static void sscal_kernel_16( BLASLONG n, FLOAT *da , FLOAT *x ) { - BLASLONG i; - FLOAT alpha = *da; + BLASLONG i; + FLOAT alpha = *da; - for( i=0; i 0 ) - { - sscal_kernel_inc_8(n1, &da, x, inc_x); - i = n1 * inc_x; - j = n1; - } -#endif - while(j < n) - { - x[i] *= da; - i += inc_x ; - j++; - - } - - } - return(0); - } - - BLASLONG n1 = n & -16; - if ( n1 > 0 ) - { - //if ( da == 0.0 ) - // sscal_kernel_16_zero(n1 , &da , x); - //else - sscal_kernel_16(n1 , &da , x); - } - - if ( da == 0.0 ) - { - for ( i=n1 ; i 0 ) + { + sscal_kernel_inc_8(n1, &da, x, inc_x); + i = n1 * inc_x; + j = n1; + } + while(j < n) + { + x[i] *= da; + i += inc_x ; + j++; + } + } + else + { + BLASLONG n1 = n & -16; + if ( n1 > 0) + sscal_kernel_16(n1 , &da , x); + for ( i = n1 ; i < n; i++ ) + x[i] *= da; + } + } + else + { + if ( inc_x != 1 ) + { + if( da == 0.0) + { + BLASLONG n1 = n & -2; + while(j < n1) + { + x[i] = 0.0; + x[i+inc_x] = 0.0; + i += 2 * inc_x ; + j += 2; + } + while(j < n) + { + x[i] = 0.0; + i += inc_x ; + j++; + } + } + else + { + BLASLONG n1 = n & -8; + if ( n1 > 0 ) + { + sscal_kernel_inc_8(n1, &da, x, inc_x); + i = n1 * inc_x; + j = n1; + } + while(j < n) + { + x[i] *= da; + i += inc_x ; + j++; + } + } + } + else + { + if ( da == 0.0 ) + { + BLASLONG n1 = n & -16; + if ( n1 > 0) + sscal_kernel_16_zero(n1, &da, x); + for ( i = n1 ; i < n; i++ ) + x[i] = 0.0; + } + else + { + BLASLONG n1 = n & -16; + if ( n1 > 0) + sscal_kernel_16(n1 , &da , x); + for ( i = n1 ; i < n; i++ ) + x[i] *= da; + } + } + } } - - From 3f39c8f94f5d61f69a7b1c578e7fc90c4c95d6e2 Mon Sep 17 00:00:00 2001 From: gxw Date: Fri, 12 Jul 2024 16:56:35 +0800 Subject: [PATCH 2/5] LoongArch: Fixed numpy CI failure --- kernel/loongarch64/scal.S | 77 +++++++++++++++++++++++++++++++ kernel/loongarch64/scal_lasx.S | 79 ++++++++++++++++++++++++++++++-- kernel/loongarch64/scal_lsx.S | 82 +++++++++++++++++++++++++++++++++- 3 files changed, 233 insertions(+), 5 deletions(-) diff --git a/kernel/loongarch64/scal.S 
b/kernel/loongarch64/scal.S index 8de710f41..431a44c1c 100644 --- a/kernel/loongarch64/scal.S +++ b/kernel/loongarch64/scal.S @@ -53,9 +53,86 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE li.d TEMP, SIZE + ld.d XX, $sp, 0 // Load dummy2 + slli.d XX, XX, BASE_SHIFT MTC a1, $r0 slli.d INCX, INCX, BASE_SHIFT bge $r0, N, .L999 + CMPEQ $fcc0, ALPHA, a1 + bceqz $fcc0, .L50 + beq XX, TEMP, .L50 // if dummy2 == 1, do not directly copy 0 + srai.d I, N, 3 + bne INCX, TEMP, .L20 + bge $r0, I, .L15 + .align 3 + +.L12: + ST a1, X, 0 * SIZE + ST a1, X, 1 * SIZE + ST a1, X, 2 * SIZE + ST a1, X, 3 * SIZE + ST a1, X, 4 * SIZE + ST a1, X, 5 * SIZE + ST a1, X, 6 * SIZE + ST a1, X, 7 * SIZE + addi.w I, I, -1 + addi.d X, X, 8 * SIZE + blt $r0, I, .L12 + .align 3 + +.L15: + andi I, N, 7 + bge $r0, I, .L999 + .align 3 +.L16: + ST a1, X, 0 * SIZE + addi.d I, I, -1 + addi.d X, X, SIZE + blt $r0, I, .L16 + move $r4, $r17 + fmov.d $f0, $f22 + jirl $r0, $r1, 0x0 + .align 3 + +.L20: + srai.d I, N, 3 + bge $r0, I, .L25 + .align 3 + +.L22: + ST a1, X, 0 * SIZE + add.d X, X, INCX + ST a1, X, 0 * SIZE + add.d X, X, INCX + ST a1, X, 0 * SIZE + add.d X, X, INCX + ST a1, X, 0 * SIZE + add.d X, X, INCX + ST a1, X, 0 * SIZE + add.d X, X, INCX + ST a1, X, 0 * SIZE + add.d X, X, INCX + ST a1, X, 0 * SIZE + add.d X, X, INCX + ST a1, X, 0 * SIZE + addi.d I, I, -1 + add.d X, X, INCX + blt $r0, I, .L22 + .align 3 + +.L25: + andi I, N, 7 + bge $r0, I, .L999 + .align 3 +.L26: + addi.d I, I, -1 + ST a1, X, 0 * SIZE + add.d X, X, INCX + blt $r0, I, .L26 + move $r4, $r17 + fmov.d $f0, $f22 + jirl $r0, $r1, 0x0 + .align 3 .L50: srai.d I, N, 3 diff --git a/kernel/loongarch64/scal_lasx.S b/kernel/loongarch64/scal_lasx.S index b4585c1b9..dd69636e6 100644 --- a/kernel/loongarch64/scal_lasx.S +++ b/kernel/loongarch64/scal_lasx.S @@ -52,17 +52,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
bge $r0, N, .L999 bge $r0, INCX, .L999 li.d TEMP, 1 + ld.d t1, $sp, 0 // Load dummy2 movgr2fr.d a1, $r0 FFINT a1, a1 movgr2fr.d a2, TEMP FFINT a2, a2 slli.d TEMP, TEMP, BASE_SHIFT slli.d INCX, INCX, BASE_SHIFT + slli.d t1, t1, BASE_SHIFT + CMPEQ $fcc0, ALPHA, a1 + bcnez $fcc0, .L20 //ALPHA==0 CMPEQ $fcc0, ALPHA, a2 bcnez $fcc0, .L999 //ALPHA==1 return - +.L1: srai.d I, N, 3 - beq INCX, TEMP, .L30 //ALPHA!=1 and INCX==1 + beq INCX, TEMP, .L30 //ALPHA !=0|1 and INCX==1 MTG TEMP, ALPHA #ifdef DOUBLE xvreplgr2vr.d VALPHA, TEMP @@ -72,7 +76,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. move XX, X .align 3 -.L10: //ALPHA!=1 and INCX!=1 +.L10: //ALPHA !=0|1 and INCX!=1 bge $r0, I, .L32 .align 3 .L11: @@ -165,6 +169,75 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. blt $r0, I, .L11 b .L32 .align 3 + +.L20: + beq t1, TEMP, .L1 // if dummy2 == 1, do not directly copy 0 + srai.d I, N, 3 + beq INCX, TEMP, .L24 + bge $r0, I, .L22 + .align 3 + +.L21: + ST a1, X, 0 + add.d X, X, INCX + ST a1, X, 0 + add.d X, X, INCX + ST a1, X, 0 + add.d X, X, INCX + ST a1, X, 0 + add.d X, X, INCX + ST a1, X, 0 + add.d X, X, INCX + ST a1, X, 0 + add.d X, X, INCX + ST a1, X, 0 + add.d X, X, INCX + ST a1, X, 0 + add.d X, X, INCX + addi.d I, I, -1 + blt $r0, I, .L21 + .align 3 + +.L22: + andi I, N, 7 + bge $r0, I, .L999 + .align 3 + +.L23: + ST a1, X, 0 * SIZE + addi.d I, I, -1 + add.d X, X, INCX + blt $r0, I, .L23 + jirl $r0, $r1, 0 + .align 3 + +.L24: + bge $r0, I, .L26 /*N<8 INCX==1*/ + .align 3 +.L25: + xvxor.v VX0, VX0, VX0 + xvst VX0, X, 0 * SIZE +#ifdef DOUBLE + xvst VX0, X, 4 * SIZE +#endif + addi.d I, I, -1 + addi.d X, X, 8 * SIZE + blt $r0, I, .L25 + .align 3 + +.L26: + andi I, N, 7 + bge $r0, I, .L999 + .align 3 + +.L27: + ST a1, X, 0 * SIZE + addi.d I, I, -1 + addi.d X, X, SIZE + blt $r0, I, .L27 + jirl $r0, $r1, 0 + .align 3 + .L30: bge $r0, I, .L32/*N<8 INCX==1*/ MTG TEMP, ALPHA diff --git a/kernel/loongarch64/scal_lsx.S 
b/kernel/loongarch64/scal_lsx.S index a27e050ed..57dc5d0d3 100644 --- a/kernel/loongarch64/scal_lsx.S +++ b/kernel/loongarch64/scal_lsx.S @@ -51,6 +51,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. bge $r0, N, .L999 bge $r0, INCX, .L999 + ld.d t1, $sp, 0 // Load dummy2 li.d TEMP, 1 movgr2fr.d a1, $r0 FFINT a1, a1 @@ -58,10 +59,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. FFINT a2, a2 slli.d TEMP, TEMP, BASE_SHIFT slli.d INCX, INCX, BASE_SHIFT + slli.d t1, t1, BASE_SHIFT + CMPEQ $fcc0, ALPHA, a1 + bcnez $fcc0, .L20 //ALPHA==0 CMPEQ $fcc0, ALPHA, a2 bcnez $fcc0, .L999 //ALPHA==1 return +.L1: srai.d I, N, 3 - beq INCX, TEMP, .L30 //ALPHA!=1 and INCX==1 + beq INCX, TEMP, .L30 //ALPHA !=0|1 and INCX==1 MTG TEMP, ALPHA #ifdef DOUBLE vreplgr2vr.d VALPHA, TEMP @@ -71,7 +76,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. move XX, X .align 3 -.L10: //ALPHA!=1 and INCX!=1 +.L10: //ALPHA !=0|1 and INCX!=1 bge $r0, I, .L32 .align 3 @@ -169,6 +174,79 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
b .L32 .align 3 +.L20: + beq t1, TEMP, .L1 // if dummy2 == 1, do not directly copy 0 + srai.d I, N, 3 + beq INCX, TEMP, .L24 + bge $r0, I, .L22 + .align 3 + +.L21: + ST a1, X, 0 + add.d X, X, INCX + ST a1, X, 0 + add.d X, X, INCX + ST a1, X, 0 + add.d X, X, INCX + ST a1, X, 0 + add.d X, X, INCX + ST a1, X, 0 + add.d X, X, INCX + ST a1, X, 0 + add.d X, X, INCX + ST a1, X, 0 + add.d X, X, INCX + ST a1, X, 0 + add.d X, X, INCX + addi.d I, I, -1 + blt $r0, I, .L21 + .align 3 + +.L22: + andi I, N, 7 + bge $r0, I, .L999 + .align 3 + +.L23: + ST a1, X, 0 * SIZE + addi.d I, I, -1 + add.d X, X, INCX + blt $r0, I, .L23 + jirl $r0, $r1, 0 + .align 3 + +.L24: + bge $r0, I, .L26 /*N<8 INCX==1*/ + .align 3 + +.L25: + vxor.v VX0, VX0, VX0 + vst VX0, X, 0 * SIZE +#ifdef DOUBLE + vst VX0, X, 2 * SIZE + vst VX0, X, 4 * SIZE + vst VX0, X, 6 * SIZE +#else + vst VX0, X, 4 * SIZE +#endif + addi.d I, I, -1 + addi.d X, X, 8 * SIZE + blt $r0, I, .L25 + .align 3 + +.L26: + andi I, N, 7 + bge $r0, I, .L999 + .align 3 + +.L27: + ST a1, X, 0 * SIZE + addi.d I, I, -1 + addi.d X, X, SIZE + blt $r0, I, .L27 + jirl $r0, $r1, 0 + .align 3 + .L30: bge $r0, I, .L32/*N<8 INCX==1*/ MTG TEMP, ALPHA From 9b3e80efe24046701a681ba53c4179c0b60ca970 Mon Sep 17 00:00:00 2001 From: gxw Date: Mon, 15 Jul 2024 16:33:09 +0800 Subject: [PATCH 3/5] utest: Add test_gemv --- utest/CMakeLists.txt | 1 + utest/Makefile | 2 +- utest/test_gemv.c | 126 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 utest/test_gemv.c diff --git a/utest/CMakeLists.txt b/utest/CMakeLists.txt index 4771d8a27..6a61899da 100644 --- a/utest/CMakeLists.txt +++ b/utest/CMakeLists.txt @@ -18,6 +18,7 @@ else () test_zscal.c test_amin.c test_axpby.c + test_gemv.c ) endif () diff --git a/utest/Makefile b/utest/Makefile index 36acf96cd..b82937093 100644 --- a/utest/Makefile +++ b/utest/Makefile @@ -14,7 +14,7 @@ UTESTEXTBIN=openblas_utest_ext include $(TOPDIR)/Makefile.system 
OBJS=utest_main.o test_min.o test_amax.o test_ismin.o test_rotmg.o test_axpy.o test_dotu.o test_dsdot.o test_swap.o test_rot.o test_dnrm2.o test_zscal.o \ - test_amin.o test_axpby.o + test_amin.o test_axpby.o test_gemv.o #test_rot.o test_swap.o test_axpy.o test_dotu.o test_dsdot.o test_fork.o OBJS_EXT=utest_main.o $(DIR_EXT)/xerbla.o $(DIR_EXT)/common.o OBJS_EXT+=$(DIR_EXT)/test_isamin.o $(DIR_EXT)/test_idamin.o $(DIR_EXT)/test_icamin.o $(DIR_EXT)/test_izamin.o diff --git a/utest/test_gemv.c b/utest/test_gemv.c new file mode 100644 index 000000000..c85ef3f38 --- /dev/null +++ b/utest/test_gemv.c @@ -0,0 +1,126 @@ +#include "openblas_utest.h" +#include + +#ifndef NAN +#define NAN 0.0/0.0 +#endif +#ifndef INFINITY +#define INFINITY 1.0/0.0 +#endif + +#ifdef BUILD_SINGLE + +CTEST(sgemv, 0_nan_inf) +{ + blasint N = 17; + blasint incX = 1; + blasint incY = 1; + float alpha = 0.0; + float beta = 0.0; + char trans = 'N'; + float A[N * N]; + float X[N]; + float Y[N]; + + memset(A, 0, sizeof(A)); + memset(X, 0, sizeof(X)); + for (int i = 0; i < (N - 1); i += 2) + { + Y[i] = NAN; + Y[i + 1] = INFINITY; + } + Y[N - 1] = NAN; + BLASFUNC(sgemv)(&trans, &N, &N, &alpha, A, &N, X, &incX, &beta, Y, &incY); + for (int i = 0; i < N; i ++) + ASSERT_TRUE(Y[i] == 0.0); +} + +CTEST(sgemv, 0_nan_inf_incy_2) +{ + blasint N = 17; + blasint Ny = 33; + blasint incX = 1; + blasint incY = 2; + float alpha = 0.0; + float beta = 0.0; + char trans = 'N'; + float A[N * N]; + float X[N]; + float Y[Ny]; + float *ay = Y; + + memset(A, 0, sizeof(A)); + memset(X, 0, sizeof(X)); + memset(Y, 0, sizeof(Y)); + for (int i = 0; i < (N - 1); i += 2) + { + ay[0] = NAN; + ay += 2; + ay[0] = INFINITY; + ay += 2; + } + Y[Ny - 1] = NAN; + BLASFUNC(sgemv)(&trans, &N, &N, &alpha, A, &N, X, &incX, &beta, Y, &incY); + for (int i = 0; i < Ny; i ++) + ASSERT_TRUE(Y[i] == 0.0); +} + +#endif + +#ifdef BUILD_DOUBLE +CTEST(dgemv, 0_nan_inf) +{ + blasint N = 17; + blasint incX = 1; + blasint incY = 1; + double alpha = 0.0; + 
double beta = 0.0; + char trans = 'N'; + double A[N * N]; + double X[N]; + double Y[N]; + + memset(A, 0, sizeof(A)); + memset(X, 0, sizeof(X)); + for (int i = 0; i < (N - 1); i += 2) + { + Y[i] = NAN; + Y[i + 1] = INFINITY; + } + Y[N - 1] = NAN; + BLASFUNC(dgemv)(&trans, &N, &N, &alpha, A, &N, X, &incX, &beta, Y, &incY); + for (int i = 0; i < N; i ++) + ASSERT_TRUE(Y[i] == 0.0); +} + +CTEST(dgemv, 0_nan_inf_incy_2) +{ + blasint N = 17; + blasint Ny = 33; + blasint incX = 1; + blasint incY = 2; + double alpha = 0.0; + double beta = 0.0; + char trans = 'N'; + double A[N * N]; + double X[N]; + double Y[Ny]; + double *ay = Y; + + memset(A, 0, sizeof(A)); + memset(X, 0, sizeof(X)); + memset(Y, 0, sizeof(Y)); + for (int i = 0; i < (N - 1); i += 2) + { + ay[0] = NAN; + ay += 2; + ay[0] = INFINITY; + ay += 2; + } + Y[Ny - 1] = NAN; + BLASFUNC(dgemv)(&trans, &N, &N, &alpha, A, &N, X, &incX, &beta, Y, &incY); + for (int i = 0; i < Ny; i ++) + ASSERT_TRUE(Y[i] == 0.0); +} + +#endif From f6d6c14a96602fb2cfb53584541017434f7492e2 Mon Sep 17 00:00:00 2001 From: gxw Date: Wed, 17 Jul 2024 09:23:49 +0800 Subject: [PATCH 4/5] mips: Fixed numpy CI failure --- kernel/mips/dscal_msa.c | 12 +++++------- kernel/mips/scal.c | 41 ++++++++++++++++++++++++----------------- kernel/mips/sscal_msa.c | 9 +++------ 3 files changed, 32 insertions(+), 30 deletions(-) diff --git a/kernel/mips/dscal_msa.c b/kernel/mips/dscal_msa.c index e95f0a655..cc8d83441 100644 --- a/kernel/mips/dscal_msa.c +++ b/kernel/mips/dscal_msa.c @@ -42,7 +42,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, if (1 == inc_x) { - if (0) //if (0.0 == da ) + if (0.0 == da && !dummy2) { v2f64 zero_v = {0.0, 0.0}; @@ -240,14 +240,12 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, } else { - if (da == 0.0) + if (da == 0.0 && !dummy2) { for (i = n; i--;) - { - if (isfinite(*x)) - *x = 0.0; - else - *x = NAN; + { + *x = 0.0; + x += inc_x; } } diff --git 
a/kernel/mips/scal.c b/kernel/mips/scal.c index d51fd9ccd..5f12d4271 100644 --- a/kernel/mips/scal.c +++ b/kernel/mips/scal.c @@ -29,27 +29,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) { - BLASLONG i=0,j=0; + BLASLONG i = 0, j = 0; - while(j < n) - { + // Resolved issue 4728 when the caller is {s/d}scal + if (da == 0.0 && dummy2 == 1) + { + while(j < n) + { + x[i] = da * x[i] ; - if ( da == 0.0 ) - if (isnan(x[i])||isinf(x[i])) - x[i]=NAN; - else - x[i]=0.0; - else if (isnan(da)) - x[i]=NAN; - else - x[i] = da * x[i] ; + i += inc_x ; + j++; + } + } + else + { + while(j < n) + { - i += inc_x ; - j++; - - } - return 0; + if ( da == 0.0 ) + x[i] = 0.0; + else + x[i] = da * x[i] ; + i += inc_x ; + j++; + } + } + return 0; } diff --git a/kernel/mips/sscal_msa.c b/kernel/mips/sscal_msa.c index bfd477b6a..953cf0fbc 100644 --- a/kernel/mips/sscal_msa.c +++ b/kernel/mips/sscal_msa.c @@ -42,7 +42,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, if (1 == inc_x) { - if (0) // if (0.0 == da) + if (0.0 == da && !dummy2) { v4f32 zero_v = {0.0, 0.0, 0.0, 0.0}; @@ -255,14 +255,11 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, } else { - if (0.0 == da) + if (0.0 == da && !dummy2) { for (i = n; i--;) { - if (isfinite(*x)) - *x = 0; - else - *x = NAN; + *x = 0; x += inc_x; } } From 34b80ce03f922ebdde8e081a7c5380f460cf215c Mon Sep 17 00:00:00 2001 From: gxw Date: Wed, 17 Jul 2024 09:52:14 +0800 Subject: [PATCH 5/5] mips64: Fixed numpy CI failure --- kernel/mips64/scal.S | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/mips64/scal.S b/kernel/mips64/scal.S index e392a9c6a..49716d3ed 100644 --- a/kernel/mips64/scal.S +++ b/kernel/mips64/scal.S @@ -48,6 +48,7 @@ #define TEMP $3 #define XX $5 +#define DUMMY2 $6 
#define ALPHA $f15 @@ -73,13 +74,13 @@ blez N, .L999 dsll INCX, INCX, BASE_SHIFT - CMPEQ $fcc0, ALPHA, a1 - NOP + CMPEQ $fcc0, ALPHA, a1 + LDARG DUMMY2, 8($sp) bc1f $fcc0, .L50 - NOP + dsll DUMMY2, DUMMY2, BASE_SHIFT - bc1t $fcc0, .L50 + beq DUMMY2, TEMP, .L50 // If dummy2 == 1, do not directly copy 0 NOP bne INCX, TEMP, .L20