From 3f39c8f94f5d61f69a7b1c578e7fc90c4c95d6e2 Mon Sep 17 00:00:00 2001
From: gxw
Date: Fri, 12 Jul 2024 16:56:35 +0800
Subject: [PATCH] LoongArch: Fixed numpy CI failure

---
 kernel/loongarch64/scal.S      | 77 +++++++++++++++++++++++++++++++
 kernel/loongarch64/scal_lasx.S | 79 ++++++++++++++++++++++++++++++--
 kernel/loongarch64/scal_lsx.S  | 82 +++++++++++++++++++++++++++++++++-
 3 files changed, 233 insertions(+), 5 deletions(-)

diff --git a/kernel/loongarch64/scal.S b/kernel/loongarch64/scal.S
index 8de710f41..431a44c1c 100644
--- a/kernel/loongarch64/scal.S
+++ b/kernel/loongarch64/scal.S
@@ -53,9 +53,86 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     PROLOGUE
 
     li.d    TEMP, SIZE
+    ld.d    XX, $sp, 0    // Load dummy2
+    slli.d  XX, XX, BASE_SHIFT
     MTC     a1, $r0
     slli.d  INCX, INCX, BASE_SHIFT
     bge     $r0, N, .L999
+    CMPEQ   $fcc0, ALPHA, a1
+    bceqz   $fcc0, .L50
+    beq     XX, TEMP, .L50    // if dummy2 == 1, do not directly copy 0
+    srai.d  I, N, 3
+    bne     INCX, TEMP, .L20
+    bge     $r0, I, .L15
+    .align 3
+
+.L12:
+    ST      a1, X, 0 * SIZE
+    ST      a1, X, 1 * SIZE
+    ST      a1, X, 2 * SIZE
+    ST      a1, X, 3 * SIZE
+    ST      a1, X, 4 * SIZE
+    ST      a1, X, 5 * SIZE
+    ST      a1, X, 6 * SIZE
+    ST      a1, X, 7 * SIZE
+    addi.w  I, I, -1
+    addi.d  X, X, 8 * SIZE
+    blt     $r0, I, .L12
+    .align 3
+
+.L15:
+    andi    I, N, 7
+    bge     $r0, I, .L999
+    .align 3
+.L16:
+    ST      a1, X, 0 * SIZE
+    addi.d  I, I, -1
+    addi.d  X, X, SIZE
+    blt     $r0, I, .L16
+    move    $r4, $r17
+    fmov.d  $f0, $f22
+    jirl    $r0, $r1, 0x0
+    .align 3
+
+.L20:
+    srai.d  I, N, 3
+    bge     $r0, I, .L25
+    .align 3
+
+.L22:
+    ST      a1, X, 0 * SIZE
+    add.d   X, X, INCX
+    ST      a1, X, 0 * SIZE
+    add.d   X, X, INCX
+    ST      a1, X, 0 * SIZE
+    add.d   X, X, INCX
+    ST      a1, X, 0 * SIZE
+    add.d   X, X, INCX
+    ST      a1, X, 0 * SIZE
+    add.d   X, X, INCX
+    ST      a1, X, 0 * SIZE
+    add.d   X, X, INCX
+    ST      a1, X, 0 * SIZE
+    add.d   X, X, INCX
+    ST      a1, X, 0 * SIZE
+    addi.d  I, I, -1
+    add.d   X, X, INCX
+    blt     $r0, I, .L22
+    .align 3
+
+.L25:
+    andi    I, N, 7
+    bge     $r0, I, .L999
+    .align 3
+.L26:
+    addi.d  I, I, -1
+    ST      a1, X, 0 * SIZE
+    add.d   X, X, INCX
+    blt     $r0, I, .L26
+    move    $r4, $r17
+    fmov.d  $f0, $f22
+    jirl    $r0, $r1, 0x0
+    .align 3
 
 .L50:
     srai.d  I, N, 3
diff --git a/kernel/loongarch64/scal_lasx.S b/kernel/loongarch64/scal_lasx.S
index b4585c1b9..dd69636e6 100644
--- a/kernel/loongarch64/scal_lasx.S
+++ b/kernel/loongarch64/scal_lasx.S
@@ -52,17 +52,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     bge     $r0, N, .L999
     bge     $r0, INCX, .L999
     li.d    TEMP, 1
+    ld.d    t1, $sp, 0    // Load dummy2
     movgr2fr.d a1, $r0
     FFINT   a1, a1
     movgr2fr.d a2, TEMP
     FFINT   a2, a2
     slli.d  TEMP, TEMP, BASE_SHIFT
     slli.d  INCX, INCX, BASE_SHIFT
+    slli.d  t1, t1, BASE_SHIFT
+    CMPEQ   $fcc0, ALPHA, a1
+    bcnez   $fcc0, .L20    //ALPHA==0
     CMPEQ   $fcc0, ALPHA, a2
     bcnez   $fcc0, .L999    //ALPHA==1 return
-
+.L1:
     srai.d  I, N, 3
-    beq     INCX, TEMP, .L30    //ALPHA!=1 and INCX==1
+    beq     INCX, TEMP, .L30    //ALPHA !=0|1 and INCX==1
     MTG     TEMP, ALPHA
 #ifdef DOUBLE
     xvreplgr2vr.d VALPHA, TEMP
@@ -72,7 +76,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     move    XX, X
     .align 3
 
-.L10: //ALPHA!=1 and INCX!=1
+.L10: //ALPHA !=0|1 and INCX!=1
     bge     $r0, I, .L32
     .align 3
 .L11:
@@ -165,6 +169,75 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     blt     $r0, I, .L11
     b       .L32
     .align 3
+
+.L20:
+    beq     t1, TEMP, .L1    // if dummy2 == 1, do not directly copy 0
+    srai.d  I, N, 3
+    beq     INCX, TEMP, .L24
+    bge     $r0, I, .L22
+    .align 3
+
+.L21:
+    ST      a1, X, 0
+    add.d   X, X, INCX
+    ST      a1, X, 0
+    add.d   X, X, INCX
+    ST      a1, X, 0
+    add.d   X, X, INCX
+    ST      a1, X, 0
+    add.d   X, X, INCX
+    ST      a1, X, 0
+    add.d   X, X, INCX
+    ST      a1, X, 0
+    add.d   X, X, INCX
+    ST      a1, X, 0
+    add.d   X, X, INCX
+    ST      a1, X, 0
+    add.d   X, X, INCX
+    addi.d  I, I, -1
+    blt     $r0, I, .L21
+    .align 3
+
+.L22:
+    andi    I, N, 7
+    bge     $r0, I, .L999
+    .align 3
+
+.L23:
+    ST      a1, X, 0 * SIZE
+    addi.d  I, I, -1
+    add.d   X, X, INCX
+    blt     $r0, I, .L23
+    jirl    $r0, $r1, 0
+    .align 3
+
+.L24:
+    bge     $r0, I, .L26 /*N<8 INCX==1*/
+    .align 3
+.L25:
+    xvxor.v VX0, VX0, VX0
+    xvst    VX0, X, 0 * SIZE
+#ifdef DOUBLE
+    xvst    VX0, X, 4 * SIZE
+#endif
+    addi.d  I, I, -1
+    addi.d  X, X, 8 * SIZE
+    blt     $r0, I, .L25
+    .align 3
+
+.L26:
+    andi    I, N, 7
+    bge     $r0, I, .L999
+    .align 3
+
+.L27:
+    ST      a1, X, 0 * SIZE
+    addi.d  I, I, -1
+    addi.d  X, X, SIZE
+    blt     $r0, I, .L27
+    jirl    $r0, $r1, 0
+    .align 3
+
 .L30:
     bge     $r0, I, .L32/*N<8 INCX==1*/
     MTG     TEMP, ALPHA
diff --git a/kernel/loongarch64/scal_lsx.S b/kernel/loongarch64/scal_lsx.S
index a27e050ed..57dc5d0d3 100644
--- a/kernel/loongarch64/scal_lsx.S
+++ b/kernel/loongarch64/scal_lsx.S
@@ -51,6 +51,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
     bge     $r0, N, .L999
     bge     $r0, INCX, .L999
+    ld.d    t1, $sp, 0    // Load dummy2
     li.d    TEMP, 1
     movgr2fr.d a1, $r0
     FFINT   a1, a1
@@ -58,10 +59,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     FFINT   a2, a2
     slli.d  TEMP, TEMP, BASE_SHIFT
     slli.d  INCX, INCX, BASE_SHIFT
+    slli.d  t1, t1, BASE_SHIFT
+    CMPEQ   $fcc0, ALPHA, a1
+    bcnez   $fcc0, .L20    //ALPHA==0
     CMPEQ   $fcc0, ALPHA, a2
     bcnez   $fcc0, .L999    //ALPHA==1 return
+.L1:
     srai.d  I, N, 3
-    beq     INCX, TEMP, .L30    //ALPHA!=1 and INCX==1
+    beq     INCX, TEMP, .L30    //ALPHA !=0|1 and INCX==1
     MTG     TEMP, ALPHA
 #ifdef DOUBLE
     vreplgr2vr.d VALPHA, TEMP
@@ -71,7 +76,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     move    XX, X
     .align 3
 
-.L10: //ALPHA!=1 and INCX!=1
+.L10: //ALPHA !=0|1 and INCX!=1
     bge     $r0, I, .L32
     .align 3
 
@@ -169,6 +174,79 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     b       .L32
     .align 3
 
+.L20:
+    beq     t1, TEMP, .L1    // if dummy2 == 1, do not directly copy 0
+    srai.d  I, N, 3
+    beq     INCX, TEMP, .L24
+    bge     $r0, I, .L22
+    .align 3
+
+.L21:
+    ST      a1, X, 0
+    add.d   X, X, INCX
+    ST      a1, X, 0
+    add.d   X, X, INCX
+    ST      a1, X, 0
+    add.d   X, X, INCX
+    ST      a1, X, 0
+    add.d   X, X, INCX
+    ST      a1, X, 0
+    add.d   X, X, INCX
+    ST      a1, X, 0
+    add.d   X, X, INCX
+    ST      a1, X, 0
+    add.d   X, X, INCX
+    ST      a1, X, 0
+    add.d   X, X, INCX
+    addi.d  I, I, -1
+    blt     $r0, I, .L21
+    .align 3
+
+.L22:
+    andi    I, N, 7
+    bge     $r0, I, .L999
+    .align 3
+
+.L23:
+    ST      a1, X, 0 * SIZE
+    addi.d  I, I, -1
+    add.d   X, X, INCX
+    blt     $r0, I, .L23
+    jirl    $r0, $r1, 0
+    .align 3
+
+.L24:
+    bge     $r0, I, .L26 /*N<8 INCX==1*/
+    .align 3
+
+.L25:
+    vxor.v  VX0, VX0, VX0
+    vst     VX0, X, 0 * SIZE
+#ifdef DOUBLE
+    vst     VX0, X, 2 * SIZE
+    vst     VX0, X, 4 * SIZE
+    vst     VX0, X, 6 * SIZE
+#else
+    vst     VX0, X, 4 * SIZE
+#endif
+    addi.d  I, I, -1
+    addi.d  X, X, 8 * SIZE
+    blt     $r0, I, .L25
+    .align 3
+
+.L26:
+    andi    I, N, 7
+    bge     $r0, I, .L999
+    .align 3
+
+.L27:
+    ST      a1, X, 0 * SIZE
+    addi.d  I, I, -1
+    addi.d  X, X, SIZE
+    blt     $r0, I, .L27
+    jirl    $r0, $r1, 0
+    .align 3
+
 .L30:
     bge     $r0, I, .L32/*N<8 INCX==1*/
     MTG     TEMP, ALPHA
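
The three kernels above special-case ALPHA == 0 in the same way. For reference, below is a minimal C sketch of the scalar behavior the new paths follow; it assumes the usual OpenBLAS SCAL kernel arguments, where dummy2 is the flag the assembly loads from the stack, and the function name is illustrative only, not part of the patch:

    /* Sketch only: scalar equivalent of the new alpha == 0 handling.
     * dummy2 != 1 -> store exact zeros (the new .L12/.L2x store loops);
     * dummy2 == 1 -> take the ordinary multiply path, so NaN/Inf already
     *                in x are not silently replaced by 0.                 */
    static void scal_alpha_zero_sketch(long n, double *x, long incx, long dummy2)
    {
        if (n <= 0)
            return;
        if (dummy2 == 1) {
            for (long i = 0; i < n; i++)
                x[i * incx] *= 0.0;   /* NaN * 0.0 stays NaN */
        } else {
            for (long i = 0; i < n; i++)
                x[i * incx] = 0.0;    /* direct zero fill */
        }
    }

The branch is decided once up front, mirroring the beq XX/t1, TEMP checks the patch adds before the store loops.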