LoongArch: Fixed numpy CI failure

This commit is contained in:
gxw 2024-07-12 16:56:35 +08:00
parent f3cebb3ca3
commit 3f39c8f94f
3 changed files with 233 additions and 5 deletions

View File

@ -53,9 +53,86 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
PROLOGUE
/* Diff excerpt of a LoongArch *scal kernel (the .L50 body and the epilogue
   continue beyond this view).  This part is the ALPHA == 0 fast path: when
   the stack flag ("dummy2") is not set, the vector is zero-filled directly
   instead of being multiplied by 0. */
li.d TEMP, SIZE
ld.d XX, $sp, 0 // Load dummy2
slli.d XX, XX, BASE_SHIFT // scale the flag so dummy2 == 1 compares equal to SIZE below
MTC a1, $r0 // a1 = 0 moved from $r0; used as FP zero below
slli.d INCX, INCX, BASE_SHIFT // INCX: elements -> bytes
bge $r0, N, .L999 // N <= 0: nothing to do
CMPEQ $fcc0, ALPHA, a1
bceqz $fcc0, .L50 // ALPHA != 0: take the multiply path at .L50
beq XX, TEMP, .L50 // if dummy2 == 1, do not directly copy 0
srai.d I, N, 3 // I = N / 8 (8-way unrolled iteration count)
bne INCX, TEMP, .L20 // non-unit stride: strided zero-fill at .L20
bge $r0, I, .L15 // fewer than 8 elements: tail loop only
.align 3
.L12: // contiguous zero-fill, 8 elements per iteration
ST a1, X, 0 * SIZE
ST a1, X, 1 * SIZE
ST a1, X, 2 * SIZE
ST a1, X, 3 * SIZE
ST a1, X, 4 * SIZE
ST a1, X, 5 * SIZE
ST a1, X, 6 * SIZE
ST a1, X, 7 * SIZE
addi.w I, I, -1
addi.d X, X, 8 * SIZE
blt $r0, I, .L12
.align 3
.L15:
andi I, N, 7 // 0..7 leftover elements
bge $r0, I, .L999
.align 3
.L16: // contiguous tail: one element at a time
ST a1, X, 0 * SIZE
addi.d I, I, -1
addi.d X, X, SIZE
blt $r0, I, .L16
move $r4, $r17
fmov.d $f0, $f22
jirl $r0, $r1, 0x0 // return (jump to $r1/ra)
.align 3
.L20: // strided (INCX != 1) zero-fill path
srai.d I, N, 3
bge $r0, I, .L25
.align 3
.L22: // strided zero-fill, 8 elements per iteration
ST a1, X, 0 * SIZE
add.d X, X, INCX
ST a1, X, 0 * SIZE
add.d X, X, INCX
ST a1, X, 0 * SIZE
add.d X, X, INCX
ST a1, X, 0 * SIZE
add.d X, X, INCX
ST a1, X, 0 * SIZE
add.d X, X, INCX
ST a1, X, 0 * SIZE
add.d X, X, INCX
ST a1, X, 0 * SIZE
add.d X, X, INCX
ST a1, X, 0 * SIZE
addi.d I, I, -1
add.d X, X, INCX
blt $r0, I, .L22
.align 3
.L25:
andi I, N, 7 // 0..7 leftover elements
bge $r0, I, .L999
.align 3
.L26: // strided tail loop
addi.d I, I, -1
ST a1, X, 0 * SIZE
add.d X, X, INCX
blt $r0, I, .L26
move $r4, $r17
fmov.d $f0, $f22
jirl $r0, $r1, 0x0 // return
.align 3
.L50: // ALPHA != 0 (or dummy2 set): multiply path — body continues past this excerpt
srai.d I, N, 3

View File

@ -52,17 +52,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Diff excerpt of a LoongArch (LASX) *scal kernel; surrounding prologue and
// epilogue are not shown.  Duplicated lines below are the diff's old/new pairs.
bge $r0, N, .L999 // N <= 0: nothing to do
bge $r0, INCX, .L999 // non-positive stride: nothing to do
li.d TEMP, 1
ld.d t1, $sp, 0 // Load dummy2
movgr2fr.d a1, $r0
FFINT a1, a1 // a1 = 0.0
movgr2fr.d a2, TEMP
FFINT a2, a2 // a2 = 1.0
slli.d TEMP, TEMP, BASE_SHIFT // TEMP = element size in bytes
slli.d INCX, INCX, BASE_SHIFT // INCX: elements -> bytes
slli.d t1, t1, BASE_SHIFT // scale the flag so dummy2 == 1 compares equal to TEMP
CMPEQ $fcc0, ALPHA, a1
bcnez $fcc0, .L20 //ALPHA==0
CMPEQ $fcc0, ALPHA, a2
bcnez $fcc0, .L999 //ALPHA==1 return
.L1:
srai.d I, N, 3
beq INCX, TEMP, .L30 //ALPHA!=1 and INCX==1
beq INCX, TEMP, .L30 //ALPHA !=0|1 and INCX==1
MTG TEMP, ALPHA
#ifdef DOUBLE
xvreplgr2vr.d VALPHA, TEMP
@ -72,7 +76,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
move XX, X
.align 3
.L10: //ALPHA!=1 and INCX!=1
.L10: //ALPHA !=0|1 and INCX!=1
bge $r0, I, .L32
.align 3
.L11:
@ -165,6 +169,75 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
blt $r0, I, .L11
b .L32
.align 3
.L20: // ALPHA == 0: zero-fill unless the dummy2 flag is set
beq t1, TEMP, .L1 // if dummy2 == 1, do not directly copy 0
srai.d I, N, 3
beq INCX, TEMP, .L24 // unit stride: vectorized zero-fill at .L24
bge $r0, I, .L22
.align 3
.L21: // strided zero-fill, 8 elements per iteration
ST a1, X, 0
add.d X, X, INCX
ST a1, X, 0
add.d X, X, INCX
ST a1, X, 0
add.d X, X, INCX
ST a1, X, 0
add.d X, X, INCX
ST a1, X, 0
add.d X, X, INCX
ST a1, X, 0
add.d X, X, INCX
ST a1, X, 0
add.d X, X, INCX
ST a1, X, 0
add.d X, X, INCX
addi.d I, I, -1
blt $r0, I, .L21
.align 3
.L22:
andi I, N, 7 // 0..7 leftover elements
bge $r0, I, .L999
.align 3
.L23: // strided tail loop
ST a1, X, 0 * SIZE
addi.d I, I, -1
add.d X, X, INCX
blt $r0, I, .L23
jirl $r0, $r1, 0 // return
.align 3
.L24:
bge $r0, I, .L26 /*N<8 INCX==1*/
.align 3
.L25: // contiguous zero-fill, 8 elements per iteration via xvst vector stores
xvxor.v VX0, VX0, VX0 // VX0 = all zeros
xvst VX0, X, 0 * SIZE
#ifdef DOUBLE
xvst VX0, X, 4 * SIZE
#endif
addi.d I, I, -1
addi.d X, X, 8 * SIZE
blt $r0, I, .L25
.align 3
.L26:
andi I, N, 7 // 0..7 leftover elements
bge $r0, I, .L999
.align 3
.L27: // contiguous tail loop
ST a1, X, 0 * SIZE
addi.d I, I, -1
addi.d X, X, SIZE
blt $r0, I, .L27
jirl $r0, $r1, 0 // return
.align 3
.L30:
bge $r0, I, .L32/*N<8 INCX==1*/
MTG TEMP, ALPHA

View File

@ -51,6 +51,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Diff excerpt of a LoongArch (LSX) *scal kernel; surrounding prologue and
// epilogue are not shown.  Duplicated lines below are the diff's old/new pairs.
bge $r0, N, .L999 // N <= 0: nothing to do
bge $r0, INCX, .L999 // non-positive stride: nothing to do
ld.d t1, $sp, 0 // Load dummy2
li.d TEMP, 1
movgr2fr.d a1, $r0
FFINT a1, a1 // a1 = 0.0
@ -58,10 +59,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FFINT a2, a2 // a2 = 1.0
slli.d TEMP, TEMP, BASE_SHIFT // TEMP = element size in bytes
slli.d INCX, INCX, BASE_SHIFT // INCX: elements -> bytes
slli.d t1, t1, BASE_SHIFT // scale the flag so dummy2 == 1 compares equal to TEMP
CMPEQ $fcc0, ALPHA, a1
bcnez $fcc0, .L20 //ALPHA==0
CMPEQ $fcc0, ALPHA, a2
bcnez $fcc0, .L999 //ALPHA==1 return
.L1:
srai.d I, N, 3
beq INCX, TEMP, .L30 //ALPHA!=1 and INCX==1
beq INCX, TEMP, .L30 //ALPHA !=0|1 and INCX==1
MTG TEMP, ALPHA
#ifdef DOUBLE
vreplgr2vr.d VALPHA, TEMP
@ -71,7 +76,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
move XX, X
.align 3
.L10: //ALPHA!=1 and INCX!=1
.L10: //ALPHA !=0|1 and INCX!=1
bge $r0, I, .L32
.align 3
@ -169,6 +174,79 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
b .L32
.align 3
.L20: // ALPHA == 0: zero-fill unless the dummy2 flag is set
beq t1, TEMP, .L1 // if dummy2 == 1, do not directly copy 0
srai.d I, N, 3
beq INCX, TEMP, .L24 // unit stride: vectorized zero-fill at .L24
bge $r0, I, .L22
.align 3
.L21: // strided zero-fill, 8 elements per iteration
ST a1, X, 0
add.d X, X, INCX
ST a1, X, 0
add.d X, X, INCX
ST a1, X, 0
add.d X, X, INCX
ST a1, X, 0
add.d X, X, INCX
ST a1, X, 0
add.d X, X, INCX
ST a1, X, 0
add.d X, X, INCX
ST a1, X, 0
add.d X, X, INCX
ST a1, X, 0
add.d X, X, INCX
addi.d I, I, -1
blt $r0, I, .L21
.align 3
.L22:
andi I, N, 7 // 0..7 leftover elements
bge $r0, I, .L999
.align 3
.L23: // strided tail loop
ST a1, X, 0 * SIZE
addi.d I, I, -1
add.d X, X, INCX
blt $r0, I, .L23
jirl $r0, $r1, 0 // return
.align 3
.L24:
bge $r0, I, .L26 /*N<8 INCX==1*/
.align 3
.L25: // contiguous zero-fill, 8 elements per iteration via vst vector stores
vxor.v VX0, VX0, VX0 // VX0 = all zeros
vst VX0, X, 0 * SIZE
#ifdef DOUBLE
vst VX0, X, 2 * SIZE
vst VX0, X, 4 * SIZE
vst VX0, X, 6 * SIZE
#else
vst VX0, X, 4 * SIZE
#endif
addi.d I, I, -1
addi.d X, X, 8 * SIZE
blt $r0, I, .L25
.align 3
.L26:
andi I, N, 7 // 0..7 leftover elements
bge $r0, I, .L999
.align 3
.L27: // contiguous tail loop
ST a1, X, 0 * SIZE
addi.d I, I, -1
addi.d X, X, SIZE
blt $r0, I, .L27
jirl $r0, $r1, 0 // return
.align 3
.L30:
bge $r0, I, .L32/*N<8 INCX==1*/
MTG TEMP, ALPHA