Merge pull request #4566 from XiWeiGu/fix_loongarch_lsx

LoongArch: Fixed LSX opt
Martin Kroeker 2024-03-19 10:21:21 +01:00 committed by GitHub
commit b4a1153648
6 changed files with 121 additions and 31 deletions

View File

@@ -146,7 +146,7 @@
 add.d X, X, INCX
 vinsgr2vr.d VX1, t3, 0
 vinsgr2vr.d VX1, t4, 1
-vfmaxa.d VM1, VX0, VX1
+vfmina.d VM1, VX0, VX1
 ld.d t1, X, 0 * SIZE
 add.d X, X, INCX
 ld.d t2, X, 0 * SIZE
@@ -159,9 +159,9 @@
 add.d X, X, INCX
 vinsgr2vr.d VX1, t3, 0
 vinsgr2vr.d VX1, t4, 1
-vfmaxa.d VM2, VX0, VX1
-vfmaxa.d VM1, VM1, VM2
-vfmaxa.d VM0, VM0, VM1
+vfmina.d VM2, VX0, VX1
+vfmina.d VM1, VM1, VM2
+vfmina.d VM0, VM0, VM1
 #else
 ld.w t1, X, 0
 add.d X, X, INCX
@@ -187,8 +187,8 @@
 vinsgr2vr.w VX1, t2, 1
 vinsgr2vr.w VX1, t3, 2
 vinsgr2vr.w VX1, t4, 3
-vfmaxa.s VM1, VX0, VX1
-vfmaxa.s VM0, VM0, VM1
+vfmina.s VM1, VX0, VX1
+vfmina.s VM0, VM0, VM1
 #endif
 addi.d I, I, -1
 blt $r0, I, .L21
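
File names are not shown in this view, but swapping vfmaxa (absolute maximum) for vfmina (absolute minimum) throughout this kernel indicates a minimum-magnitude reduction (BLAS ?amin semantics) that had been computing a maximum. A minimal scalar sketch of what the corrected loop should return, assuming ?amin semantics; the function name and shape are illustrative only:

    #include <math.h>
    #include <stddef.h>

    /* Hedged reference: smallest absolute value of x. The vector code
     * folds partial results with vfmina, which keeps the operand of
     * smaller magnitude; the old vfmaxa kept the larger one, so the
     * kernel silently returned a maximum instead of a minimum. */
    static double amin_ref(size_t n, const double *x, size_t incx) {
        double m = fabs(x[0]);
        for (size_t i = 1; i < n; i++) {
            double v = fabs(x[i * incx]);
            if (v < m) m = v;   /* scalar analogue of vfmina.d */
        }
        return m;
    }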

View File

@@ -990,6 +990,7 @@
 #endif
 add.d YY, YY, INCY
 blt $r0, I, .L222
+move Y, YY
 b .L997
 .align 3
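
This one-line fix restores pointer bookkeeping in what appears to be an axpy/axpby-style kernel (the file name is not shown): the strided branch advances a scratch cursor YY while Y still points at the start, and the shared tail code at .L997 resumes from Y. Without copying YY back, the tail would reprocess elements from the beginning. A hypothetical C sketch of the same cursor discipline, with illustrative names and a unit-stride x for brevity:

    #include <stddef.h>

    /* Hedged sketch of the pattern `move Y, YY` restores: the unrolled
     * loop walks a working copy of the output pointer, and the primary
     * pointer must be re-synced before the scalar tail uses it. */
    static void axpy_sketch(size_t n, double a, const double *x,
                            double *y, ptrdiff_t incy) {
        double *yy = y;                       /* scratch cursor, like YY */
        size_t main_n = n & ~(size_t)3;
        for (size_t i = 0; i < main_n; i++) { /* unrolled main loop */
            *yy += a * x[i];
            yy += incy;
        }
        y = yy;                               /* the fix: move Y, YY */
        for (size_t i = main_n; i < n; i++) { /* tail starts in place */
            *y += a * x[i];
            y += incy;
        }
    }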

View File

@@ -177,7 +177,7 @@
 FABS t4, t4
 ADD t1, t1, t2
 ADD t3, t3, t4
-FMAX s1, t1, t3
+FMAX s2, t1, t3
 LD t1, X, 0 * SIZE
 LD t2, X, 1 * SIZE
 add.d X, X, INCX
@@ -205,13 +205,16 @@
 ADD t1, t1, t2
 ADD t3, t3, t4
 FMAX s4, t1, t3
+FMAX s1, s1, s2
+FMAX s3, s3, s4
+FMAX a0, a0, s3
+FMAX a0, a0, s1
 blt $r0, I, .L21
 .align 3
 .L22:
-FMAX s1, s1, s2
-FMAX s3, s3, s4
-FMAX s1, s1, s3
+MOV s1, a0
 .align 3
 .L23: //N<8
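
The bug here (apparently the complex absolute-maximum kernel, given the |re| + |im| arithmetic): the unrolled loop wrote its first partial result into s1 on every pass, clobbering the previous iteration's value, and s1..s4 were folded together only after the loop, so the result reflected just the final block. The fix writes the partial into s2, folds s1..s4 into the running maximum a0 on every iteration, and at .L22 simply copies a0 into s1. The FMIN kernel in the next file receives the mirror-image fix. A hedged scalar reference for the corrected structure, with illustrative names:

    #include <math.h>
    #include <stddef.h>

    /* Hedged reference: running maximum of |re| + |im| over a complex
     * vector, folded inside the loop rather than once at the end. */
    static float camax_ref(size_t n, const float *x, size_t incx) {
        float a0 = 0.0f;                 /* running max; |re|+|im| >= 0 */
        for (size_t i = 0; i < n; i++) {
            const float *p = x + 2 * i * incx;   /* interleaved re,im */
            float v = fabsf(p[0]) + fabsf(p[1]);
            if (v > a0) a0 = v;          /* fold every iteration */
        }
        return a0;
    }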

View File

@@ -186,7 +186,7 @@
 FABS t4, t4
 ADD t1, t1, t2
 ADD t3, t3, t4
-FMIN s1, t1, t3
+FMIN s2, t1, t3
 LD t1, X, 0 * SIZE
 LD t2, X, 1 * SIZE
 add.d X, X, INCX
@@ -214,13 +214,16 @@
 ADD t1, t1, t2
 ADD t3, t3, t4
 FMIN s4, t1, t3
+FMIN s1, s1, s2
+FMIN s3, s3, s4
+FMIN a0, a0, s3
+FMIN a0, a0, s1
 blt $r0, I, .L21
 .align 3
 .L22:
-FMIN s1, s1, s2
-FMIN s3, s3, s4
-FMIN s1, s1, s3
+MOV s1, a0
 .align 3
 .L23: //N<8

View File

@@ -82,6 +82,7 @@
 vreplgr2vr.d VXC, t1
 vreplgr2vr.d VXS, t2
 vreplgr2vr.d VXZ, t3
+srai.d I, N, 1
 #else
 vreplgr2vr.w VXC, t1
 vreplgr2vr.w VXS, t2
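
The added instruction supplies the main-loop trip count the double-precision path was missing: srai.d I, N, 1 is an arithmetic right shift, I = N >> 1, matching a loop body that consumes two elements per pass (this looks like a rot-style kernel from the c/s/zero constants, though the file name is not shown). A hedged sketch of the trip-count split, with an illustrative function:

    #include <stddef.h>

    /* Hedged sketch: plane rotation two elements per pass, plus a
     * scalar tail -- the shape that makes I = N >> 1 the right count. */
    static void rot_pairs(size_t n, double *x, double *y,
                          double c, double s) {
        size_t trip = n >> 1;            /* what srai.d I, N, 1 computes */
        size_t i = 0;
        for (size_t k = 0; k < trip; k++)
            for (int j = 0; j < 2; j++, i++) {   /* "vector" body */
                double xi = x[i], yi = y[i];
                x[i] = c * xi + s * yi;
                y[i] = c * yi - s * xi;
            }
        if (n & 1) {                     /* leftover element */
            double xi = x[i], yi = y[i];
            x[i] = c * xi + s * yi;
            y[i] = c * yi - s * xi;
        }
    }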

View File

@@ -70,18 +70,17 @@
 LD a1, X, 1 * SIZE
 FABS a0, a0
 FABS a1, a1
-ADD s1, a1, a0
-vreplvei.w VM0, VM0, 0
+ADD s1, a1, a0 // Initialization value
 vxor.v VI3, VI3, VI3 // 0
 #ifdef DOUBLE
 li.d I, -1
 vreplgr2vr.d VI4, I
 vffint.d.l VI4, VI4 // -1
-bne INCX, TEMP, .L20
+bne INCX, TEMP, .L20 // incx != 1
+// Init Index
 addi.d i0, i0, 1
-srai.d I, N, 2
-bge $r0, I, .L21
-slli.d i0, i0, 1 //2
+slli.d i0, i0, 1 // 2
 vreplgr2vr.d VINC4, i0
 addi.d i0, i0, -3
 vinsgr2vr.d VI1, i0, 0 //initialize the index value for vectorization
@@ -91,14 +90,30 @@
 vinsgr2vr.d VI0, i0, 0 //1
 addi.d i0, i0, 1
 vinsgr2vr.d VI0, i0, 1 //2
+srai.d I, N, 2
+bge $r0, I, .L21
+// Init VM0
+vld VX0, X, 0 * SIZE
+vld VX1, X, 2 * SIZE
+vpickev.d x1, VX1, VX0
+vpickod.d x2, VX1, VX0
+vfmul.d x3, VI4, x1
+vfmul.d x4, VI4, x2
+vfcmp.clt.d VT0, x1, VI3
+vfcmp.clt.d VINC8, x2, VI3
+vbitsel.v x1, x1, x3, VT0
+vbitsel.v x2, x2, x4, VINC8
+vfadd.d VM0, x1, x2
 #else
 li.w I, -1
 vreplgr2vr.w VI4, I
 vffint.s.w VI4, VI4 // -1
-bne INCX, TEMP, .L20
+bne INCX, TEMP, .L20 // incx != 1
+// Init Index
 addi.w i0, i0, 1
-srai.d I, N, 2
-bge $r0, I, .L21
 slli.w i0, i0, 2 //4
 vreplgr2vr.w VINC4, i0
 addi.w i0, i0, -7
@@ -117,6 +132,22 @@
 vinsgr2vr.w VI0, i0, 2 //3
 addi.w i0, i0, 1
 vinsgr2vr.w VI0, i0, 3 //4
+srai.d I, N, 2
+bge $r0, I, .L21
+// Init VM0
+vld VX0, X, 0 * SIZE
+vld VX1, X, 4 * SIZE
+vpickev.w x1, VX1, VX0
+vpickod.w x2, VX1, VX0
+vfmul.s x3, VI4, x1
+vfmul.s x4, VI4, x2
+vfcmp.clt.s VT0, x1, VI3
+vfcmp.clt.s VINC8, x2, VI3
+vbitsel.v x1, x1, x3, VT0
+vbitsel.v x2, x2, x4, VINC8
+vfadd.s VM0, x1, x2
 #endif
 .align 3
@@ -139,6 +170,7 @@
 vfcmp.ceq.d VT0, x3, VM0
 vbitsel.v VM0, x3, VM0, VT0
 vbitsel.v VI0, VI1, VI0, VT0
 vld VX0, X, 4 * SIZE
 vadd.d VI1, VI1, VINC4
 vld VX1, X, 6 * SIZE
@@ -206,9 +238,8 @@
 .L20: // INCX!=1
 #ifdef DOUBLE
 addi.d i0, i0, 1
-srai.d I, N, 2
-bge $r0, I, .L21
+// Init index
 slli.d i0, i0, 1 //2
 vreplgr2vr.d VINC4, i0
 addi.d i0, i0, -3
 vinsgr2vr.d VI1, i0, 0 //initialize the index value for vectorization
@@ -218,10 +249,32 @@
 vinsgr2vr.d VI0, i0, 0 //1
 addi.d i0, i0, 1
 vinsgr2vr.d VI0, i0, 1 //2
+srai.d I, N, 2
+bge $r0, I, .L21 // N < 4
+// Init VM0
+ld.d t1, X, 0 * SIZE
+ld.d t2, X, 1 * SIZE
+add.d i1, X, INCX
+ld.d t3, i1, 0 * SIZE
+ld.d t4, i1, 1 * SIZE
+add.d i1, i1, INCX
+vinsgr2vr.d x1, t1, 0
+vinsgr2vr.d x2, t2, 0
+vinsgr2vr.d x1, t3, 1
+vinsgr2vr.d x2, t4, 1
+vfmul.d x3, VI4, x1
+vfmul.d x4, VI4, x2
+vfcmp.clt.d VT0, x1, VI3
+vfcmp.clt.d VINC8, x2, VI3
+vbitsel.v x1, x1, x3, VT0
+vbitsel.v x2, x2, x4, VINC8
+vfadd.d VM0, x1, x2
 #else
 addi.w i0, i0, 1
-srai.d I, N, 2
-bge $r0, I, .L21
+// Init index
 slli.w i0, i0, 2 //4
 vreplgr2vr.w VINC4, i0
 addi.w i0, i0, -7
@@ -240,6 +293,36 @@
 vinsgr2vr.w VI0, i0, 2 //3
 addi.w i0, i0, 1
 vinsgr2vr.w VI0, i0, 3 //4
+srai.d I, N, 2
+bge $r0, I, .L21 // N < 4
+// Init VM0
+ld.w t1, X, 0 * SIZE
+ld.w t2, X, 1 * SIZE
+add.d i1, X, INCX
+ld.w t3, i1, 0 * SIZE
+ld.w t4, i1, 1 * SIZE
+add.d i1, i1, INCX
+vinsgr2vr.w x1, t1, 0
+vinsgr2vr.w x2, t2, 0
+vinsgr2vr.w x1, t3, 1
+vinsgr2vr.w x2, t4, 1
+ld.w t1, i1, 0 * SIZE
+ld.w t2, i1, 1 * SIZE
+add.d i1, i1, INCX
+ld.w t3, i1, 0 * SIZE
+ld.w t4, i1, 1 * SIZE
+add.d i1, i1, INCX
+vinsgr2vr.w x1, t1, 2
+vinsgr2vr.w x2, t2, 2
+vinsgr2vr.w x1, t3, 3
+vinsgr2vr.w x2, t4, 3
+vfcmp.clt.s VT0, x1, VI3
+vfcmp.clt.s VINC8, x2, VI3
+vbitsel.v x1, x1, x3, VT0
+vbitsel.v x2, x2, x4, VINC8
+vfadd.s VM0, x1, x2
 #endif
 .align 3
@@ -300,8 +383,6 @@
 vinsgr2vr.w x2, t2, 2
 vinsgr2vr.w x1, t3, 3
 vinsgr2vr.w x2, t4, 3
-vpickev.w x1, VX1, VX0
-vpickod.w x2, VX1, VX0
 #endif
 addi.d I, I, -1
 VFMUL x3, VI4, x1
@@ -358,12 +439,13 @@
 #ifdef DOUBLE
 vfmina.d VM0, x1, x2
 vfcmp.ceq.d VT0, x1, VM0
+vbitsel.v VI0, VI2, VI1, VT0
 #else
 fcmp.ceq.d $fcc0, $f15, $f10
 bceqz $fcc0, .L27
 vfcmp.clt.s VT0, VI2, VI0
-#endif
 vbitsel.v VI0, VI0, VI2, VT0
+#endif
 .align 3
 .L27:
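
The largest change (apparently a complex argmax/index kernel, judging by the index vectors VI0..VI4 and the |re| + |im| arithmetic): the running vector maximum VM0 was never seeded from the input, so the comparison loop started from stale register contents, and the N < 4 early exit was taken before the index vectors were set up. The rewrite initializes the indices first, then loads the first four elements and computes their |re| + |im| into VM0 for both the unit-stride and strided paths; the multiply-by--1/vfcmp/vbitsel sequence is a branch-free absolute value (negate, then select the negated lane where the original was below zero). A hedged scalar reference for the result such a kernel returns (1-based index, first maximum wins; names illustrative):

    #include <math.h>
    #include <stddef.h>

    /* Hedged reference: index of the element with the largest
     * |re| + |im|, seeded from the first element -- the point of the
     * fix is exactly this seeding of VM0. */
    static size_t icamax_ref(size_t n, const float *x, size_t incx) {
        size_t best = 0;
        float m = fabsf(x[0]) + fabsf(x[1]);     /* seed, like init VM0 */
        for (size_t i = 1; i < n; i++) {
            const float *p = x + 2 * i * incx;
            float v = fabsf(p[0]) + fabsf(p[1]);
            if (v > m) { m = v; best = i; }      /* strict >: keep first */
        }
        return best + 1;                         /* BLAS is 1-based */
    }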