loongarch: Fixed i{s/c/z}amin LASX opt

Author: gxw
Date:   2024-03-14 20:32:39 +08:00
Commit: 6159cffc58 (parent 7d755912b9)

2 changed files with 141 additions and 39 deletions
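For context: the i?amin kernels return the 1-based index of the element with the smallest absolute value (the complex c/z variants minimize |re| + |im|). A minimal scalar C sketch of that contract, assuming a BLAS-style interface; the name isamin_ref and the loop are illustrative, not OpenBLAS's actual code:

    #include <math.h>
    #include <stddef.h>

    /* Reference model: 1-based index of the first element with the
     * smallest absolute value; the lowest index wins ties. */
    static size_t isamin_ref(size_t n, const float *x, size_t incx)
    {
        if (n == 0 || incx == 0) return 0;
        size_t best = 1;
        float bmin = fabsf(x[0]);
        for (size_t i = 1; i < n; i++) {
            float v = fabsf(x[i * incx]);
            if (v < bmin) {   /* strict < keeps the lowest index on ties */
                bmin = v;
                best = i + 1;
            }
        }
        return best;
    }

Against this contract, the hunks below fix three things: the running minimum is now seeded from real data (the "Init VM0" blocks), the per-lane index tables become plain sequential counters, and inverted comparisons in the reduction tails are swapped back.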

File 1 of 2:

@@ -144,7 +144,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 xvfmina.d VM1, VM0, VM1
 #else
 addi.d I, I, -1
-xvadd.w VI2, VI1, VINC8
+xvadd.w VI1, VI1, VINC8
+xvor.v VI2, VI1, VI1
 xvfmina.s VM1, VX0, VM0
 #endif
 XVCMPEQ VT0, VM0, VM1
@@ -189,6 +190,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 XVFMINA VM0, VM0, VM1
 XVCMPEQ VT0, VM0, VM1
 xvbitsel.v VI0, VINC8, VINC4, VT0
+// $f9: x1
 fcmp.ceq.d $fcc0, $f15, $f9
 bceqz $fcc0, .L26
 XVCMPLT VT0, VI1, VI0
@@ -357,7 +359,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 xvinsgr2vr.w VX0, t2, 5
 xvinsgr2vr.w VX0, t3, 6
 xvinsgr2vr.w VX0, t4, 7
-xvadd.w VI2, VI1, VINC8
+xvadd.w VI1, VI1, VINC8
+xvor.v VI2, VI1, VI1
 xvfmina.s VM1, VX0, VM0
 xvfcmp.ceq.s VT0, VM1, VM0
 #endif
@@ -393,7 +396,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 movfr2gr.d i0, $f20
 .align 3
 #else
-fmov.s $f16, $f20
+fmov.s $f7, $f20
 .align 3
 .L252:
@@ -449,9 +452,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 .L292:
 xvfmina.s VM0, VX0, VM0
 xvfcmp.ceq.s VT0, VM0, VX0
-xvbitsel.v VI0, VI0, VI1, VT0
+xvbitsel.v VI0, VI0, $xr7, VT0
 movfr2gr.s i0, $f20
 #endif
 .L21: // N<8
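Note on the two changed instruction pairs in this file (hunks -144 and -357): VI1 now advances monotonically by the vector length each iteration, and xvor.v VI2, VI1, VI1 simply copies VI1 into VI2 (OR with itself is a register move). A rough per-lane C model of the bookkeeping, assuming xvfmina.s returns the operand of smaller magnitude; all names here are illustrative:

    #include <math.h>

    static float fmina_s(float a, float b)        /* models xvfmina.s */
    {
        return (fabsf(b) < fabsf(a)) ? b : a;
    }

    /* idx mirrors VI1, best/besti mirror VM0/VI0, block holds the
     * current 8 input lanes (VX0). */
    static void step_block(const float block[8], int idx[8],
                           float best[8], int besti[8])
    {
        for (int lane = 0; lane < 8; lane++) {
            idx[lane] += 8;                       /* xvadd.w VI1, VI1, VINC8 */
            float kept = fmina_s(best[lane], block[lane]); /* xvfmina.s */
            if (kept != best[lane]) {             /* XVCMPEQ VT0, VM0, VM1 */
                best[lane]  = kept;
                besti[lane] = idx[lane];          /* xvbitsel.v index select */
            }
        }
    }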

File 2 of 2:

@@ -72,12 +72,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 FABS a1, a1
 ADD s1, a1, a0
 #ifdef DOUBLE
-xvreplve0.d VM0, VM0
 xvxor.v VI3, VI3, VI3 // 0
 li.d I, -1
 xvreplgr2vr.d VI4, I
 xvffint.d.l VI4, VI4 // -1
 bne INCX, TEMP, .L20
+// Init VM0
+xvreplve0.d VM0, VM0
+xvld VX0, X, 0 * SIZE
+xvld VX1, X, 4 * SIZE
+xvpickev.d x1, VX1, VX0
+xvpickod.d x2, VX1, VX0
+xvfmul.d x3, VI4, x1
+xvfmul.d x4, VI4, x2
+xvfcmp.clt.d VT0, x1, VI3
+xvfcmp.clt.d VINC8, x2, VI3
+xvbitsel.v x1, x1, x3, VT0
+xvbitsel.v x2, x2, x4, VINC8
+xvfadd.d VM0, x1, x2
 addi.d i0, i0, 1
 srai.d I, N, 2
 bge $r0, I, .L21
@@ -100,12 +113,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 addi.d i0, i0, 2
 xvinsgr2vr.d VI0, i0, 3 //4
 #else
-xvreplve0.w VM0, VM0
 xvxor.v VI3, VI3, VI3 // 0
 li.w I, -1
 xvreplgr2vr.w VI4, I
 xvffint.s.w VI4, VI4 // -1
 bne INCX, TEMP, .L20
+// Init VM0
+xvld VX0, X, 0 * SIZE
+xvld VX1, X, 8 * SIZE
+xvpickev.w x1, VX1, VX0
+xvpickod.w x2, VX1, VX0
+xvfmul.s x3, VI4, x1
+xvfmul.s x4, VI4, x2
+xvfcmp.clt.s VT0, x1, VI3
+xvfcmp.clt.s VINC4, x2, VI3
+xvbitsel.v x1, x1, x3, VT0
+xvbitsel.v x2, x2, x4, VINC4
+xvfadd.s VM0, x1, x2
 addi.w i0, i0, 1
 srai.d I, N, 3
 bge $r0, I, .L21
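Both added Init VM0 blocks seed the running minimum from the first block of actual data instead of whatever VM0 happened to hold, which appears to be one of the bugs being fixed. |re| and |im| are formed branchlessly: VI4 holds -1.0 in every lane and VI3 holds 0.0, so a less-than-zero mask selects between x and -1.0*x. A one-lane scalar sketch (illustrative names):

    /* One lane of Init VM0 for a complex element (re, im). */
    static float init_lane(float re, float im)
    {
        float nre = -1.0f * re;    /* xvfmul x3, VI4, x1      */
        float nim = -1.0f * im;    /* xvfmul x4, VI4, x2      */
        if (re < 0.0f) re = nre;   /* xvfcmp.clt + xvbitsel.v */
        if (im < 0.0f) im = nim;
        return re + im;            /* xvfadd VM0, x1, x2      */
    }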
@@ -160,6 +185,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 xvfcmp.clt.d VINC8, x2, VI3
 xvbitsel.v x1, x1, x3, VT0
 xvbitsel.v x2, x2, x4, VINC8
+addi.d X, X, 8 * SIZE
 #else
 xvadd.w VI1, VI1, VINC8
 xvld VX1, X, 8 * SIZE
@@ -172,11 +198,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 xvfcmp.clt.s VINC4, x2, VI3
 xvbitsel.v x1, x1, x3, VT0
 xvbitsel.v x2, x2, x4, VINC4
+addi.d X, X, 16 * SIZE
 #endif
 XVFADD x1, x1, x2
 XVFMIN x3, VM0, x1
 XVCMPEQ VT0, x3, VM0
-addi.d X, X, 8 * SIZE
 xvbitsel.v VM0, x3, VM0, VT0
 xvbitsel.v VI0, VI1, VI0, VT0
 blt $r0, I, .L10
@@ -214,13 +240,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 xvpickve.w x2, VM0, 1
 xvpickve.w x3, VM0, 2
 xvpickve.w x4, VM0, 3
-xvfcmp.clt.s VT0, x1, x2
+xvfcmp.clt.s VT0, x2, x1
 xvbitsel.v VM1, x1, x2, VT0
 xvbitsel.v VINC4, VI1, VI2, VT0
-xvfcmp.clt.s VT0, x3, x4
+xvfcmp.clt.s VT0, x4, x3
 xvbitsel.v VM0, x3, x4, VT0
 xvbitsel.v VINC8, VI3, VI4, VT0
-xvfcmp.clt.s VT0, VM0, VM1
+xvfcmp.clt.s VT0, VM1, VM0
 xvbitsel.v VM0, VM0, VM1, VT0
 xvbitsel.v VI0, VINC8, VINC4, VT0
 #endif
@@ -233,6 +259,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 .L20: // INCX!=1
 #ifdef DOUBLE
+// Init VM0
+ld.d t1, X, 0 * SIZE
+ld.d t2, X, 1 * SIZE
+add.d i1, X, INCX
+ld.d t3, i1, 0 * SIZE
+ld.d t4, i1, 1 * SIZE
+add.d i1, i1, INCX
+xvinsgr2vr.d x1, t1, 0
+xvinsgr2vr.d x2, t2, 0
+xvinsgr2vr.d x1, t3, 1
+xvinsgr2vr.d x2, t4, 1
+ld.d t1, i1, 0 * SIZE
+ld.d t2, i1, 1 * SIZE
+add.d i1, i1, INCX
+ld.d t3, i1, 0 * SIZE
+ld.d t4, i1, 1 * SIZE
+xvinsgr2vr.d x1, t1, 2
+xvinsgr2vr.d x2, t2, 2
+xvinsgr2vr.d x1, t3, 3
+xvinsgr2vr.d x2, t4, 3
+xvfmul.d x3, VI4, x1
+xvfmul.d x4, VI4, x2
+xvfcmp.clt.d VT0, x1, VI3
+xvfcmp.clt.d VINC8, x2, VI3
+xvbitsel.v x1, x1, x3, VT0
+xvbitsel.v x2, x2, x4, VINC8
+xvfadd.d VM0, x1, x2
 addi.d i0, i0, 1
 srai.d I, N, 2
 bge $r0, I, .L21
@@ -240,21 +294,70 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 xvreplgr2vr.d VINC4, i0
 addi.d i0, i0, -7
 xvinsgr2vr.d VI1, i0, 0 //initialize the index value for vectorization
-addi.d i0, i0, 2
+addi.d i0, i0, 1
 xvinsgr2vr.d VI1, i0, 1
-addi.d i0, i0, -1
+addi.d i0, i0, 1
 xvinsgr2vr.d VI1, i0, 2
-addi.d i0, i0, 2
+addi.d i0, i0, 1
 xvinsgr2vr.d VI1, i0, 3
 addi.d i0, i0, 1
 xvinsgr2vr.d VI0, i0, 0 //1
-addi.d i0, i0, 2
-xvinsgr2vr.d VI0, i0, 1 //3
-addi.d i0, i0, -1
-xvinsgr2vr.d VI0, i0, 2 //2
-addi.d i0, i0, 2
+addi.d i0, i0, 1
+xvinsgr2vr.d VI0, i0, 1 //2
+addi.d i0, i0, 1
+xvinsgr2vr.d VI0, i0, 2 //3
+addi.d i0, i0, 1
 xvinsgr2vr.d VI0, i0, 3 //4
 #else
+// Init VM0
+ld.w t1, X, 0 * SIZE
+ld.w t2, X, 1 * SIZE
+add.d i1, X, INCX
+ld.w t3, i1, 0 * SIZE
+ld.w t4, i1, 1 * SIZE
+add.d i1, i1, INCX
+xvinsgr2vr.w x1, t1, 0
+xvinsgr2vr.w x2, t2, 0
+xvinsgr2vr.w x1, t3, 1
+xvinsgr2vr.w x2, t4, 1
+ld.w t1, i1, 0 * SIZE
+ld.w t2, i1, 1 * SIZE
+add.d i1, i1, INCX
+ld.w t3, i1, 0 * SIZE
+ld.w t4, i1, 1 * SIZE
+add.d i1, i1, INCX
+xvinsgr2vr.w x1, t1, 2
+xvinsgr2vr.w x2, t2, 2
+xvinsgr2vr.w x1, t3, 3
+xvinsgr2vr.w x2, t4, 3
+ld.w t1, i1, 0 * SIZE
+ld.w t2, i1, 1 * SIZE
+add.d i1, i1, INCX
+ld.w t3, i1, 0 * SIZE
+ld.w t4, i1, 1 * SIZE
+add.d i1, i1, INCX
+xvinsgr2vr.w x1, t1, 4
+xvinsgr2vr.w x2, t2, 4
+xvinsgr2vr.w x1, t3, 5
+xvinsgr2vr.w x2, t4, 5
+ld.w t1, i1, 0 * SIZE
+ld.w t2, i1, 1 * SIZE
+add.d i1, i1, INCX
+ld.w t3, i1, 0 * SIZE
+ld.w t4, i1, 1 * SIZE
+add.d i1, i1, INCX
+xvinsgr2vr.w x1, t1, 6
+xvinsgr2vr.w x2, t2, 6
+xvinsgr2vr.w x1, t3, 7
+xvinsgr2vr.w x2, t4, 7
+xvfmul.s x3, VI4, x1
+xvfmul.s x4, VI4, x2
+xvfcmp.clt.s VT0, x1, VI3
+xvfcmp.clt.s VINC8, x2, VI3
+xvbitsel.v x1, x1, x3, VT0
+xvbitsel.v x2, x2, x4, VINC8
+xvfadd.s VM0, x1, x2
 addi.w i0, i0, 1
 srai.d I, N, 3
 bge $r0, I, .L21
@@ -264,15 +367,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 xvinsgr2vr.w VI1, i0, 0 //initialize the index value for vectorization
 addi.w i0, i0, 1
 xvinsgr2vr.w VI1, i0, 1
-addi.w i0, i0, 3
+addi.w i0, i0, 1
 xvinsgr2vr.w VI1, i0, 2
 addi.w i0, i0, 1
 xvinsgr2vr.w VI1, i0, 3
-addi.w i0, i0, -3
+addi.w i0, i0, 1
 xvinsgr2vr.w VI1, i0, 4
 addi.w i0, i0, 1
 xvinsgr2vr.w VI1, i0, 5
-addi.w i0, i0, 3
+addi.w i0, i0, 1
 xvinsgr2vr.w VI1, i0, 6
 addi.w i0, i0, 1
 xvinsgr2vr.w VI1, i0, 7
@@ -280,15 +383,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 xvinsgr2vr.w VI0, i0, 0 //1
 addi.w i0, i0, 1
 xvinsgr2vr.w VI0, i0, 1 //2
-addi.w i0, i0, 3
-xvinsgr2vr.w VI0, i0, 2 //5
 addi.w i0, i0, 1
-xvinsgr2vr.w VI0, i0, 3 //6
-addi.w i0, i0, -3
-xvinsgr2vr.w VI0, i0, 4 //3
+xvinsgr2vr.w VI0, i0, 2 //3
+addi.w i0, i0, 1
+xvinsgr2vr.w VI0, i0, 3 //4
+addi.w i0, i0, 1
+xvinsgr2vr.w VI0, i0, 4 //5
+addi.w i0, i0, 1
+xvinsgr2vr.w VI0, i0, 5 //6
 addi.w i0, i0, 1
-xvinsgr2vr.w VI0, i0, 5 //4
-addi.w i0, i0, 3
 xvinsgr2vr.w VI0, i0, 6 //7
 addi.w i0, i0, 1
 xvinsgr2vr.w VI0, i0, 7 //8
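Why the old VI0 table ran 1,2,5,6,3,4,7,8 (per its //-comments): the old loop de-interleaved real and imaginary parts with xvpickev.w/xvpickod.w, and the LASX pick instructions operate within each 128-bit half, scrambling lane order. The new code inserts each loaded element into its lane directly with xvinsgr2vr.w, so plain sequential indices are correct. A C model of that scramble, on my reading of the xvpickev.w semantics (worth checking against the LASX manual):

    /* xvpickev.w xd, xj, xk: even-indexed 32-bit elements, picked
     * independently within each 128-bit half.  With VX0 = c0..c3 and
     * VX1 = c4..c7 (interleaved re/im), the real parts come out as
     * r0,r1,r4,r5,r2,r3,r6,r7, i.e. the old 1,2,5,6,3,4,7,8 table. */
    static void xvpickev_w_model(const float j[8], const float k[8], float d[8])
    {
        d[0] = k[0]; d[1] = k[2];   /* low half:  evens of k, then of j */
        d[2] = j[0]; d[3] = j[2];
        d[4] = k[4]; d[5] = k[6];   /* high half: same pattern          */
        d[6] = j[4]; d[7] = j[6];
    }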
@@ -350,7 +453,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 xvinsgr2vr.w x2, t2, 4
 xvinsgr2vr.w x1, t3, 5
 xvinsgr2vr.w x2, t4, 5
-xvadd.w VI1, VI1, VINC8
 ld.w t1, X, 0 * SIZE
 ld.w t2, X, 1 * SIZE
 add.d X, X, INCX
@@ -361,8 +463,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 xvinsgr2vr.w x2, t2, 6
 xvinsgr2vr.w x1, t3, 7
 xvinsgr2vr.w x2, t4, 7
-xvpickev.w x1, VX1, VX0
-xvpickod.w x2, VX1, VX0
 #endif
 addi.d I, I, -1
 XVFMUL x3, VI4, x1
@@ -410,13 +510,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 xvpickve.w x2, VM0, 1
 xvpickve.w x3, VM0, 2
 xvpickve.w x4, VM0, 3
-xvfcmp.clt.s VT0, x1, x2
+xvfcmp.clt.s VT0, x2, x1
 xvbitsel.v VM1, x1, x2, VT0
 xvbitsel.v VINC4, VI1, VI2, VT0
-xvfcmp.clt.s VT0, x3, x4
+xvfcmp.clt.s VT0, x4, x3
 xvbitsel.v VM0, x3, x4, VT0
 xvbitsel.v VINC8, VI3, VI4, VT0
-xvfcmp.clt.s VT0, VM0, VM1
+xvfcmp.clt.s VT0, VM1, VM0
 xvbitsel.v VM0, VM0, VM1, VT0
 #endif
 xvbitsel.v VI0, VINC8, VINC4, VT0
@@ -475,13 +575,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 xvpickve.w x2, VM0, 5
 xvpickve.w x3, VM0, 6
 xvpickve.w x4, VM0, 7
-xvfcmp.clt.s VT0, x1, x2
+xvfcmp.clt.s VT0, x2, x1
 xvbitsel.v x1, x1, x2, VT0
 xvbitsel.v VINC4, VI1, VI2, VT0
-xvfcmp.clt.s VT0, x3, x4
+xvfcmp.clt.s VT0, x4, x3
 xvbitsel.v VM0, x3, x4, VT0
 xvbitsel.v VINC8, VI3, VI4, VT0
-xvfcmp.clt.s VT0, VM0, x1
+xvfcmp.clt.s VT0, x1, VM0
 xvbitsel.v VM0, VM0, x1, VT0
 xvbitsel.v VI0, VINC8, VINC4, VT0
 fcmp.ceq.d $fcc0, $f15, $f9
@@ -512,7 +612,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 .align 3
 .L292:
-fcmp.clt.s $fcc0, $f15, $f13
+fcmp.clt.s $fcc0, $f13, $f15
 fsel $f15, $f15, $f13, $fcc0
 fsel $f20, $f20, $f16, $fcc0
 movfr2gr.s i0, $f20
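The recurring operand swap in the reduction tails (hunks -214, -410, -475, -512) looks like the core of the fix. xvbitsel.v and fsel take the second source where the mask or condition is set, so with the old compare order the code kept the larger value, turning the min-reduction into a max; the swapped order keeps the smaller value and, on exact ties, the first operand (the lower-index lane). In C terms:

    /* sel(cond, a, b) == (cond ? b : a), mirroring xvbitsel.v / fsel. */
    static float keep_old(float x1, float x2)
    {
        return (x1 < x2) ? x2 : x1;   /* old code: keeps the LARGER value */
    }

    static float keep_new(float x1, float x2)
    {
        return (x2 < x1) ? x2 : x1;   /* fixed: keeps the smaller value,
                                         and x1 (lower lane) on ties     */
    }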