Merge pull request #4563 from XiWeiGu/loongarch_fix_lasx

Loongarch: Fixed LASX opt
Martin Kroeker 2024-03-16 10:34:32 +01:00 committed by GitHub
commit 79cb121ab9
9 changed files with 205 additions and 59 deletions


@@ -160,8 +160,8 @@
 xvinsgr2vr.d VX1, t2, 1
 xvinsgr2vr.d VX1, t3, 2
 xvinsgr2vr.d VX1, t4, 3
-xvfmaxa.d VM1, VX0, VX1
-xvfmaxa.d VM0, VM0, VM1
+xvfmina.d VM1, VX0, VX1
+xvfmina.d VM0, VM0, VM1
 #else
 ld.w t1, X, 0
 add.d X, X, INCX
@@ -187,7 +187,7 @@
 xvinsgr2vr.w VM1, t2, 5
 xvinsgr2vr.w VM1, t3, 6
 xvinsgr2vr.w VM1, t4, 7
-xvfmaxa.s VM0, VM0, VM1
+xvfmina.s VM0, VM0, VM1
 #endif
 addi.d I, I, -1
 blt $r0, I, .L21
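
These two hunks flip `xvfmaxa` to `xvfmina`. Both instructions select by magnitude, so a minimum-of-absolute-values kernel that reduces with the max-by-magnitude variant silently computes the amax result instead. A minimal scalar model in C of the two semantics (the helper names are illustrative, not OpenBLAS code):

```c
#include <math.h>
#include <stdio.h>

/* Scalar analogues of the LASX selects: xvfmina keeps the operand with
 * the smaller magnitude, xvfmaxa the one with the larger magnitude. */
static double fmina(double a, double b) { return fabs(a) < fabs(b) ? a : b; }
static double fmaxa(double a, double b) { return fabs(a) > fabs(b) ? a : b; }

int main(void) {
    double x[] = { -4.0, 1.0, -0.5, 3.0 };
    double mn = x[0], mx = x[0];
    for (int i = 1; i < 4; i++) {
        mn = fmina(mn, x[i]);  /* what the corrected loop computes */
        mx = fmaxa(mx, x[i]);  /* what the buggy loop computed     */
    }
    printf("amin-style: %g  amax-style: %g\n", mn, mx);  /* -0.5 vs -4 */
    return 0;
}
```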


@@ -139,9 +139,9 @@
 xvst VX1, Y, 4 * SIZE
 #else
 xvfmul.s VX0, VX0, VXA
-addi.d I, I, -1
 xvst VX0, Y, 0 * SIZE
 #endif
+addi.d I, I, -1
 addi.d X, X, 8 * SIZE
 addi.d Y, Y, 8 * SIZE
 blt $r0, I, .L112
@@ -288,6 +288,7 @@
 addi.d X, X, 8 * SIZE
 addi.d I, I, -1
 blt $r0, I, .L121
+move Y, YY
 b .L997
 .align 3
@@ -334,6 +335,7 @@
 add.d YY, YY, INCY
 addi.d X, X, 8 * SIZE
 blt $r0, I, .L122
+move Y, YY
 b .L997
 .align 3
@@ -425,6 +427,7 @@
 add.d YY, YY, INCY
 addi.d I, I, -1
 blt $r0, I, .L123
+move Y, YY
 b .L997
 .align 3
@@ -465,6 +468,7 @@
 add.d YY, YY, INCY
 addi.d I, I, -1
 blt $r0, I, .L124
+move Y, YY
 b .L997
 .align 3
@@ -803,6 +807,7 @@
 #endif
 add.d YY, YY, INCY
 blt $r0, I, .L221
+move Y, YY
 b .L997
 .align 3
@@ -895,6 +900,7 @@
 #endif
 add.d YY, YY, INCY
 blt $r0, I, .L222
+move Y, YY
 b .L997
 .align 3
@@ -987,6 +993,7 @@
 #endif
 add.d YY, YY, INCY
 blt $r0, I, .L223
+move Y, YY
 b .L997
 .align 3
@@ -1027,6 +1034,7 @@
 add.d YY, YY, INCY
 addi.d I, I, -1
 blt $r0, I, .L224
+move Y, YY
 b .L997
 .align 3
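
Two fixes recur through this file: an `addi.d I, I, -1` loop-counter decrement sat inside only one branch of an `#if`/`#else` pair, and the strided paths branched to the scalar tail at `.L997` without copying the advanced output pointer `YY` back into `Y`. A hedged C sketch of the control-flow pattern (the names mirror the registers, but the interface is illustrative):

```c
#include <stddef.h>

/* Sketch of an axpy-like loop: the counter must be decremented on every
 * path, and the scalar tail must resume from the pointer the vector
 * loop advanced, which is what the added "move Y, YY" restores. */
void axpy_like(size_t n, float a, const float *x, float *y, ptrdiff_t incy)
{
    float *yy = y;                        /* YY, advanced by the main loop */
    for (size_t i = n / 8; i > 0; i--) {  /* decrement on every iteration  */
        for (int k = 0; k < 8; k++) {     /* stands in for the LASX body   */
            *yy += a * x[k];
            yy += incy;
        }
        x += 8;
    }
    y = yy;  /* "move Y, YY": without it the tail rewrites the start of Y */
    for (size_t r = n % 8; r > 0; r--) {
        *y += a * *x++;
        y += incy;
    }
}
```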


@@ -107,15 +107,27 @@
 #ifdef DOUBLE
 xvpickve.d x1, VM0, 0
 xvpickve.d x2, VM0, 1
-XVFMAX VM0, x1, x2
+XVFMAX VX0, x1, x2
+xvpickve.d x1, VM0, 2
+xvpickve.d x2, VM0, 3
+XVFMAX VX1, x1, x2
+XVFMAX VM0, VX0, VX1
 #else
 xvpickve.w x1, VM0, 0
 xvpickve.w x2, VM0, 1
 xvpickve.w x3, VM0, 2
 xvpickve.w x4, VM0, 3
+XVFMAX VX0, x1, x2
+XVFMAX VX1, x3, x4
+XVFMAX VX0, VX0, VX1
+xvpickve.w x1, VM0, 4
+xvpickve.w x2, VM0, 5
+xvpickve.w x3, VM0, 6
+xvpickve.w x4, VM0, 7
 XVFMAX VM0, x1, x2
 XVFMAX VM1, x3, x4
 XVFMAX VM0, VM0, VM1
+XVFMAX VM0, VM0, VX0
 #endif
 b .L23
 .align 3
@@ -150,7 +162,7 @@
 FABS t4, t4
 ADD t1, t1, t2
 ADD t3, t3, t4
-FMAX s1, t1, t3
+FMAX s2, t1, t3
 LD t1, X, 0 * SIZE
 LD t2, X, 1 * SIZE
 add.d X, X, INCX
@@ -178,13 +190,16 @@
 ADD t1, t1, t2
 ADD t3, t3, t4
 FMAX s4, t1, t3
+FMAX s1, s1, s2
+FMAX s3, s3, s4
+FMAX a0, a0, s3
+FMAX a0, a0, s1
 blt $r0, I, .L21
 .align 3
 .L22:
-FMAX s1, s1, s2
-FMAX s3, s3, s4
-FMAX s1, s1, s3
+MOV s1, a0
 .align 3
 .L23: //N<8
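
The first hunk completes a horizontal reduction: the old epilogue folded only the lower half of the running-maximum vector `VM0` (lanes 0..1 of 4 doubles, lanes 0..3 of 8 floats) and discarded the rest. A scalar model of the corrected full-width fold for the 8-lane float case:

```c
#include <math.h>

/* Scalar model of the fixed epilogue: all eight lanes of VM0 take part. */
static float reduce_max8(const float vm0[8])
{
    float lo = fmaxf(fmaxf(vm0[0], vm0[1]), fmaxf(vm0[2], vm0[3]));
    float hi = fmaxf(fmaxf(vm0[4], vm0[5]), fmaxf(vm0[6], vm0[7])); /* previously dropped */
    return fmaxf(lo, hi);
}
```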


@@ -116,15 +116,27 @@
 #ifdef DOUBLE
 xvpickve.d x1, VM0, 0
 xvpickve.d x2, VM0, 1
-XVFMIN VM0, x1, x2
+XVFMIN VX0, x1, x2
+xvpickve.d x1, VM0, 2
+xvpickve.d x2, VM0, 3
+XVFMIN VX1, x1, x2
+XVFMIN VM0, VX0, VX1
 #else
 xvpickve.w x1, VM0, 0
 xvpickve.w x2, VM0, 1
 xvpickve.w x3, VM0, 2
 xvpickve.w x4, VM0, 3
+XVFMIN VX0, x1, x2
+XVFMIN VX1, x3, x4
+XVFMIN VX0, VX0, VX1
+xvpickve.w x1, VM0, 4
+xvpickve.w x2, VM0, 5
+xvpickve.w x3, VM0, 6
+xvpickve.w x4, VM0, 7
 XVFMIN VM0, x1, x2
 XVFMIN VM1, x3, x4
 XVFMIN VM0, VM0, VM1
+XVFMIN VM0, VM0, VX0
 #endif
 b .L23
 .align 3
@@ -159,7 +171,7 @@
 FABS t4, t4
 ADD t1, t1, t2
 ADD t3, t3, t4
-FMIN s1, t1, t3
+FMIN s2, t1, t3
 LD t1, X, 0 * SIZE
 LD t2, X, 1 * SIZE
 add.d X, X, INCX
@@ -187,13 +199,16 @@
 ADD t1, t1, t2
 ADD t3, t3, t4
 FMIN s4, t1, t3
+FMIN s1, s1, s2
+FMIN s3, s3, s4
+FMIN a0, a0, s3
+FMIN a0, a0, s1
 blt $r0, I, .L21
 .align 3
 .L22:
-FMIN s1, s1, s2
-FMIN s3, s3, s4
-FMIN s1, s1, s3
+MOV s1, a0
 .align 3
 .L23: //N<8
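
This file mirrors the previous one with `XVFMIN`/`FMIN`, and its last hunk also reworks the scalar strided loop: the per-block partials `s1..s4` are now folded into the running result `a0` on every iteration, where the old code combined them once at `.L22`, after each pass had already overwritten them. A hedged scalar model of the corrected accumulation:

```c
#include <math.h>
#include <stddef.h>

/* Illustrative model: blocks of four complex elements, each producing
 * partials s1..s4 = |re| + |im|, folded into a0 inside the loop. */
static float camin_blocks(const float *re, const float *im, size_t nblocks)
{
    float a0 = fabsf(re[0]) + fabsf(im[0]);   /* running minimum */
    for (size_t b = 0; b < nblocks; b++) {
        const float *r = re + 4 * b, *im4 = im + 4 * b;
        float s1 = fabsf(r[0]) + fabsf(im4[0]);
        float s2 = fabsf(r[1]) + fabsf(im4[1]);
        float s3 = fabsf(r[2]) + fabsf(im4[2]);
        float s4 = fabsf(r[3]) + fabsf(im4[3]);
        /* folding only after the loop keeps just the last block's partials */
        a0 = fminf(a0, fminf(fminf(s1, s2), fminf(s3, s4)));
    }
    return a0;
}
```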


@@ -176,13 +176,13 @@
 xvilvh.d VX3, x4, x3
 xvst VX2, Y, 0 * SIZE
 xvst VX3, Y, 4 * SIZE
-addi.d X, Y, 8 * SIZE
+addi.d Y, Y, 8 * SIZE
 #else
 xvilvl.w VX2, x4 ,x3
 xvilvh.w VX3, x4, x3
 xvst VX2, Y, 0 * SIZE
 xvst VX3, Y, 8 * SIZE
-addi.d X, Y, 16 * SIZE
+addi.d Y, Y, 16 * SIZE
 #endif
 addi.d I, I, -1
 blt $r0, I, .L113
@@ -617,6 +617,7 @@
 xvstelm.d x4, YY, 1 * SIZE, 3
 add.d YY, YY, INCY
 blt $r0, I, .L222
+move Y, YY
 b .L997
 .align 3
 #else
@@ -691,6 +692,7 @@
 xvstelm.w x4, YY, 1 * SIZE, 7
 add.d YY, YY, INCY
 blt $r0, I, .L222
+move Y, YY
 b .L997
 .align 3
 #endif
@@ -1011,7 +1013,11 @@
 #endif
 .L997:
+#ifdef DOUBLE
+andi I, N, 3
+#else
 andi I, N, 7
+#endif
 bge $r0, I, .L999
 .align 3
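
Two fixes here: the interleave-and-store path advanced `X` instead of the `Y` pointer it had just stored through, and the scalar-tail count at `.L997` now depends on element width, since a 256-bit LASX register carries 4 doubles but 8 floats. A sketch of the corrected tail arithmetic (illustrative, not the kernel's actual interface):

```c
#include <stddef.h>

/* Elements per 256-bit vector differ by type, so the leftover count for
 * the scalar tail must too; the old code used n & 7 for both builds. */
static size_t tail_count(size_t n, int is_double)
{
    size_t per_vector = is_double ? 4 : 8;
    return n & (per_vector - 1);   /* N & 3 for double, N & 7 for float */
}
```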


@@ -104,7 +104,7 @@
 xvfadd.s res1, VX0, res1
 xvfadd.s res1, VX1, res1
 xvfadd.s res1, VX2, res1
-xvfadd.s res1, VX2, res1
+xvfadd.s res1, VX3, res1
 #endif
 .align 3
@@ -246,7 +246,7 @@
 xvfadd.s res1, VX0, res1
 xvfadd.s res1, VX1, res1
 xvfadd.s res1, VX2, res1
-xvfadd.s res1, VX2, res1
+xvfadd.s res1, VX3, res1
 #endif
 .align 3
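
A one-register typo: the fourth accumulation re-added the partial sum `VX2` instead of `VX3`, so a quarter of the input was dropped from the result and another quarter counted twice. The same fix appears again in the last file of this commit. In scalar terms:

```c
/* Scalar view of the fix: four partial sums, each added exactly once. */
static float fold_partials(float vx0, float vx1, float vx2, float vx3)
{
    float res1 = 0.0f;
    res1 += vx0;
    res1 += vx1;
    res1 += vx2;
    res1 += vx3;   /* was a second "res1 += vx2", dropping vx3 entirely */
    return res1;
}
```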


@@ -144,7 +144,8 @@
 xvfmina.d VM1, VM0, VM1
 #else
 addi.d I, I, -1
-xvadd.w VI2, VI1, VINC8
+xvadd.w VI1, VI1, VINC8
+xvor.v VI2, VI1, VI1
 xvfmina.s VM1, VX0, VM0
 #endif
 XVCMPEQ VT0, VM0, VM1
@@ -189,6 +190,7 @@
 XVFMINA VM0, VM0, VM1
 XVCMPEQ VT0, VM0, VM1
 xvbitsel.v VI0, VINC8, VINC4, VT0
+// $f9: x1
 fcmp.ceq.d $fcc0, $f15, $f9
 bceqz $fcc0, .L26
 XVCMPLT VT0, VI1, VI0
@@ -357,7 +359,8 @@
 xvinsgr2vr.w VX0, t2, 5
 xvinsgr2vr.w VX0, t3, 6
 xvinsgr2vr.w VX0, t4, 7
-xvadd.w VI2, VI1, VINC8
+xvadd.w VI1, VI1, VINC8
+xvor.v VI2, VI1, VI1
 xvfmina.s VM1, VX0, VM0
 xvfcmp.ceq.s VT0, VM1, VM0
 #endif
@@ -393,7 +396,7 @@
 movfr2gr.d i0, $f20
 .align 3
 #else
-fmov.s $f16, $f20
+fmov.s $f7, $f20
 .align 3
 .L252:
@@ -449,9 +452,8 @@
 .L292:
 xvfmina.s VM0, VX0, VM0
 xvfcmp.ceq.s VT0, VM0, VX0
-xvbitsel.v VI0, VI0, VI1, VT0
+xvbitsel.v VI0, VI0, $xr7, VT0
 movfr2gr.s i0, $f20
 #endif
 .L21: // N<8
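
The central fix here is index bookkeeping: the old code computed the next block's indices into the temporary (`xvadd.w VI2, VI1, VINC8`) but never advanced `VI1` itself, so every block was compared under first-block indices. The new code advances `VI1` and copies it into `VI2` with `xvor.v`. A loose scalar model of the corrected pattern:

```c
#include <math.h>
#include <stddef.h>

/* Illustrative index-of-minimum loop over blocks of eight floats: the
 * running base index (VI1) must advance each block; the candidate
 * indices (VI2) are derived from the advanced value. */
static size_t iamin_blocks(const float *x, size_t nblocks)
{
    float  vmin = fabsf(x[0]);
    size_t imin = 0;
    size_t vi1  = 0;                   /* base index of the current block */
    for (size_t b = 0; b < nblocks; b++) {
        for (size_t k = 0; k < 8; k++) {
            size_t vi2 = vi1 + k;      /* VI2: candidate index            */
            float  a   = fabsf(x[vi2]);
            if (a < vmin) { vmin = a; imin = vi2; }
        }
        vi1 += 8;                      /* the fix: VI1 itself advances    */
    }
    return imin;
}
```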


@@ -72,12 +72,25 @@
 FABS a1, a1
 ADD s1, a1, a0
 #ifdef DOUBLE
-xvreplve0.d VM0, VM0
 xvxor.v VI3, VI3, VI3 // 0
 li.d I, -1
 xvreplgr2vr.d VI4, I
 xvffint.d.l VI4, VI4 // -1
 bne INCX, TEMP, .L20
+// Init VM0
+xvreplve0.d VM0, VM0
+xvld VX0, X, 0 * SIZE
+xvld VX1, X, 4 * SIZE
+xvpickev.d x1, VX1, VX0
+xvpickod.d x2, VX1, VX0
+xvfmul.d x3, VI4, x1
+xvfmul.d x4, VI4, x2
+xvfcmp.clt.d VT0, x1, VI3
+xvfcmp.clt.d VINC8, x2, VI3
+xvbitsel.v x1, x1, x3, VT0
+xvbitsel.v x2, x2, x4, VINC8
+xvfadd.d VM0, x1, x2
 addi.d i0, i0, 1
 srai.d I, N, 2
 bge $r0, I, .L21
@@ -100,12 +113,24 @@
 addi.d i0, i0, 2
 xvinsgr2vr.d VI0, i0, 3 //4
 #else
-xvreplve0.w VM0, VM0
 xvxor.v VI3, VI3, VI3 // 0
 li.w I, -1
 xvreplgr2vr.w VI4, I
 xvffint.s.w VI4, VI4 // -1
 bne INCX, TEMP, .L20
+// Init VM0
+xvld VX0, X, 0 * SIZE
+xvld VX1, X, 8 * SIZE
+xvpickev.w x1, VX1, VX0
+xvpickod.w x2, VX1, VX0
+xvfmul.s x3, VI4, x1
+xvfmul.s x4, VI4, x2
+xvfcmp.clt.s VT0, x1, VI3
+xvfcmp.clt.s VINC4, x2, VI3
+xvbitsel.v x1, x1, x3, VT0
+xvbitsel.v x2, x2, x4, VINC4
+xvfadd.s VM0, x1, x2
 addi.w i0, i0, 1
 srai.d I, N, 3
 bge $r0, I, .L21
@@ -160,6 +185,7 @@
 xvfcmp.clt.d VINC8, x2, VI3
 xvbitsel.v x1, x1, x3, VT0
 xvbitsel.v x2, x2, x4, VINC8
+addi.d X, X, 8 * SIZE
 #else
 xvadd.w VI1, VI1, VINC8
 xvld VX1, X, 8 * SIZE
@@ -172,11 +198,11 @@
 xvfcmp.clt.s VINC4, x2, VI3
 xvbitsel.v x1, x1, x3, VT0
 xvbitsel.v x2, x2, x4, VINC4
+addi.d X, X, 16 * SIZE
 #endif
 XVFADD x1, x1, x2
 XVFMIN x3, VM0, x1
 XVCMPEQ VT0, x3, VM0
-addi.d X, X, 8 * SIZE
 xvbitsel.v VM0, x3, VM0, VT0
 xvbitsel.v VI0, VI1, VI0, VT0
 blt $r0, I, .L10
@@ -214,13 +240,13 @@
 xvpickve.w x2, VM0, 1
 xvpickve.w x3, VM0, 2
 xvpickve.w x4, VM0, 3
-xvfcmp.clt.s VT0, x1, x2
+xvfcmp.clt.s VT0, x2, x1
 xvbitsel.v VM1, x1, x2, VT0
 xvbitsel.v VINC4, VI1, VI2, VT0
-xvfcmp.clt.s VT0, x3, x4
+xvfcmp.clt.s VT0, x4, x3
 xvbitsel.v VM0, x3, x4, VT0
 xvbitsel.v VINC8, VI3, VI4, VT0
-xvfcmp.clt.s VT0, VM0, VM1
+xvfcmp.clt.s VT0, VM1, VM0
 xvbitsel.v VM0, VM0, VM1, VT0
 xvbitsel.v VI0, VINC8, VINC4, VT0
 #endif
@@ -233,6 +259,34 @@
 .L20: // INCX!=1
 #ifdef DOUBLE
+// Init VM0
+ld.d t1, X, 0 * SIZE
+ld.d t2, X, 1 * SIZE
+add.d i1, X, INCX
+ld.d t3, i1, 0 * SIZE
+ld.d t4, i1, 1 * SIZE
+add.d i1, i1, INCX
+xvinsgr2vr.d x1, t1, 0
+xvinsgr2vr.d x2, t2, 0
+xvinsgr2vr.d x1, t3, 1
+xvinsgr2vr.d x2, t4, 1
+ld.d t1, i1, 0 * SIZE
+ld.d t2, i1, 1 * SIZE
+add.d i1, i1, INCX
+ld.d t3, i1, 0 * SIZE
+ld.d t4, i1, 1 * SIZE
+xvinsgr2vr.d x1, t1, 2
+xvinsgr2vr.d x2, t2, 2
+xvinsgr2vr.d x1, t3, 3
+xvinsgr2vr.d x2, t4, 3
+xvfmul.d x3, VI4, x1
+xvfmul.d x4, VI4, x2
+xvfcmp.clt.d VT0, x1, VI3
+xvfcmp.clt.d VINC8, x2, VI3
+xvbitsel.v x1, x1, x3, VT0
+xvbitsel.v x2, x2, x4, VINC8
+xvfadd.d VM0, x1, x2
 addi.d i0, i0, 1
 srai.d I, N, 2
 bge $r0, I, .L21
@@ -240,21 +294,70 @@
 xvreplgr2vr.d VINC4, i0
 addi.d i0, i0, -7
 xvinsgr2vr.d VI1, i0, 0 //initialize the index value for vectorization
-addi.d i0, i0, 2
+addi.d i0, i0, 1
 xvinsgr2vr.d VI1, i0, 1
-addi.d i0, i0, -1
+addi.d i0, i0, 1
 xvinsgr2vr.d VI1, i0, 2
-addi.d i0, i0, 2
+addi.d i0, i0, 1
 xvinsgr2vr.d VI1, i0, 3
 addi.d i0, i0, 1
 xvinsgr2vr.d VI0, i0, 0 //1
-addi.d i0, i0, 2
-xvinsgr2vr.d VI0, i0, 1 //3
-addi.d i0, i0, -1
-xvinsgr2vr.d VI0, i0, 2 //2
-addi.d i0, i0, 2
+addi.d i0, i0, 1
+xvinsgr2vr.d VI0, i0, 1 //2
+addi.d i0, i0, 1
+xvinsgr2vr.d VI0, i0, 2 //3
+addi.d i0, i0, 1
 xvinsgr2vr.d VI0, i0, 3 //4
 #else
+// Init VM0
+ld.w t1, X, 0 * SIZE
+ld.w t2, X, 1 * SIZE
+add.d i1, X, INCX
+ld.w t3, i1, 0 * SIZE
+ld.w t4, i1, 1 * SIZE
+add.d i1, i1, INCX
+xvinsgr2vr.w x1, t1, 0
+xvinsgr2vr.w x2, t2, 0
+xvinsgr2vr.w x1, t3, 1
+xvinsgr2vr.w x2, t4, 1
+ld.w t1, i1, 0 * SIZE
+ld.w t2, i1, 1 * SIZE
+add.d i1, i1, INCX
+ld.w t3, i1, 0 * SIZE
+ld.w t4, i1, 1 * SIZE
+add.d i1, i1, INCX
+xvinsgr2vr.w x1, t1, 2
+xvinsgr2vr.w x2, t2, 2
+xvinsgr2vr.w x1, t3, 3
+xvinsgr2vr.w x2, t4, 3
+ld.w t1, i1, 0 * SIZE
+ld.w t2, i1, 1 * SIZE
+add.d i1, i1, INCX
+ld.w t3, i1, 0 * SIZE
+ld.w t4, i1, 1 * SIZE
+add.d i1, i1, INCX
+xvinsgr2vr.w x1, t1, 4
+xvinsgr2vr.w x2, t2, 4
+xvinsgr2vr.w x1, t3, 5
+xvinsgr2vr.w x2, t4, 5
+ld.w t1, i1, 0 * SIZE
+ld.w t2, i1, 1 * SIZE
+add.d i1, i1, INCX
+ld.w t3, i1, 0 * SIZE
+ld.w t4, i1, 1 * SIZE
+add.d i1, i1, INCX
+xvinsgr2vr.w x1, t1, 6
+xvinsgr2vr.w x2, t2, 6
+xvinsgr2vr.w x1, t3, 7
+xvinsgr2vr.w x2, t4, 7
+xvfmul.s x3, VI4, x1
+xvfmul.s x4, VI4, x2
+xvfcmp.clt.s VT0, x1, VI3
+xvfcmp.clt.s VINC8, x2, VI3
+xvbitsel.v x1, x1, x3, VT0
+xvbitsel.v x2, x2, x4, VINC8
+xvfadd.s VM0, x1, x2
 addi.w i0, i0, 1
 srai.d I, N, 3
 bge $r0, I, .L21
@@ -264,15 +367,15 @@
 xvinsgr2vr.w VI1, i0, 0 //initialize the index value for vectorization
 addi.w i0, i0, 1
 xvinsgr2vr.w VI1, i0, 1
-addi.w i0, i0, 3
+addi.w i0, i0, 1
 xvinsgr2vr.w VI1, i0, 2
 addi.w i0, i0, 1
 xvinsgr2vr.w VI1, i0, 3
-addi.w i0, i0, -3
+addi.w i0, i0, 1
 xvinsgr2vr.w VI1, i0, 4
 addi.w i0, i0, 1
 xvinsgr2vr.w VI1, i0, 5
-addi.w i0, i0, 3
+addi.w i0, i0, 1
 xvinsgr2vr.w VI1, i0, 6
 addi.w i0, i0, 1
 xvinsgr2vr.w VI1, i0, 7
@@ -280,15 +383,15 @@
 xvinsgr2vr.w VI0, i0, 0 //1
 addi.w i0, i0, 1
 xvinsgr2vr.w VI0, i0, 1 //2
-addi.w i0, i0, 3
-xvinsgr2vr.w VI0, i0, 2 //5
 addi.w i0, i0, 1
-xvinsgr2vr.w VI0, i0, 3 //6
-addi.w i0, i0, -3
-xvinsgr2vr.w VI0, i0, 4 //3
+xvinsgr2vr.w VI0, i0, 2 //3
+addi.w i0, i0, 1
+xvinsgr2vr.w VI0, i0, 3 //4
+addi.w i0, i0, 1
+xvinsgr2vr.w VI0, i0, 4 //5
+addi.w i0, i0, 1
+xvinsgr2vr.w VI0, i0, 5 //6
 addi.w i0, i0, 1
-xvinsgr2vr.w VI0, i0, 5 //4
-addi.w i0, i0, 3
 xvinsgr2vr.w VI0, i0, 6 //7
 addi.w i0, i0, 1
 xvinsgr2vr.w VI0, i0, 7 //8
@@ -350,7 +453,6 @@
 xvinsgr2vr.w x2, t2, 4
 xvinsgr2vr.w x1, t3, 5
 xvinsgr2vr.w x2, t4, 5
-xvadd.w VI1, VI1, VINC8
 ld.w t1, X, 0 * SIZE
 ld.w t2, X, 1 * SIZE
 add.d X, X, INCX
@@ -361,8 +463,6 @@
 xvinsgr2vr.w x2, t2, 6
 xvinsgr2vr.w x1, t3, 7
 xvinsgr2vr.w x2, t4, 7
-xvpickev.w x1, VX1, VX0
-xvpickod.w x2, VX1, VX0
 #endif
 addi.d I, I, -1
 XVFMUL x3, VI4, x1
@@ -410,13 +510,13 @@
 xvpickve.w x2, VM0, 1
 xvpickve.w x3, VM0, 2
 xvpickve.w x4, VM0, 3
-xvfcmp.clt.s VT0, x1, x2
+xvfcmp.clt.s VT0, x2, x1
 xvbitsel.v VM1, x1, x2, VT0
 xvbitsel.v VINC4, VI1, VI2, VT0
-xvfcmp.clt.s VT0, x3, x4
+xvfcmp.clt.s VT0, x4, x3
 xvbitsel.v VM0, x3, x4, VT0
 xvbitsel.v VINC8, VI3, VI4, VT0
-xvfcmp.clt.s VT0, VM0, VM1
+xvfcmp.clt.s VT0, VM1, VM0
 xvbitsel.v VM0, VM0, VM1, VT0
 #endif
 xvbitsel.v VI0, VINC8, VINC4, VT0
@@ -475,13 +575,13 @@
 xvpickve.w x2, VM0, 5
 xvpickve.w x3, VM0, 6
 xvpickve.w x4, VM0, 7
-xvfcmp.clt.s VT0, x1, x2
+xvfcmp.clt.s VT0, x2, x1
 xvbitsel.v x1, x1, x2, VT0
 xvbitsel.v VINC4, VI1, VI2, VT0
-xvfcmp.clt.s VT0, x3, x4
+xvfcmp.clt.s VT0, x4, x3
 xvbitsel.v VM0, x3, x4, VT0
 xvbitsel.v VINC8, VI3, VI4, VT0
-xvfcmp.clt.s VT0, VM0, x1
+xvfcmp.clt.s VT0, x1, VM0
 xvbitsel.v VM0, VM0, x1, VT0
 xvbitsel.v VI0, VINC8, VINC4, VT0
 fcmp.ceq.d $fcc0, $f15, $f9
@@ -512,7 +612,7 @@
 .align 3
 .L292:
-fcmp.clt.s $fcc0, $f15, $f13
+fcmp.clt.s $fcc0, $f13, $f15
 fsel $f15, $f15, $f13, $fcc0
 fsel $f20, $f20, $f16, $fcc0
 movfr2gr.s i0, $f20
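
The hunks above seed `VM0` from actual input data before the main loop, split the per-iteration pointer bump so each path advances by what it consumes (8 values per pass for double, 16 for float, where the old shared `8 * SIZE` undercounted the float path), renumber the lane-index tables into plain ascending order, drop a duplicated `xvadd.w VI1, VI1, VINC8` and a leftover `xvpickev`/`xvpickod` pair that overwrote freshly gathered lanes, and flip several strict less-than compares from `a < b` to `b < a` with the select operands unchanged. The flipped compares decide ties: on equality the earlier candidate must now win, so the kernel reports the lowest index as BLAS i?amin semantics require. A scalar model of that selection:

```c
/* With the flipped compare, the later candidate (v2, i2) wins only when
 * strictly smaller; on a tie the earlier, lower-index candidate stays. */
static void select_min(float v1, long i1, float v2, long i2,
                       float *vout, long *iout)
{
    if (v2 < v1) { *vout = v2; *iout = i2; }   /* new: clt(v2, v1)      */
    else         { *vout = v1; *iout = i1; }   /* tie keeps lower index */
}
```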


@@ -96,7 +96,7 @@
 xvfadd.s res1, VX0, res1
 xvfadd.s res1, VX1, res1
 xvfadd.s res1, VX2, res1
-xvfadd.s res1, VX2, res1
+xvfadd.s res1, VX3, res1
 #endif
 .align 3
@@ -200,7 +200,7 @@
 xvfadd.s res1, VX0, res1
 xvfadd.s res1, VX1, res1
 xvfadd.s res1, VX2, res1
-xvfadd.s res1, VX2, res1
+xvfadd.s res1, VX3, res1
 #endif
 .align 3