Merge pull request #4563 from XiWeiGu/loongarch_fix_lasx

LoongArch: Fixed LASX optimizations
Martin Kroeker 2024-03-16 10:34:32 +01:00 committed by GitHub
commit 79cb121ab9
9 changed files with 205 additions and 59 deletions

View File

@@ -160,8 +160,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvinsgr2vr.d VX1, t2, 1
xvinsgr2vr.d VX1, t3, 2
xvinsgr2vr.d VX1, t4, 3
xvfmaxa.d VM1, VX0, VX1
xvfmaxa.d VM0, VM0, VM1
xvfmina.d VM1, VX0, VX1
xvfmina.d VM0, VM0, VM1
#else
ld.w t1, X, 0
add.d X, X, INCX
@@ -187,7 +187,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvinsgr2vr.w VM1, t2, 5
xvinsgr2vr.w VM1, t3, 6
xvinsgr2vr.w VM1, t4, 7
xvfmaxa.s VM0, VM0, VM1
xvfmina.s VM0, VM0, VM1
#endif
addi.d I, I, -1
blt $r0, I, .L21
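The hunk above swaps the max-magnitude instructions (xvfmaxa) for the min-magnitude ones (xvfmina): this kernel reduces toward the smallest absolute value but had been computing a maximum. A minimal scalar C sketch of what the fixed code computes (illustrative names; n >= 1 and a positive stride assumed):

    #include <math.h>
    #include <stddef.h>

    /* Smallest absolute value in x: xvfmina keeps the operand of
     * smaller magnitude, which is exactly this comparison. */
    double amin_ref(const double *x, size_t n, size_t incx)
    {
        double m = fabs(x[0]);
        for (size_t i = 1; i < n; i++) {
            double v = fabs(x[i * incx]);
            if (v < m)
                m = v;
        }
        return m;
    }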

View File

@@ -139,9 +139,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvst VX1, Y, 4 * SIZE
#else
xvfmul.s VX0, VX0, VXA
addi.d I, I, -1
xvst VX0, Y, 0 * SIZE
#endif
addi.d I, I, -1
addi.d X, X, 8 * SIZE
addi.d Y, Y, 8 * SIZE
blt $r0, I, .L112
@@ -288,6 +288,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
addi.d X, X, 8 * SIZE
addi.d I, I, -1
blt $r0, I, .L121
move Y, YY
b .L997
.align 3
@@ -334,6 +335,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add.d YY, YY, INCY
addi.d X, X, 8 * SIZE
blt $r0, I, .L122
move Y, YY
b .L997
.align 3
@@ -425,6 +427,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add.d YY, YY, INCY
addi.d I, I, -1
blt $r0, I, .L123
move Y, YY
b .L997
.align 3
@@ -465,6 +468,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add.d YY, YY, INCY
addi.d I, I, -1
blt $r0, I, .L124
move Y, YY
b .L997
.align 3
@@ -803,6 +807,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
add.d YY, YY, INCY
blt $r0, I, .L221
move Y, YY
b .L997
.align 3
@@ -895,6 +900,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
add.d YY, YY, INCY
blt $r0, I, .L222
move Y, YY
b .L997
.align 3
@@ -987,6 +993,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
add.d YY, YY, INCY
blt $r0, I, .L223
move Y, YY
b .L997
.align 3
@@ -1027,6 +1034,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add.d YY, YY, INCY
addi.d I, I, -1
blt $r0, I, .L224
move Y, YY
b .L997
.align 3
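Two fixes recur in this file. First, the loop-counter decrement (addi.d I, I, -1) moves out of the #else branch so both precisions count down. Second, every `move Y, YY` inserted before `b .L997` resynchronizes Y: the strided store path advances a scratch pointer YY, and the shared scalar tail at .L997 walks Y, so without the move the tail would reprocess elements the vector body already wrote. A C sketch of that control flow (hypothetical names; groups of 8 as in the single-precision path, positive stride):

    #include <stddef.h>

    void scal_strided(float a, float *y, size_t n, size_t incy)
    {
        float *yy = y;                        /* scratch pointer (YY) */
        for (size_t i = n / 8; i > 0; i--)    /* vector body          */
            for (int k = 0; k < 8; k++) {
                *yy *= a;
                yy += incy;
            }
        y = yy;                               /* the added `move Y, YY` */
        for (size_t i = n % 8; i > 0; i--) {  /* scalar tail (.L997)  */
            *y *= a;
            y += incy;
        }
    }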

View File

@@ -107,15 +107,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef DOUBLE
xvpickve.d x1, VM0, 0
xvpickve.d x2, VM0, 1
XVFMAX VM0, x1, x2
XVFMAX VX0, x1, x2
xvpickve.d x1, VM0, 2
xvpickve.d x2, VM0, 3
XVFMAX VX1, x1, x2
XVFMAX VM0, VX0, VX1
#else
xvpickve.w x1, VM0, 0
xvpickve.w x2, VM0, 1
xvpickve.w x3, VM0, 2
xvpickve.w x4, VM0, 3
XVFMAX VX0, x1, x2
XVFMAX VX1, x3, x4
XVFMAX VX0, VX0, VX1
xvpickve.w x1, VM0, 4
xvpickve.w x2, VM0, 5
xvpickve.w x3, VM0, 6
xvpickve.w x4, VM0, 7
XVFMAX VM0, x1, x2
XVFMAX VM1, x3, x4
XVFMAX VM0, VM0, VM1
XVFMAX VM0, VM0, VX0
#endif
b .L23
.align 3
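The rewritten lane reduction fixes a clobber: the old double-precision code folded lanes 0 and 1 of VM0 into VM0 itself, then extracted lanes 2 and 3 from the already-overwritten register (and the old single-precision code never consulted lanes 4 through 7 correctly). The fix folds into the temporaries VX0/VX1 first; the companion min kernel below gets the mirrored XVFMIN change. The double case as a C sketch, with the 4-lane vector modeled as an array:

    #include <math.h>

    double reduce_max4(const double v[4])
    {
        double lo = fmax(v[0], v[1]);  /* XVFMAX VX0, x1, x2 (lanes 0,1) */
        double hi = fmax(v[2], v[3]);  /* XVFMAX VX1, x1, x2 (lanes 2,3) */
        return fmax(lo, hi);           /* XVFMAX VM0, VX0, VX1 */
    }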
@@ -150,7 +162,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FABS t4, t4
ADD t1, t1, t2
ADD t3, t3, t4
FMAX s1, t1, t3
FMAX s2, t1, t3
LD t1, X, 0 * SIZE
LD t2, X, 1 * SIZE
add.d X, X, INCX
@@ -178,13 +190,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ADD t1, t1, t2
ADD t3, t3, t4
FMAX s4, t1, t3
FMAX s1, s1, s2
FMAX s3, s3, s4
FMAX a0, a0, s3
FMAX a0, a0, s1
blt $r0, I, .L21
.align 3
.L22:
FMAX s1, s1, s2
FMAX s3, s3, s4
FMAX s1, s1, s3
MOV s1, a0
.align 3
.L23: //N<8
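In the unrolled scalar loop above, the four partials s1..s4 are meant to stay independent until they are folded together (now into a0 inside the loop, with .L22 reduced to a single MOV); the old code wrote the second partial into s1 and lost one of the four. A C sketch of the pattern, not the exact unroll factor, assuming |re| + |im| as the complex magnitude the kernel uses:

    #include <math.h>
    #include <stddef.h>

    /* n4: number of groups of four complex (float re,im) elements */
    float camax_ref(const float *x, size_t n4)
    {
        float s1 = 0, s2 = 0, s3 = 0, s4 = 0;  /* independent partials */
        for (size_t i = 0; i < n4; i++, x += 8) {
            s1 = fmaxf(s1, fabsf(x[0]) + fabsf(x[1]));
            s2 = fmaxf(s2, fabsf(x[2]) + fabsf(x[3]));
            s3 = fmaxf(s3, fabsf(x[4]) + fabsf(x[5]));
            s4 = fmaxf(s4, fabsf(x[6]) + fabsf(x[7]));
        }
        s1 = fmaxf(s1, s2);     /* FMAX s1, s1, s2 */
        s3 = fmaxf(s3, s4);     /* FMAX s3, s3, s4 */
        return fmaxf(s1, s3);
    }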

View File

@@ -116,15 +116,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef DOUBLE
xvpickve.d x1, VM0, 0
xvpickve.d x2, VM0, 1
XVFMIN VM0, x1, x2
XVFMIN VX0, x1, x2
xvpickve.d x1, VM0, 2
xvpickve.d x2, VM0, 3
XVFMIN VX1, x1, x2
XVFMIN VM0, VX0, VX1
#else
xvpickve.w x1, VM0, 0
xvpickve.w x2, VM0, 1
xvpickve.w x3, VM0, 2
xvpickve.w x4, VM0, 3
XVFMIN VX0, x1, x2
XVFMIN VX1, x3, x4
XVFMIN VX0, VX0, VX1
xvpickve.w x1, VM0, 4
xvpickve.w x2, VM0, 5
xvpickve.w x3, VM0, 6
xvpickve.w x4, VM0, 7
XVFMIN VM0, x1, x2
XVFMIN VM1, x3, x4
XVFMIN VM0, VM0, VM1
XVFMIN VM0, VM0, VX0
#endif
b .L23
.align 3
@@ -159,7 +171,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FABS t4, t4
ADD t1, t1, t2
ADD t3, t3, t4
FMIN s1, t1, t3
FMIN s2, t1, t3
LD t1, X, 0 * SIZE
LD t2, X, 1 * SIZE
add.d X, X, INCX
@@ -187,13 +199,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ADD t1, t1, t2
ADD t3, t3, t4
FMIN s4, t1, t3
FMIN s1, s1, s2
FMIN s3, s3, s4
FMIN a0, a0, s3
FMIN a0, a0, s1
blt $r0, I, .L21
.align 3
.L22:
FMIN s1, s1, s2
FMIN s3, s3, s4
FMIN s1, s1, s3
MOV s1, a0
.align 3
.L23: //N<8

View File

@@ -176,13 +176,13 @@
xvilvh.d VX3, x4, x3
xvst VX2, Y, 0 * SIZE
xvst VX3, Y, 4 * SIZE
addi.d X, Y, 8 * SIZE
addi.d Y, Y, 8 * SIZE
#else
xvilvl.w VX2, x4, x3
xvilvh.w VX3, x4, x3
xvst VX2, Y, 0 * SIZE
xvst VX3, Y, 8 * SIZE
addi.d X, Y, 16 * SIZE
addi.d Y, Y, 16 * SIZE
#endif
addi.d I, I, -1
blt $r0, I, .L113
@@ -617,6 +617,7 @@
xvstelm.d x4, YY, 1 * SIZE, 3
add.d YY, YY, INCY
blt $r0, I, .L222
move Y, YY
b .L997
.align 3
#else
@@ -691,6 +692,7 @@
xvstelm.w x4, YY, 1 * SIZE, 7
add.d YY, YY, INCY
blt $r0, I, .L222
move Y, YY
b .L997
.align 3
#endif
@@ -1011,7 +1013,11 @@
#endif
.L997:
#ifdef DOUBLE
andi I, N, 3
#else
andi I, N, 7
#endif
bge $r0, I, .L999
.align 3
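Besides advancing Y rather than X after the interleaved stores, this file now sizes the tail to the vector width: one pass of the main loop moves 4 double-complex or 8 float-complex elements, so the leftover count is N & 3 versus N & 7. The arithmetic, sketched:

    #include <stddef.h>

    /* Elements left for the scalar tail after the vector loop. */
    size_t tail_elems(size_t n, int is_double)
    {
        size_t per_iter = is_double ? 4 : 8;  /* complex elems per pass */
        return n & (per_iter - 1);            /* andi I, N, 3  /  7     */
    }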

View File

@@ -104,7 +104,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvfadd.s res1, VX0, res1
xvfadd.s res1, VX1, res1
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX3, res1
#endif
.align 3
@@ -246,7 +246,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvfadd.s res1, VX0, res1
xvfadd.s res1, VX1, res1
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX3, res1
#endif
.align 3
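Both hunks above fix the same copy-paste slip in the single-precision horizontal sum: VX2 was accumulated twice and VX3 never, so one quarter of the partial sums was dropped and another counted twice. The intended four-accumulator pattern, as a scalar C sketch (illustrative; n a multiple of 4):

    #include <stddef.h>

    float sum4acc_ref(const float *x, size_t n)
    {
        float r0 = 0, r1 = 0, r2 = 0, r3 = 0;
        for (size_t i = 0; i < n; i += 4) {
            r0 += x[i];
            r1 += x[i + 1];
            r2 += x[i + 2];
            r3 += x[i + 3];     /* was folded in as r2 a second time */
        }
        return ((r0 + r1) + r2) + r3;
    }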

View File

@@ -144,7 +144,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvfmina.d VM1, VM0, VM1
#else
addi.d I, I, -1
xvadd.w VI2, VI1, VINC8
xvadd.w VI1, VI1, VINC8
xvor.v VI2, VI1, VI1
xvfmina.s VM1, VX0, VM0
#endif
XVCMPEQ VT0, VM0, VM1
@@ -189,6 +190,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
XVFMINA VM0, VM0, VM1
XVCMPEQ VT0, VM0, VM1
xvbitsel.v VI0, VINC8, VINC4, VT0
// $f9: x1
fcmp.ceq.d $fcc0, $f15, $f9
bceqz $fcc0, .L26
XVCMPLT VT0, VI1, VI0
@@ -357,7 +359,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvinsgr2vr.w VX0, t2, 5
xvinsgr2vr.w VX0, t3, 6
xvinsgr2vr.w VX0, t4, 7
xvadd.w VI2, VI1, VINC8
xvadd.w VI1, VI1, VINC8
xvor.v VI2, VI1, VI1
xvfmina.s VM1, VX0, VM0
xvfcmp.ceq.s VT0, VM1, VM0
#endif
@@ -393,7 +396,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
movfr2gr.d i0, $f20
.align 3
#else
fmov.s $f16, $f20
fmov.s $f7, $f20
.align 3
.L252:
@@ -449,9 +452,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.L292:
xvfmina.s VM0, VX0, VM0
xvfcmp.ceq.s VT0, VM0, VX0
xvbitsel.v VI0, VI0, VI1, VT0
xvbitsel.v VI0, VI0, $xr7, VT0
movfr2gr.s i0, $f20
#endif
.L21: // N<8
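These hunks keep the index vector in step with the values: VI1 is advanced before use and then copied to VI2 (xvor.v of a register with itself is a move), and the final select reads its index candidates from $xr7/$f7 rather than a stale register. Per lane, the bookkeeping amounts to this C sketch (arrays standing in for the 8 lanes; values are already per-lane magnitudes):

    /* One update step of a vectorized argmin: the same mask that
     * selects the new minimum must select its index. */
    void argmin_step(float vm[8], int vi[8],
                     const float vx[8], const int idx[8])
    {
        for (int lane = 0; lane < 8; lane++)
            if (vx[lane] < vm[lane]) {   /* xvfmina + xvfcmp.ceq */
                vm[lane] = vx[lane];
                vi[lane] = idx[lane];    /* xvbitsel.v */
            }
    }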

View File

@@ -72,12 +72,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FABS a1, a1
ADD s1, a1, a0
#ifdef DOUBLE
xvreplve0.d VM0, VM0
xvxor.v VI3, VI3, VI3 // 0
li.d I, -1
xvreplgr2vr.d VI4, I
xvffint.d.l VI4, VI4 // -1
bne INCX, TEMP, .L20
// Init VM0
xvreplve0.d VM0, VM0
xvld VX0, X, 0 * SIZE
xvld VX1, X, 4 * SIZE
xvpickev.d x1, VX1, VX0
xvpickod.d x2, VX1, VX0
xvfmul.d x3, VI4, x1
xvfmul.d x4, VI4, x2
xvfcmp.clt.d VT0, x1, VI3
xvfcmp.clt.d VINC8, x2, VI3
xvbitsel.v x1, x1, x3, VT0
xvbitsel.v x2, x2, x4, VINC8
xvfadd.d VM0, x1, x2
addi.d i0, i0, 1
srai.d I, N, 2
bge $r0, I, .L21
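The inserted VM0 initialization computes |re| + |im| for the first vector without branches: multiply by the broadcast -1.0 in VI4, compare against the zero vector VI3, and select the negated copy where the input was negative. One lane of it in C (sketch):

    /* Branchless |re| + |im|, mirroring xvfmul / xvfcmp.clt /
     * xvbitsel.v / xvfadd on one lane. */
    double cabs1_lane(double re, double im)
    {
        double nre = -1.0 * re;              /* xvfmul.d x3, VI4, x1 */
        double nim = -1.0 * im;              /* xvfmul.d x4, VI4, x2 */
        double are = (re < 0.0) ? nre : re;  /* clt + bitsel         */
        double aim = (im < 0.0) ? nim : im;
        return are + aim;                    /* xvfadd.d VM0, x1, x2 */
    }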
@@ -100,12 +113,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
addi.d i0, i0, 2
xvinsgr2vr.d VI0, i0, 3 //4
#else
xvreplve0.w VM0, VM0
xvxor.v VI3, VI3, VI3 // 0
li.w I, -1
xvreplgr2vr.w VI4, I
xvffint.s.w VI4, VI4 // -1
bne INCX, TEMP, .L20
// Init VM0
xvld VX0, X, 0 * SIZE
xvld VX1, X, 8 * SIZE
xvpickev.w x1, VX1, VX0
xvpickod.w x2, VX1, VX0
xvfmul.s x3, VI4, x1
xvfmul.s x4, VI4, x2
xvfcmp.clt.s VT0, x1, VI3
xvfcmp.clt.s VINC4, x2, VI3
xvbitsel.v x1, x1, x3, VT0
xvbitsel.v x2, x2, x4, VINC4
xvfadd.s VM0, x1, x2
addi.w i0, i0, 1
srai.d I, N, 3
bge $r0, I, .L21
@@ -160,6 +185,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvfcmp.clt.d VINC8, x2, VI3
xvbitsel.v x1, x1, x3, VT0
xvbitsel.v x2, x2, x4, VINC8
addi.d X, X, 8 * SIZE
#else
xvadd.w VI1, VI1, VINC8
xvld VX1, X, 8 * SIZE
@@ -172,11 +198,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvfcmp.clt.s VINC4, x2, VI3
xvbitsel.v x1, x1, x3, VT0
xvbitsel.v x2, x2, x4, VINC4
addi.d X, X, 16 * SIZE
#endif
XVFADD x1, x1, x2
XVFMIN x3, VM0, x1
XVCMPEQ VT0, x3, VM0
addi.d X, X, 8 * SIZE
xvbitsel.v VM0, x3, VM0, VT0
xvbitsel.v VI0, VI1, VI0, VT0
blt $r0, I, .L10
@@ -214,13 +240,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvpickve.w x2, VM0, 1
xvpickve.w x3, VM0, 2
xvpickve.w x4, VM0, 3
xvfcmp.clt.s VT0, x1, x2
xvfcmp.clt.s VT0, x2, x1
xvbitsel.v VM1, x1, x2, VT0
xvbitsel.v VINC4, VI1, VI2, VT0
xvfcmp.clt.s VT0, x3, x4
xvfcmp.clt.s VT0, x4, x3
xvbitsel.v VM0, x3, x4, VT0
xvbitsel.v VINC8, VI3, VI4, VT0
xvfcmp.clt.s VT0, VM0, VM1
xvfcmp.clt.s VT0, VM1, VM0
xvbitsel.v VM0, VM0, VM1, VT0
xvbitsel.v VI0, VINC8, VINC4, VT0
#endif
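Flipping the operand order of the three xvfcmp.clt.s instructions does two things at once: selecting the candidate only when it is strictly smaller turns the reduction back into a minimum (the old order picked the larger value), and on ties it keeps the first-seen lane, i.e. the lowest index, which is what i*amin must return. For one pair, in C (sketch):

    /* Pick the smaller value; on a tie keep the earlier index. */
    void min_select(float x1, float x2, int i1, int i2,
                    float *m, int *idx)
    {
        if (x2 < x1) {          /* xvfcmp.clt.s VT0, x2, x1 */
            *m = x2;
            *idx = i2;
        } else {                /* tie falls through to x1  */
            *m = x1;
            *idx = i1;
        }
    }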
@@ -233,6 +259,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.L20: // INCX!=1
#ifdef DOUBLE
// Init VM0
ld.d t1, X, 0 * SIZE
ld.d t2, X, 1 * SIZE
add.d i1, X, INCX
ld.d t3, i1, 0 * SIZE
ld.d t4, i1, 1 * SIZE
add.d i1, i1, INCX
xvinsgr2vr.d x1, t1, 0
xvinsgr2vr.d x2, t2, 0
xvinsgr2vr.d x1, t3, 1
xvinsgr2vr.d x2, t4, 1
ld.d t1, i1, 0 * SIZE
ld.d t2, i1, 1 * SIZE
add.d i1, i1, INCX
ld.d t3, i1, 0 * SIZE
ld.d t4, i1, 1 * SIZE
xvinsgr2vr.d x1, t1, 2
xvinsgr2vr.d x2, t2, 2
xvinsgr2vr.d x1, t3, 3
xvinsgr2vr.d x2, t4, 3
xvfmul.d x3, VI4, x1
xvfmul.d x4, VI4, x2
xvfcmp.clt.d VT0, x1, VI3
xvfcmp.clt.d VINC8, x2, VI3
xvbitsel.v x1, x1, x3, VT0
xvbitsel.v x2, x2, x4, VINC8
xvfadd.d VM0, x1, x2
addi.d i0, i0, 1
srai.d I, N, 2
bge $r0, I, .L21
@@ -240,21 +294,70 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvreplgr2vr.d VINC4, i0
addi.d i0, i0, -7
xvinsgr2vr.d VI1, i0, 0 //initialize the index value for vectorization
addi.d i0, i0, 2
addi.d i0, i0, 1
xvinsgr2vr.d VI1, i0, 1
addi.d i0, i0, -1
addi.d i0, i0, 1
xvinsgr2vr.d VI1, i0, 2
addi.d i0, i0, 2
addi.d i0, i0, 1
xvinsgr2vr.d VI1, i0, 3
addi.d i0, i0, 1
xvinsgr2vr.d VI0, i0, 0 //1
addi.d i0, i0, 2
xvinsgr2vr.d VI0, i0, 1 //3
addi.d i0, i0, -1
xvinsgr2vr.d VI0, i0, 2 //2
addi.d i0, i0, 2
addi.d i0, i0, 1
xvinsgr2vr.d VI0, i0, 1 //2
addi.d i0, i0, 1
xvinsgr2vr.d VI0, i0, 2 //3
addi.d i0, i0, 1
xvinsgr2vr.d VI0, i0, 3 //4
#else
// Init VM0
ld.w t1, X, 0 * SIZE
ld.w t2, X, 1 * SIZE
add.d i1, X, INCX
ld.w t3, i1, 0 * SIZE
ld.w t4, i1, 1 * SIZE
add.d i1, i1, INCX
xvinsgr2vr.w x1, t1, 0
xvinsgr2vr.w x2, t2, 0
xvinsgr2vr.w x1, t3, 1
xvinsgr2vr.w x2, t4, 1
ld.w t1, i1, 0 * SIZE
ld.w t2, i1, 1 * SIZE
add.d i1, i1, INCX
ld.w t3, i1, 0 * SIZE
ld.w t4, i1, 1 * SIZE
add.d i1, i1, INCX
xvinsgr2vr.w x1, t1, 2
xvinsgr2vr.w x2, t2, 2
xvinsgr2vr.w x1, t3, 3
xvinsgr2vr.w x2, t4, 3
ld.w t1, i1, 0 * SIZE
ld.w t2, i1, 1 * SIZE
add.d i1, i1, INCX
ld.w t3, i1, 0 * SIZE
ld.w t4, i1, 1 * SIZE
add.d i1, i1, INCX
xvinsgr2vr.w x1, t1, 4
xvinsgr2vr.w x2, t2, 4
xvinsgr2vr.w x1, t3, 5
xvinsgr2vr.w x2, t4, 5
ld.w t1, i1, 0 * SIZE
ld.w t2, i1, 1 * SIZE
add.d i1, i1, INCX
ld.w t3, i1, 0 * SIZE
ld.w t4, i1, 1 * SIZE
add.d i1, i1, INCX
xvinsgr2vr.w x1, t1, 6
xvinsgr2vr.w x2, t2, 6
xvinsgr2vr.w x1, t3, 7
xvinsgr2vr.w x2, t4, 7
xvfmul.s x3, VI4, x1
xvfmul.s x4, VI4, x2
xvfcmp.clt.s VT0, x1, VI3
xvfcmp.clt.s VINC8, x2, VI3
xvbitsel.v x1, x1, x3, VT0
xvbitsel.v x2, x2, x4, VINC8
xvfadd.s VM0, x1, x2
addi.w i0, i0, 1
srai.d I, N, 3
bge $r0, I, .L21
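This long insertion is the INCX != 1 initialization of VM0: eight strided complex pairs are gathered one element at a time (ld.w plus xvinsgr2vr.w) into separate real and imaginary vectors before the same branchless-abs arithmetic runs; the neighboring hunks likewise normalize the index-lane setup to consecutive +1 increments so VI0 reads 1,2,3,... Its shape in C (sketch; incx counted in complex elements, arrays standing in for LASX registers):

    #include <stddef.h>

    void gather_complex8(const float *x, size_t incx,
                         float re[8], float im[8])
    {
        for (int lane = 0; lane < 8; lane++) {
            re[lane] = x[0];    /* ld.w t1 / xvinsgr2vr.w x1 */
            im[lane] = x[1];    /* ld.w t2 / xvinsgr2vr.w x2 */
            x += 2 * incx;      /* add.d i1, i1, INCX        */
        }
    }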
@@ -264,15 +367,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvinsgr2vr.w VI1, i0, 0 //initialize the index value for vectorization
addi.w i0, i0, 1
xvinsgr2vr.w VI1, i0, 1
addi.w i0, i0, 3
addi.w i0, i0, 1
xvinsgr2vr.w VI1, i0, 2
addi.w i0, i0, 1
xvinsgr2vr.w VI1, i0, 3
addi.w i0, i0, -3
addi.w i0, i0, 1
xvinsgr2vr.w VI1, i0, 4
addi.w i0, i0, 1
xvinsgr2vr.w VI1, i0, 5
addi.w i0, i0, 3
addi.w i0, i0, 1
xvinsgr2vr.w VI1, i0, 6
addi.w i0, i0, 1
xvinsgr2vr.w VI1, i0, 7
@@ -280,15 +383,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvinsgr2vr.w VI0, i0, 0 //1
addi.w i0, i0, 1
xvinsgr2vr.w VI0, i0, 1 //2
addi.w i0, i0, 3
xvinsgr2vr.w VI0, i0, 2 //5
addi.w i0, i0, 1
xvinsgr2vr.w VI0, i0, 3 //6
addi.w i0, i0, -3
xvinsgr2vr.w VI0, i0, 4 //3
xvinsgr2vr.w VI0, i0, 2 //3
addi.w i0, i0, 1
xvinsgr2vr.w VI0, i0, 3 //4
addi.w i0, i0, 1
xvinsgr2vr.w VI0, i0, 4 //5
addi.w i0, i0, 1
xvinsgr2vr.w VI0, i0, 5 //6
addi.w i0, i0, 1
xvinsgr2vr.w VI0, i0, 5 //4
addi.w i0, i0, 3
xvinsgr2vr.w VI0, i0, 6 //7
addi.w i0, i0, 1
xvinsgr2vr.w VI0, i0, 7 //8
@@ -350,7 +453,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvinsgr2vr.w x2, t2, 4
xvinsgr2vr.w x1, t3, 5
xvinsgr2vr.w x2, t4, 5
xvadd.w VI1, VI1, VINC8
ld.w t1, X, 0 * SIZE
ld.w t2, X, 1 * SIZE
add.d X, X, INCX
@@ -361,8 +463,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvinsgr2vr.w x2, t2, 6
xvinsgr2vr.w x1, t3, 7
xvinsgr2vr.w x2, t4, 7
xvpickev.w x1, VX1, VX0
xvpickod.w x2, VX1, VX0
#endif
addi.d I, I, -1
XVFMUL x3, VI4, x1
@@ -410,13 +510,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvpickve.w x2, VM0, 1
xvpickve.w x3, VM0, 2
xvpickve.w x4, VM0, 3
xvfcmp.clt.s VT0, x1, x2
xvfcmp.clt.s VT0, x2, x1
xvbitsel.v VM1, x1, x2, VT0
xvbitsel.v VINC4, VI1, VI2, VT0
xvfcmp.clt.s VT0, x3, x4
xvfcmp.clt.s VT0, x4, x3
xvbitsel.v VM0, x3, x4, VT0
xvbitsel.v VINC8, VI3, VI4, VT0
xvfcmp.clt.s VT0, VM0, VM1
xvfcmp.clt.s VT0, VM1, VM0
xvbitsel.v VM0, VM0, VM1, VT0
#endif
xvbitsel.v VI0, VINC8, VINC4, VT0
@@ -475,13 +575,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvpickve.w x2, VM0, 5
xvpickve.w x3, VM0, 6
xvpickve.w x4, VM0, 7
xvfcmp.clt.s VT0, x1, x2
xvfcmp.clt.s VT0, x2, x1
xvbitsel.v x1, x1, x2, VT0
xvbitsel.v VINC4, VI1, VI2, VT0
xvfcmp.clt.s VT0, x3, x4
xvfcmp.clt.s VT0, x4, x3
xvbitsel.v VM0, x3, x4, VT0
xvbitsel.v VINC8, VI3, VI4, VT0
xvfcmp.clt.s VT0, VM0, x1
xvfcmp.clt.s VT0, x1, VM0
xvbitsel.v VM0, VM0, x1, VT0
xvbitsel.v VI0, VINC8, VINC4, VT0
fcmp.ceq.d $fcc0, $f15, $f9
@@ -512,7 +612,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.align 3
.L292:
fcmp.clt.s $fcc0, $f15, $f13
fcmp.clt.s $fcc0, $f13, $f15
fsel $f15, $f15, $f13, $fcc0
fsel $f20, $f20, $f16, $fcc0
movfr2gr.s i0, $f20

View File

@@ -96,7 +96,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvfadd.s res1, VX0, res1
xvfadd.s res1, VX1, res1
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX3, res1
#endif
.align 3
@@ -200,7 +200,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvfadd.s res1, VX0, res1
xvfadd.s res1, VX1, res1
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX3, res1
#endif
.align 3