loongarch: Fix {s/d/sc/dz}amin LSX optimization

This commit is contained in:
gxw 2024-03-19 09:18:44 +08:00
parent bbf82cb624
commit ad13e04669
2 changed files with 13 additions and 10 deletions

View File

@ -146,7 +146,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add.d X, X, INCX add.d X, X, INCX
vinsgr2vr.d VX1, t3, 0 vinsgr2vr.d VX1, t3, 0
vinsgr2vr.d VX1, t4, 1 vinsgr2vr.d VX1, t4, 1
vfmaxa.d VM1, VX0, VX1 vfmina.d VM1, VX0, VX1
ld.d t1, X, 0 * SIZE ld.d t1, X, 0 * SIZE
add.d X, X, INCX add.d X, X, INCX
ld.d t2, X, 0 * SIZE ld.d t2, X, 0 * SIZE
@ -159,9 +159,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add.d X, X, INCX add.d X, X, INCX
vinsgr2vr.d VX1, t3, 0 vinsgr2vr.d VX1, t3, 0
vinsgr2vr.d VX1, t4, 1 vinsgr2vr.d VX1, t4, 1
vfmaxa.d VM2, VX0, VX1 vfmina.d VM2, VX0, VX1
vfmaxa.d VM1, VM1, VM2 vfmina.d VM1, VM1, VM2
vfmaxa.d VM0, VM0, VM1 vfmina.d VM0, VM0, VM1
#else #else
ld.w t1, X, 0 ld.w t1, X, 0
add.d X, X, INCX add.d X, X, INCX
@ -187,8 +187,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vinsgr2vr.w VX1, t2, 1 vinsgr2vr.w VX1, t2, 1
vinsgr2vr.w VX1, t3, 2 vinsgr2vr.w VX1, t3, 2
vinsgr2vr.w VX1, t4, 3 vinsgr2vr.w VX1, t4, 3
vfmaxa.s VM1, VX0, VX1 vfmina.s VM1, VX0, VX1
vfmaxa.s VM0, VM0, VM1 vfmina.s VM0, VM0, VM1
#endif #endif
addi.d I, I, -1 addi.d I, I, -1
blt $r0, I, .L21 blt $r0, I, .L21

View File

@ -186,7 +186,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FABS t4, t4 FABS t4, t4
ADD t1, t1, t2 ADD t1, t1, t2
ADD t3, t3, t4 ADD t3, t3, t4
FMIN s1, t1, t3 FMIN s2, t1, t3
LD t1, X, 0 * SIZE LD t1, X, 0 * SIZE
LD t2, X, 1 * SIZE LD t2, X, 1 * SIZE
add.d X, X, INCX add.d X, X, INCX
@ -214,13 +214,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ADD t1, t1, t2 ADD t1, t1, t2
ADD t3, t3, t4 ADD t3, t3, t4
FMIN s4, t1, t3 FMIN s4, t1, t3
FMIN s1, s1, s2
FMIN s3, s3, s4
FMIN a0, a0, s3
FMIN a0, a0, s1
blt $r0, I, .L21 blt $r0, I, .L21
.align 3 .align 3
.L22: .L22:
FMIN s1, s1, s2 MOV s1, a0
FMIN s3, s3, s4
FMIN s1, s1, s3
.align 3 .align 3
.L23: //N<8 .L23: //N<8