LoongArch64: Handle NAN and INF
This commit is contained in:
parent
d6a5174e9c
commit
83ce97a4ca
|
@ -99,7 +99,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
b .L113 //alpha_r != 0.0 && alpha_i == 0.0
|
b .L113 //alpha_r != 0.0 && alpha_i == 0.0
|
||||||
|
|
||||||
.L14:
|
.L14:
|
||||||
bceqz $fcc1, .L112 //alpha_r == 0.0 && alpha_i != 0.0
|
bceqz $fcc1, .L114 //alpha_r == 0.0 && alpha_i != 0.0
|
||||||
b .L111 //alpha_r == 0.0 && alpha_i == 0.0
|
b .L111 //alpha_r == 0.0 && alpha_i == 0.0
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
|
@ -117,38 +117,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
b .L997
|
b .L997
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L112: //alpha_r == 0.0 && alpha_i != 0.0
|
|
||||||
xvld VX0, X, 0 * SIZE
|
|
||||||
#ifdef DOUBLE
|
|
||||||
xvld VX1, X, 4 * SIZE
|
|
||||||
xvpickev.d x1, VX1, VX0
|
|
||||||
xvpickod.d x2, VX1, VX0
|
|
||||||
xvfmul.d x3, VXAI, x2
|
|
||||||
xvfsub.d x3, VXZ, x3
|
|
||||||
xvfmul.d x4, VXAI, x1
|
|
||||||
xvilvl.d VX2, x4 ,x3
|
|
||||||
xvilvh.d VX3, x4, x3
|
|
||||||
xvst VX2, X, 0 * SIZE
|
|
||||||
xvst VX3, X, 4 * SIZE
|
|
||||||
addi.d X, X, 8 * SIZE
|
|
||||||
#else
|
|
||||||
xvld VX1, X, 8 * SIZE
|
|
||||||
xvpickev.w x1, VX1, VX0
|
|
||||||
xvpickod.w x2, VX1, VX0
|
|
||||||
xvfmul.s x3, VXAI, x2
|
|
||||||
xvfsub.s x3, VXZ, x3
|
|
||||||
xvfmul.s x4, VXAI, x1
|
|
||||||
xvilvl.w VX2, x4 ,x3
|
|
||||||
xvilvh.w VX3, x4, x3
|
|
||||||
xvst VX2, X, 0 * SIZE
|
|
||||||
xvst VX3, X, 8 * SIZE
|
|
||||||
addi.d X, X, 16 * SIZE
|
|
||||||
#endif
|
|
||||||
addi.d I, I, -1
|
|
||||||
blt $r0, I, .L112
|
|
||||||
b .L997
|
|
||||||
.align 3
|
|
||||||
|
|
||||||
.L113: //alpha_r != 0.0 && alpha_i == 0.0
|
.L113: //alpha_r != 0.0 && alpha_i == 0.0
|
||||||
xvld VX0, X, 0 * SIZE
|
xvld VX0, X, 0 * SIZE
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
|
@ -227,7 +195,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
b .L223 //alpha_r != 0.0 && alpha_i == 0.0
|
b .L223 //alpha_r != 0.0 && alpha_i == 0.0
|
||||||
|
|
||||||
.L24:
|
.L24:
|
||||||
bceqz $fcc1, .L222 //alpha_r == 0.0 && alpha_i != 0.0
|
bceqz $fcc1, .L224 //alpha_r == 0.0 && alpha_i != 0.0
|
||||||
b .L221 //alpha_r == 0.0 && alpha_i == 0.0
|
b .L221 //alpha_r == 0.0 && alpha_i == 0.0
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
|
@ -275,119 +243,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
b .L997
|
b .L997
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L222: //alpha_r == 0.0 && alpha_i != 0.0
|
|
||||||
#ifdef DOUBLE
|
|
||||||
ld.d t1, X, 0 * SIZE
|
|
||||||
ld.d t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.d t3, X, 0 * SIZE
|
|
||||||
ld.d t4, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
xvinsgr2vr.d x1, t1, 0
|
|
||||||
xvinsgr2vr.d x2, t2, 0
|
|
||||||
xvinsgr2vr.d x1, t3, 1
|
|
||||||
xvinsgr2vr.d x2, t4, 1
|
|
||||||
ld.d t1, X, 0 * SIZE
|
|
||||||
ld.d t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.d t3, X, 0 * SIZE
|
|
||||||
ld.d t4, X, 1 * SIZE
|
|
||||||
xvinsgr2vr.d x1, t1, 2
|
|
||||||
xvinsgr2vr.d x2, t2, 2
|
|
||||||
xvinsgr2vr.d x1, t3, 3
|
|
||||||
xvinsgr2vr.d x2, t4, 3
|
|
||||||
add.d X, X, INCX
|
|
||||||
|
|
||||||
xvfmul.d x3, VXAI, x2
|
|
||||||
xvfsub.d x3, VXZ, x3
|
|
||||||
xvfmul.d x4, VXAI, x1
|
|
||||||
addi.d I, I, -1
|
|
||||||
xvstelm.d x3, XX, 0 * SIZE, 0
|
|
||||||
xvstelm.d x4, XX, 1 * SIZE, 0
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.d x3, XX, 0 * SIZE, 1
|
|
||||||
xvstelm.d x4, XX, 1 * SIZE, 1
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.d x3, XX, 0 * SIZE, 2
|
|
||||||
xvstelm.d x4, XX, 1 * SIZE, 2
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.d x3, XX, 0 * SIZE, 3
|
|
||||||
xvstelm.d x4, XX, 1 * SIZE, 3
|
|
||||||
#else
|
|
||||||
ld.w t1, X, 0 * SIZE
|
|
||||||
ld.w t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.w t3, X, 0 * SIZE
|
|
||||||
ld.w t4, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
xvinsgr2vr.w x1, t1, 0
|
|
||||||
xvinsgr2vr.w x2, t2, 0
|
|
||||||
xvinsgr2vr.w x1, t3, 1
|
|
||||||
xvinsgr2vr.w x2, t4, 1
|
|
||||||
ld.w t1, X, 0 * SIZE
|
|
||||||
ld.w t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.w t3, X, 0 * SIZE
|
|
||||||
ld.w t4, X, 1 * SIZE
|
|
||||||
xvinsgr2vr.w x1, t1, 2
|
|
||||||
xvinsgr2vr.w x2, t2, 2
|
|
||||||
xvinsgr2vr.w x1, t3, 3
|
|
||||||
xvinsgr2vr.w x2, t4, 3
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.w t1, X, 0 * SIZE
|
|
||||||
ld.w t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.w t3, X, 0 * SIZE
|
|
||||||
ld.w t4, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
xvinsgr2vr.w x1, t1, 4
|
|
||||||
xvinsgr2vr.w x2, t2, 4
|
|
||||||
xvinsgr2vr.w x1, t3, 5
|
|
||||||
xvinsgr2vr.w x2, t4, 5
|
|
||||||
ld.w t1, X, 0 * SIZE
|
|
||||||
ld.w t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.w t3, X, 0 * SIZE
|
|
||||||
ld.w t4, X, 1 * SIZE
|
|
||||||
xvinsgr2vr.w x1, t1, 6
|
|
||||||
xvinsgr2vr.w x2, t2, 6
|
|
||||||
xvinsgr2vr.w x1, t3, 7
|
|
||||||
xvinsgr2vr.w x2, t4, 7
|
|
||||||
add.d X, X, INCX
|
|
||||||
|
|
||||||
xvfmul.s x3, VXAI, x2
|
|
||||||
xvfsub.s x3, VXZ, x3
|
|
||||||
xvfmul.s x4, VXAI, x1
|
|
||||||
addi.d I, I, -1
|
|
||||||
xvstelm.w x3, XX, 0 * SIZE, 0
|
|
||||||
xvstelm.w x4, XX, 1 * SIZE, 0
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.w x3, XX, 0 * SIZE, 1
|
|
||||||
xvstelm.w x4, XX, 1 * SIZE, 1
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.w x3, XX, 0 * SIZE, 2
|
|
||||||
xvstelm.w x4, XX, 1 * SIZE, 2
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.w x3, XX, 0 * SIZE, 3
|
|
||||||
xvstelm.w x4, XX, 1 * SIZE, 3
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.w x3, XX, 0 * SIZE, 4
|
|
||||||
xvstelm.w x4, XX, 1 * SIZE, 4
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.w x3, XX, 0 * SIZE, 5
|
|
||||||
xvstelm.w x4, XX, 1 * SIZE, 5
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.w x3, XX, 0 * SIZE, 6
|
|
||||||
xvstelm.w x4, XX, 1 * SIZE, 6
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.w x3, XX, 0 * SIZE, 7
|
|
||||||
xvstelm.w x4, XX, 1 * SIZE, 7
|
|
||||||
#endif
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
blt $r0, I, .L222
|
|
||||||
b .L997
|
|
||||||
.align 3
|
|
||||||
|
|
||||||
.L223: //alpha_r != 0.0 && alpha_i == 0.0
|
.L223: //alpha_r != 0.0 && alpha_i == 0.0
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
ld.d t1, X, 0 * SIZE
|
ld.d t1, X, 0 * SIZE
|
||||||
|
|
|
@ -97,7 +97,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
b .L113 //alpha_r != 0.0 && alpha_i == 0.0
|
b .L113 //alpha_r != 0.0 && alpha_i == 0.0
|
||||||
|
|
||||||
.L14:
|
.L14:
|
||||||
bceqz $fcc1, .L112 //alpha_r == 0.0 && alpha_i != 0.0
|
bceqz $fcc1, .L114 //alpha_r == 0.0 && alpha_i != 0.0
|
||||||
b .L111 //alpha_r == 0.0 && alpha_i == 0.0
|
b .L111 //alpha_r == 0.0 && alpha_i == 0.0
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
|
@ -116,48 +116,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
b .L997
|
b .L997
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L112: //alpha_r == 0.0 && alpha_i != 0.0
|
|
||||||
vld VX0, X, 0 * SIZE
|
|
||||||
#ifdef DOUBLE
|
|
||||||
vld VX1, X, 2 * SIZE
|
|
||||||
vpickev.d x1, VX1, VX0
|
|
||||||
vpickod.d x2, VX1, VX0
|
|
||||||
vfmul.d x3, VXAI, x2
|
|
||||||
vfsub.d x3, VXZ, x3
|
|
||||||
vfmul.d x4, VXAI, x1
|
|
||||||
vilvl.d VX2, x4 ,x3
|
|
||||||
vilvh.d VX3, x4, x3
|
|
||||||
vst VX2, X, 0 * SIZE
|
|
||||||
vst VX3, X, 2 * SIZE
|
|
||||||
vld VX0, X, 4 * SIZE
|
|
||||||
vld VX1, X, 6 * SIZE
|
|
||||||
vpickev.d x1, VX1, VX0
|
|
||||||
vpickod.d x2, VX1, VX0
|
|
||||||
vfmul.d x3, VXAI, x2
|
|
||||||
vfsub.d x3, VXZ, x3
|
|
||||||
vfmul.d x4, VXAI, x1
|
|
||||||
vilvl.d VX2, x4 ,x3
|
|
||||||
vilvh.d VX3, x4, x3
|
|
||||||
vst VX2, X, 4 * SIZE
|
|
||||||
vst VX3, X, 6 * SIZE
|
|
||||||
#else
|
|
||||||
vld VX1, X, 4 * SIZE
|
|
||||||
vpickev.w x1, VX1, VX0
|
|
||||||
vpickod.w x2, VX1, VX0
|
|
||||||
vfmul.s x3, VXAI, x2
|
|
||||||
vfsub.s x3, VXZ, x3
|
|
||||||
vfmul.s x4, VXAI, x1
|
|
||||||
vilvl.w VX2, x4 ,x3
|
|
||||||
vilvh.w VX3, x4, x3
|
|
||||||
vst VX2, X, 0 * SIZE
|
|
||||||
vst VX3, X, 4 * SIZE
|
|
||||||
#endif
|
|
||||||
addi.d X, X, 8 * SIZE
|
|
||||||
addi.d I, I, -1
|
|
||||||
blt $r0, I, .L112
|
|
||||||
b .L997
|
|
||||||
.align 3
|
|
||||||
|
|
||||||
.L113: //alpha_r != 0.0 && alpha_i == 0.0
|
.L113: //alpha_r != 0.0 && alpha_i == 0.0
|
||||||
vld VX0, X, 0 * SIZE
|
vld VX0, X, 0 * SIZE
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
|
@ -256,7 +214,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
b .L223 //alpha_r != 0.0 && alpha_i == 0.0
|
b .L223 //alpha_r != 0.0 && alpha_i == 0.0
|
||||||
|
|
||||||
.L24:
|
.L24:
|
||||||
bceqz $fcc1, .L222 //alpha_r == 0.0 && alpha_i != 0.0
|
bceqz $fcc1, .L224 //alpha_r == 0.0 && alpha_i != 0.0
|
||||||
b .L221 //alpha_r == 0.0 && alpha_i == 0.0
|
b .L221 //alpha_r == 0.0 && alpha_i == 0.0
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
|
@ -292,90 +250,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
b .L997
|
b .L997
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L222: //alpha_r == 0.0 && alpha_i != 0.0
|
|
||||||
#ifdef DOUBLE
|
|
||||||
ld.d t1, X, 0 * SIZE
|
|
||||||
ld.d t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.d t3, X, 0 * SIZE
|
|
||||||
ld.d t4, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
vinsgr2vr.d x1, t1, 0
|
|
||||||
vinsgr2vr.d x2, t2, 0
|
|
||||||
vinsgr2vr.d x1, t3, 1
|
|
||||||
vinsgr2vr.d x2, t4, 1
|
|
||||||
vfmul.d x3, VXAI, x2
|
|
||||||
vfsub.d x3, VXZ, x3
|
|
||||||
vfmul.d x4, VXAI, x1
|
|
||||||
vstelm.d x3, XX, 0 * SIZE, 0
|
|
||||||
vstelm.d x4, XX, 1 * SIZE, 0
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
vstelm.d x3, XX, 0 * SIZE, 1
|
|
||||||
vstelm.d x4, XX, 1 * SIZE, 1
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
|
|
||||||
ld.d t1, X, 0 * SIZE
|
|
||||||
ld.d t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.d t3, X, 0 * SIZE
|
|
||||||
ld.d t4, X, 1 * SIZE
|
|
||||||
vinsgr2vr.d x1, t1, 0
|
|
||||||
vinsgr2vr.d x2, t2, 0
|
|
||||||
vinsgr2vr.d x1, t3, 1
|
|
||||||
vinsgr2vr.d x2, t4, 1
|
|
||||||
add.d X, X, INCX
|
|
||||||
vfmul.d x3, VXAI, x2
|
|
||||||
vfsub.d x3, VXZ, x3
|
|
||||||
vfmul.d x4, VXAI, x1
|
|
||||||
addi.d I, I, -1
|
|
||||||
vstelm.d x3, XX, 0 * SIZE, 0
|
|
||||||
vstelm.d x4, XX, 1 * SIZE, 0
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
vstelm.d x3, XX, 0 * SIZE, 1
|
|
||||||
vstelm.d x4, XX, 1 * SIZE, 1
|
|
||||||
#else
|
|
||||||
ld.w t1, X, 0 * SIZE
|
|
||||||
ld.w t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.w t3, X, 0 * SIZE
|
|
||||||
ld.w t4, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
vinsgr2vr.w x1, t1, 0
|
|
||||||
vinsgr2vr.w x2, t2, 0
|
|
||||||
vinsgr2vr.w x1, t3, 1
|
|
||||||
vinsgr2vr.w x2, t4, 1
|
|
||||||
ld.w t1, X, 0 * SIZE
|
|
||||||
ld.w t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.w t3, X, 0 * SIZE
|
|
||||||
ld.w t4, X, 1 * SIZE
|
|
||||||
vinsgr2vr.w x1, t1, 2
|
|
||||||
vinsgr2vr.w x2, t2, 2
|
|
||||||
vinsgr2vr.w x1, t3, 3
|
|
||||||
vinsgr2vr.w x2, t4, 3
|
|
||||||
add.d X, X, INCX
|
|
||||||
|
|
||||||
vfmul.s x3, VXAI, x2
|
|
||||||
vfsub.s x3, VXZ, x3
|
|
||||||
vfmul.s x4, VXAI, x1
|
|
||||||
addi.d I, I, -1
|
|
||||||
vstelm.w x3, XX, 0 * SIZE, 0
|
|
||||||
vstelm.w x4, XX, 1 * SIZE, 0
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
vstelm.w x3, XX, 0 * SIZE, 1
|
|
||||||
vstelm.w x4, XX, 1 * SIZE, 1
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
vstelm.w x3, XX, 0 * SIZE, 2
|
|
||||||
vstelm.w x4, XX, 1 * SIZE, 2
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
vstelm.w x3, XX, 0 * SIZE, 3
|
|
||||||
vstelm.w x4, XX, 1 * SIZE, 3
|
|
||||||
#endif
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
blt $r0, I, .L222
|
|
||||||
b .L997
|
|
||||||
.align 3
|
|
||||||
|
|
||||||
.L223: //alpha_r != 0.0 && alpha_i == 0.0
|
.L223: //alpha_r != 0.0 && alpha_i == 0.0
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
ld.d t1, X, 0 * SIZE
|
ld.d t1, X, 0 * SIZE
|
||||||
|
|
Loading…
Reference in New Issue