Fix precision problem in DSDOT

This commit is contained in:
Martin Kroeker 2018-07-15 17:11:40 +02:00 committed by GitHub
parent 2fbfc64da8
commit d2142760e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 159 additions and 10 deletions

View File

@ -103,35 +103,83 @@
/*
 * .L12: unrolled body performing eight multiply-add steps.
 *
 * The steps alternate between the two accumulators s1 and s2.  Right after
 * an operand pair (aN, bN) has been consumed it is reloaded from X and Y;
 * the reloads in this excerpt use offsets 4..11 * SIZE (presumably so the
 * loads overlap with the following arithmetic -- the pointer updates and the
 * loop-closing branch are outside this excerpt).
 *
 * With DSDOT defined, both single-precision operands are widened in place
 * with cvt.d.s and accumulated with madd.d, so the running sums are kept in
 * double precision.  Otherwise the MADD macro (defined outside this excerpt)
 * is used.
 */
.align 3
.L12:
/* s1 += a1 * b1, then refill a1/b1 from offset 4 */
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1
#endif
LD a1, 4 * SIZE(X)
LD b1, 4 * SIZE(Y)
/* s2 += a2 * b2, then refill a2/b2 from offset 5 */
#ifdef DSDOT
cvt.d.s a2, a2
cvt.d.s b2, b2
madd.d s2, s2, a2, b2
#else
MADD s2, s2, a2, b2
#endif
LD a2, 5 * SIZE(X)
LD b2, 5 * SIZE(Y)
/* s1 += a3 * b3, then refill a3/b3 from offset 6 */
#ifdef DSDOT
cvt.d.s a3, a3
cvt.d.s b3, b3
madd.d s1, s1, a3, b3
#else
MADD s1, s1, a3, b3
#endif
LD a3, 6 * SIZE(X)
LD b3, 6 * SIZE(Y)
/* s2 += a4 * b4, then refill a4/b4 from offset 7 */
#ifdef DSDOT
cvt.d.s a4, a4
cvt.d.s b4, b4
madd.d s2, s2, a4, b4
#else
MADD s2, s2, a4, b4
#endif
LD a4, 7 * SIZE(X)
LD b4, 7 * SIZE(Y)
/* Second half: consume the values just loaded (offsets 4..7) and refill from 8..11 */
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1
#endif
LD a1, 8 * SIZE(X)
LD b1, 8 * SIZE(Y)
#ifdef DSDOT
cvt.d.s a2, a2
cvt.d.s b2, b2
madd.d s2, s2, a2, b2
#else
MADD s2, s2, a2, b2
#endif
LD a2, 9 * SIZE(X)
LD b2, 9 * SIZE(Y)
#ifdef DSDOT
cvt.d.s a3, a3
cvt.d.s b3, b3
madd.d s1, s1, a3, b3
#else
MADD s1, s1, a3, b3
#endif
LD a3, 10 * SIZE(X)
LD b3, 10 * SIZE(Y)
#ifdef DSDOT
cvt.d.s a4, a4
cvt.d.s b4, b4
madd.d s2, s2, a4, b4
#else
MADD s2, s2, a4, b4
#endif
LD a4, 11 * SIZE(X)
LD b4, 11 * SIZE(Y)
@ -143,29 +191,77 @@
/*
 * .L13: final unrolled pass of eight multiply-add steps.
 *
 * The first four steps consume the operand pairs already held in a1..a4 /
 * b1..b4 and reload them from offsets 4..7 * SIZE.  The last four steps
 * consume those reloaded values without issuing any further loads, and X and
 * Y are advanced by 8 * SIZE in between.
 *
 * As elsewhere, the DSDOT variant widens each operand with cvt.d.s and
 * accumulates with madd.d; the other variant uses the MADD macro (defined
 * outside this excerpt).  Accumulators alternate between s1 and s2.
 */
.align 3
.L13:
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1
#endif
LD a1, 4 * SIZE(X)
LD b1, 4 * SIZE(Y)
#ifdef DSDOT
cvt.d.s a2, a2
cvt.d.s b2, b2
madd.d s2, s2, a2, b2
#else
MADD s2, s2, a2, b2
#endif
LD a2, 5 * SIZE(X)
LD b2, 5 * SIZE(Y)
#ifdef DSDOT
cvt.d.s a3, a3
cvt.d.s b3, b3
madd.d s1, s1, a3, b3
#else
MADD s1, s1, a3, b3
#endif
LD a3, 6 * SIZE(X)
LD b3, 6 * SIZE(Y)
#ifdef DSDOT
cvt.d.s a4, a4
cvt.d.s b4, b4
madd.d s2, s2, a4, b4
#else
MADD s2, s2, a4, b4
#endif
LD a4, 7 * SIZE(X)
LD b4, 7 * SIZE(Y)
/* From here on no more loads: only the four pairs fetched above remain. */
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1
#endif
/* Step X past the eight elements handled by this pass. */
daddiu X, X, 8 * SIZE
#ifdef DSDOT
cvt.d.s a2, a2
cvt.d.s b2, b2
madd.d s2, s2, a2, b2
#else
MADD s2, s2, a2, b2
#endif
/* Step Y past the eight elements handled by this pass. */
daddiu Y, Y, 8 * SIZE
#ifdef DSDOT
cvt.d.s a3, a3
cvt.d.s b3, b3
madd.d s1, s1, a3, b3
#else
MADD s1, s1, a3, b3
#endif
#ifdef DSDOT
cvt.d.s a4, a4
cvt.d.s b4, b4
madd.d s2, s2, a4, b4
#else
MADD s2, s2, a4, b4
#endif
.align 3
/* .L15: the code that follows this label is outside this excerpt. */
.L15:
@ -179,8 +275,13 @@
/*
 * Single-element step: load one value from X and one from Y, accumulate
 * their product into s1, decrement the counter I and advance X by one
 * element.  The matching Y update and the loop branch are outside this
 * excerpt.
 */
LD a1, 0 * SIZE(X)
LD b1, 0 * SIZE(Y)
#ifdef DSDOT
/* Widen both operands so the product is accumulated in double precision. */
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1
#endif
daddiu I, I, -1
daddiu X, X, SIZE
@ -225,50 +326,85 @@
/*
 * Strided body (excerpt starts mid-iteration: the first load of a1 happens
 * before the first line shown here).
 *
 * Every element is fetched through a1/b1 from offset 0 of X and Y, and each
 * pointer is advanced by its own stride (INCX / INCY) with dadd right after
 * the load.  Seven multiply-add steps are visible, alternating between the
 * accumulators s1 and s2.  The pair loaded at the very end is consumed by
 * code that follows this excerpt.
 *
 * With DSDOT defined the operands are widened with cvt.d.s and accumulated
 * with madd.d; otherwise the MADD macro (defined outside this excerpt) is
 * used.
 */
LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1
#endif
LD a1, 0 * SIZE(X)
dadd X, X, INCX
LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s2, s2, a1, b1
#else
MADD s2, s2, a1, b1
#endif
LD a1, 0 * SIZE(X)
dadd X, X, INCX
LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1
#endif
LD a1, 0 * SIZE(X)
dadd X, X, INCX
LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s2, s2, a1, b1
#else
MADD s2, s2, a1, b1
#endif
LD a1, 0 * SIZE(X)
dadd X, X, INCX
LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1
#endif
LD a1, 0 * SIZE(X)
dadd X, X, INCX
LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s2, s2, a1, b1
#else
MADD s2, s2, a1, b1
#endif
LD a1, 0 * SIZE(X)
dadd X, X, INCX
LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1
#endif
/* Last load pair of the iteration; its product is accumulated after this excerpt. */
LD a1, 0 * SIZE(X)
dadd X, X, INCX
LD b1, 0 * SIZE(Y)
@ -277,7 +413,13 @@
daddiu I, I, -1
/*
 * Close the .L23 loop and accumulate the last product of the iteration.
 *
 * Only the single instruction that follows bgtz sits in its branch delay
 * slot and is executed when the branch is taken.  The DSDOT sequence is
 * three instructions long, so the two conversions must be issued before the
 * branch and only the multiply-add may occupy the delay slot; otherwise the
 * product would be dropped on every iteration except the last one.
 */
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
bgtz I, .L23
madd.d s2, s2, a1, b1
#else
bgtz I, .L23
MADD s2, s2, a1, b1
#endif
.align 3
.L25:
@ -296,13 +438,20 @@
daddiu I, I, -1
/*
 * Close the .L26 loop and accumulate the product of the element pair held in
 * a1/b1.
 *
 * Only the single instruction that follows bgtz sits in its branch delay
 * slot and is executed when the branch is taken.  For DSDOT the operands are
 * therefore widened before the branch and the multiply-add alone is placed
 * in the delay slot, so the product is accumulated on every iteration.
 */
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
bgtz I, .L26
madd.d s1, s1, a1, b1
#else
bgtz I, .L26
MADD s1, s1, a1, b1
#endif
.align 3
.L999:
/*
 * Exit: fold the two partial sums into s1 exactly once and jump to the
 * address held in register $31.
 *
 * In the DSDOT build both accumulators already hold double-precision values
 * (they are updated with madd.d), so they are combined with add.d and no
 * further conversion is applied.  The other build uses the ADD macro
 * (defined outside this excerpt).
 */
#ifdef DSDOT
add.d s1, s1, s2
#else
ADD s1, s1, s2
#endif
j $31
/* Fills the delay slot of the jump above. */
NOP