Fixed #30: strmm computational error on Loongson3A.

This commit is contained in:
traz 2011-05-28 09:48:34 +00:00
parent fc84909115
commit 88d94d0ec8
2 changed files with 70 additions and 50 deletions

View File

@@ -1,6 +1,7 @@
#define REALNAME ASMNAME
#define ASSEMBLER
#include "common.h"
/* FETCH is an alias for the `ld` mnemonic.
 * NOTE(review): its use sites are outside this view; presumably it is used as
 * a prefetch-style load in the kernel loops -- confirm against the full file. */
#define FETCH ld
/* gsLQC1(base,fq,ft,offset): emit one hand-encoded 32-bit instruction word
 * with .word instead of an assembler mnemonic.
 * Fields as assembled by the shifts below:
 *   0x32   -> shifted to bit 26 (top field)
 *   base   -> shifted to bit 21
 *   ft     -> shifted to bit 16
 *   1      -> bit 15
 *   offset -> shifted to bit 6
 *   1      -> bit 5
 *   fq     -> low bits, unshifted
 * The arguments are pasted in without parentheses or masking, so callers must
 * pass plain numeric values that already fit their field.
 * NOTE(review): presumably the Loongson quad-word FP load into the fq/ft
 * register pair from offset(base), with offset in scaled units -- confirm
 * against the Loongson-3A instruction manual. */
#define gsLQC1(base,fq,ft,offset) .word(0x32<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq)
/* gsSQC1(base,fq,ft,offset): same field layout as gsLQC1; the only difference
 * is the top field, 0x3A instead of 0x32.
 * NOTE(review): presumably the matching quad-word FP store -- confirm against
 * the Loongson-3A instruction manual. */
#define gsSQC1(base,fq,ft,offset) .word(0x3A<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq)
@@ -215,35 +216,36 @@
daddu A,A,K # move A B to data part
daddu B,BO,TEMP
#endif
MTC $0,t11
MOV t21,t11
MTC $0,t11 # GEMM part NR=4,MR=4
LD a0,0(A)
MOV t21,t11
MOV t31,t11
MOV t41,t11
LD a1,1*SIZE(A)
MOV t41,t11
MOV t12,t11
MOV t22,t11
LD b0,0(B)
MOV t22,t11
MOV t32,t11
MOV t42,t11
LD b1,1*SIZE(B)
MOV t42,t11
LD a2,2*SIZE(A)
MOV t13,t11
MOV t23,t11
LD a2,2*SIZE(A)
LD b2,2*SIZE(B)
MOV t33,t11
MOV t43,t11
LD b2,2*SIZE(B)
LD a3,3*SIZE(A)
MOV t14,t11
MOV t24,t11
LD a3,3*SIZE(A)
LD b3,3*SIZE(B)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP,KCO,KK # temp is the length of the data part
@@ -733,22 +735,22 @@
daddu B,BO,TEMP
#endif
MTC $0,t11
LD a0,0*SIZE(A)
MOV t21,t11
MTC $0,t11
LD a1,1*SIZE(A)
MOV t12,t11
MOV t21,t11
LD b0,0*SIZE(B)
MOV t22,t11
MOV t12,t11
LD b1,1*SIZE(B)
MOV t13,t11
MOV t22,t11
LD b2,2*SIZE(B)
MOV t13,t11
MOV t23,t11
LD b3,3*SIZE(B)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP,KCO,KK
#elif defined(LEFT)
@@ -1043,20 +1045,26 @@
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
move B,BO # Reset B
#else
dsll K,KK, 0 + BASE_SHIFT
dsll K,KK, BASE_SHIFT
dsll TEMP,KK,2 + BASE_SHIFT
daddu A,A,K
daddu B,BO,TEMP
#endif
LD a0, 0 * SIZE(A) # a0
MTC $0,t11
LD b0,0*SIZE(B)
MOV t12,t11
LD a0, 0 * SIZE(A) # a0
LD b1,1*SIZE(B)
MOV t13,t11
LD b0,0*SIZE(B)
MOV t14,t11 # clear result registers
LD b1,1*SIZE(B)
LD b2,2*SIZE(B)
MOV t14,t11
LD b3,3*SIZE(B)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP, KCO, KK
@@ -1236,7 +1244,7 @@
daddiu TEMP, TEMP, -4
#endif
dsll K,TEMP, 0 + BASE_SHIFT
dsll K,TEMP, BASE_SHIFT
dsll TEMP,TEMP, 2 + BASE_SHIFT
daddu A,A,K
@@ -1291,21 +1299,21 @@
daddu A,A,K
daddu B,BO,TEMP
#endif
MTC $0,t11
LD a0,0*SIZE(A)
MOV t21,t11
MTC $0,t11 # gemm part
LD a1,1*SIZE(A)
MOV t31,t11
MOV t21,t11
LD b0,0*SIZE(B)
MOV t41,t11
MOV t31,t11
LD b1,1*SIZE(B)
MOV t12,t11
MOV t41,t11
LD a2,2*SIZE(A)
MOV t22,t11
LD a3,3*SIZE(A)
MOV t12,t11
MOV t22,t11
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP,KCO,KK
@@ -1621,11 +1629,14 @@
daddu A, A, K
daddu B, BO, TEMP
#endif
MTC $0,t11
LD a0,0*SIZE(A)
MOV t21,t11
LD a1,1*SIZE(A)
MTC $0,t11
LD b0,0*SIZE(B)
MOV t21,t11
LD b1,1*SIZE(B)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP, KCO, KK
#elif defined(LEFT)
@@ -1830,11 +1841,14 @@
daddu A, A, K
daddu B, BO, TEMP
#endif
MTC $0,t11
LD a0, 0*SIZE(A) # a0
LD a0,0*SIZE(A)
MTC $0,t11
MOV t21,t11
LD b0,0*SIZE(B)
LD b0,0*SIZE(B)
MOV t12,t11
LD b1,1*SIZE(B)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP, KCO, KK
@@ -1844,9 +1858,9 @@
daddiu TEMP, KK, 2
#endif
dsra K,TEMP,2
MOV t12,t11
beqz K,.L65
MOV t22,t11
beqz K,.L65
nop
#else
dsra K,KCO,2
@@ -2023,13 +2037,18 @@
daddu A, A, K
daddu B, BO, TEMP
#endif
MTC $0,t11
LD b0, 0*SIZE(B)
MOV t21,t11
MTC $0,t11
LD a0,0*SIZE(A)
MOV t31,t11
MOV t21,t11
LD a1,1*SIZE(A)
MOV t31,t11
LD a2,2*SIZE(A)
MOV t41,t11
LD a3,3*SIZE(A)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP, KCO, KK
@@ -2039,7 +2058,6 @@
daddiu TEMP, KK, 1
#endif
dsra K,TEMP,2
MOV t41,t11
beqz K,.L75
nop
#else
@@ -2276,10 +2294,11 @@
daddu B, BO, TEMP
#endif
LD b0, 0*SIZE(B)
MTC $0,t11
LD a0,0*SIZE(A)
MOV t21,t11
LD a0,0*SIZE(A)
LD a1,1*SIZE(A)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP, KCO, KK
@@ -2443,6 +2462,7 @@
LD a0, 0*SIZE(A)
LD b0, 0*SIZE(B)
MTC $0,t11
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP, KCO, KK
#elif defined(LEFT)

10
param.h
View File

@@ -1480,8 +1480,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL_N 8
#define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_N 4
@@ -1491,17 +1491,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_UNROLL_M 1
#define ZGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_P 108
#define SGEMM_DEFAULT_P 32
#define DGEMM_DEFAULT_P 32
#define CGEMM_DEFAULT_P 108
#define ZGEMM_DEFAULT_P 112
#define SGEMM_DEFAULT_Q 288
#define SGEMM_DEFAULT_Q 116
#define DGEMM_DEFAULT_Q 116
#define CGEMM_DEFAULT_Q 144
#define ZGEMM_DEFAULT_Q 72
#define SGEMM_DEFAULT_R 2000
#define SGEMM_DEFAULT_R 1000
#define DGEMM_DEFAULT_R 1000
#define CGEMM_DEFAULT_R 2000
#define ZGEMM_DEFAULT_R 2000