Complete all the complex single-precision level-3 functions on Loongson3a; the performance is 2.3 GFlops.

This commit is contained in:
traz 2011-07-18 17:03:38 +00:00
parent 68532fa9ec
commit c8360e3ae5
7 changed files with 1499 additions and 9 deletions

View File

@ -123,10 +123,21 @@ ifndef DTRSMKERNEL_RT
DTRSMKERNEL_RT = trsm_kernel_RT.S DTRSMKERNEL_RT = trsm_kernel_RT.S
endif endif
ifndef CTRSMKERNEL_LN
CTRSMKERNEL_LN = ztrsm_kernel_LT.S CTRSMKERNEL_LN = ztrsm_kernel_LT.S
endif
ifndef CTRSMKERNEL_LT
CTRSMKERNEL_LT = ztrsm_kernel_LT.S CTRSMKERNEL_LT = ztrsm_kernel_LT.S
endif
ifndef CTRSMKERNEL_RN
CTRSMKERNEL_RN = ztrsm_kernel_LT.S CTRSMKERNEL_RN = ztrsm_kernel_LT.S
endif
ifndef CTRSMKERNEL_RT
CTRSMKERNEL_RT = ztrsm_kernel_RT.S CTRSMKERNEL_RT = ztrsm_kernel_RT.S
endif
ifndef ZTRSMKERNEL_LN ifndef ZTRSMKERNEL_LN
ZTRSMKERNEL_LN = ztrsm_kernel_LT.S ZTRSMKERNEL_LN = ztrsm_kernel_LT.S

View File

@ -1,19 +1,25 @@
SAXPYKERNEL=axpy_loongson3a.S SAXPYKERNEL=axpy_loongson3a.S
DAXPYKERNEL=daxpy_loongson3a_simd.S DAXPYKERNEL=daxpy_loongson3a_simd.S
SGEMMKERNEL = sgemm_kernel_loongson3a.S SGEMMKERNEL = sgemm_kernel_loongson3a_4x4.S
SGEMMONCOPY = ../generic/gemm_ncopy_4.c SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMONCOPYOBJ = sgemm_oncopy.o SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o SGEMMOTCOPYOBJ = sgemm_otcopy.o
DGEMMKERNEL = gemm_kernel_loongson3a.S DGEMMKERNEL = dgemm_kernel_loongson3a_4x4.S
DGEMMONCOPY = ../generic/gemm_ncopy_4.c DGEMMONCOPY = ../generic/gemm_ncopy_4.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
DGEMMONCOPYOBJ = dgemm_oncopy.o DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o DGEMMOTCOPYOBJ = dgemm_otcopy.o
ZGEMMKERNEL = zgemm_kernel_loongson3a.S CGEMMKERNEL = cgemm_kernel_loongson3a_2x2.S
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o
ZGEMMKERNEL = zgemm_kernel_loongson3a_2x2.S
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPYOBJ = zgemm_oncopy.o ZGEMMONCOPYOBJ = zgemm_oncopy.o
@ -29,6 +35,11 @@ DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c

File diff suppressed because it is too large Load Diff

View File

@ -1065,8 +1065,8 @@
daddiu PREA, PREA, 4 * SIZE # 2mr*1kr*cmpx daddiu PREA, PREA, 4 * SIZE # 2mr*1kr*cmpx
MADD2 c22, c22, a4, b1 MADD2 c22, c22, a4, b1
MADD4 c24, c24, a4, b2 MADD4 c24, c24, a4, b2
gsLQC1(R12, F1, F0, 0) # R:a1 I:a2 Unroll K=4
gsLQC1(R12, F1, F0, 0) # R:a1 I:a2 Unroll K=4
gsLQC1(R12, F3, F2, 1) # R:a3 I:a4 gsLQC1(R12, F3, F2, 1) # R:a3 I:a4
gsLQC1(R13, F5, F4, 0) # R:b1 I:b2 gsLQC1(R13, F5, F4, 0) # R:b1 I:b2

10
param.h
View File

@ -1486,25 +1486,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define DGEMM_DEFAULT_UNROLL_M 4 #define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_N 4 #define DGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_UNROLL_M 1 #define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 4 #define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_M 2 #define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 2 #define ZGEMM_DEFAULT_UNROLL_N 2
#define SGEMM_DEFAULT_P 64 #define SGEMM_DEFAULT_P 64
#define DGEMM_DEFAULT_P 32 #define DGEMM_DEFAULT_P 32
#define CGEMM_DEFAULT_P 108 #define CGEMM_DEFAULT_P 64
#define ZGEMM_DEFAULT_P 32 #define ZGEMM_DEFAULT_P 32
#define SGEMM_DEFAULT_Q 116 #define SGEMM_DEFAULT_Q 116
#define DGEMM_DEFAULT_Q 116 #define DGEMM_DEFAULT_Q 116
#define CGEMM_DEFAULT_Q 144 #define CGEMM_DEFAULT_Q 100
#define ZGEMM_DEFAULT_Q 80 #define ZGEMM_DEFAULT_Q 80
#define SGEMM_DEFAULT_R 1000 #define SGEMM_DEFAULT_R 1000
#define DGEMM_DEFAULT_R 1000 #define DGEMM_DEFAULT_R 1000
#define CGEMM_DEFAULT_R 2000 #define CGEMM_DEFAULT_R 1000
#define ZGEMM_DEFAULT_R 1000 #define ZGEMM_DEFAULT_R 1000
#define SYMV_P 16 #define SYMV_P 16