Complete all the complex single-precision level-3 functions on Loongson3a; the performance is 2.3 GFlops.
This commit is contained in:
parent
68532fa9ec
commit
c8360e3ae5
|
@ -123,10 +123,21 @@ ifndef DTRSMKERNEL_RT
|
||||||
DTRSMKERNEL_RT = trsm_kernel_RT.S
|
DTRSMKERNEL_RT = trsm_kernel_RT.S
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifndef CTRSMKERNEL_LN
|
||||||
CTRSMKERNEL_LN = ztrsm_kernel_LT.S
|
CTRSMKERNEL_LN = ztrsm_kernel_LT.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef CTRSMKERNEL_LT
|
||||||
CTRSMKERNEL_LT = ztrsm_kernel_LT.S
|
CTRSMKERNEL_LT = ztrsm_kernel_LT.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef CTRSMKERNEL_RN
|
||||||
CTRSMKERNEL_RN = ztrsm_kernel_LT.S
|
CTRSMKERNEL_RN = ztrsm_kernel_LT.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef CTRSMKERNEL_RT
|
||||||
CTRSMKERNEL_RT = ztrsm_kernel_RT.S
|
CTRSMKERNEL_RT = ztrsm_kernel_RT.S
|
||||||
|
endif
|
||||||
|
|
||||||
ifndef ZTRSMKERNEL_LN
|
ifndef ZTRSMKERNEL_LN
|
||||||
ZTRSMKERNEL_LN = ztrsm_kernel_LT.S
|
ZTRSMKERNEL_LN = ztrsm_kernel_LT.S
|
||||||
|
|
|
@ -1,19 +1,25 @@
|
||||||
SAXPYKERNEL=axpy_loongson3a.S
|
SAXPYKERNEL=axpy_loongson3a.S
|
||||||
DAXPYKERNEL=daxpy_loongson3a_simd.S
|
DAXPYKERNEL=daxpy_loongson3a_simd.S
|
||||||
|
|
||||||
SGEMMKERNEL = sgemm_kernel_loongson3a.S
|
SGEMMKERNEL = sgemm_kernel_loongson3a_4x4.S
|
||||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||||
|
|
||||||
DGEMMKERNEL = gemm_kernel_loongson3a.S
|
DGEMMKERNEL = dgemm_kernel_loongson3a_4x4.S
|
||||||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||||
|
|
||||||
ZGEMMKERNEL = zgemm_kernel_loongson3a.S
|
CGEMMKERNEL = cgemm_kernel_loongson3a_2x2.S
|
||||||
|
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
|
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||||
|
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||||
|
|
||||||
|
ZGEMMKERNEL = zgemm_kernel_loongson3a_2x2.S
|
||||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||||
|
@ -29,6 +35,11 @@ DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1065,8 +1065,8 @@
|
||||||
daddiu PREA, PREA, 4 * SIZE # 2mr*1kr*cmpx
|
daddiu PREA, PREA, 4 * SIZE # 2mr*1kr*cmpx
|
||||||
MADD2 c22, c22, a4, b1
|
MADD2 c22, c22, a4, b1
|
||||||
MADD4 c24, c24, a4, b2
|
MADD4 c24, c24, a4, b2
|
||||||
gsLQC1(R12, F1, F0, 0) # R:a1 I:a2 Unroll K=4
|
|
||||||
|
|
||||||
|
gsLQC1(R12, F1, F0, 0) # R:a1 I:a2 Unroll K=4
|
||||||
gsLQC1(R12, F3, F2, 1) # R:a3 I:a4
|
gsLQC1(R12, F3, F2, 1) # R:a3 I:a4
|
||||||
gsLQC1(R13, F5, F4, 0) # R:b1 I:b2
|
gsLQC1(R13, F5, F4, 0) # R:b1 I:b2
|
||||||
|
|
10
param.h
10
param.h
|
@ -1486,25 +1486,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define DGEMM_DEFAULT_UNROLL_M 4
|
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
|
||||||
#define CGEMM_DEFAULT_UNROLL_M 1
|
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
#define ZGEMM_DEFAULT_UNROLL_M 2
|
#define ZGEMM_DEFAULT_UNROLL_M 2
|
||||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_P 64
|
#define SGEMM_DEFAULT_P 64
|
||||||
#define DGEMM_DEFAULT_P 32
|
#define DGEMM_DEFAULT_P 32
|
||||||
#define CGEMM_DEFAULT_P 108
|
#define CGEMM_DEFAULT_P 64
|
||||||
#define ZGEMM_DEFAULT_P 32
|
#define ZGEMM_DEFAULT_P 32
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_Q 116
|
#define SGEMM_DEFAULT_Q 116
|
||||||
#define DGEMM_DEFAULT_Q 116
|
#define DGEMM_DEFAULT_Q 116
|
||||||
#define CGEMM_DEFAULT_Q 144
|
#define CGEMM_DEFAULT_Q 100
|
||||||
#define ZGEMM_DEFAULT_Q 80
|
#define ZGEMM_DEFAULT_Q 80
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_R 1000
|
#define SGEMM_DEFAULT_R 1000
|
||||||
#define DGEMM_DEFAULT_R 1000
|
#define DGEMM_DEFAULT_R 1000
|
||||||
#define CGEMM_DEFAULT_R 2000
|
#define CGEMM_DEFAULT_R 1000
|
||||||
#define ZGEMM_DEFAULT_R 1000
|
#define ZGEMM_DEFAULT_R 1000
|
||||||
|
|
||||||
#define SYMV_P 16
|
#define SYMV_P 16
|
||||||
|
|
Loading…
Reference in New Issue