Complete all the complex single-precision Level 3 functions on Loongson3a; the performance is 2.3 GFlops.
This commit is contained in:
parent
68532fa9ec
commit
c8360e3ae5
|
@ -123,10 +123,21 @@ ifndef DTRSMKERNEL_RT
|
|||
DTRSMKERNEL_RT = trsm_kernel_RT.S
|
||||
endif
|
||||
|
||||
ifndef CTRSMKERNEL_LN
|
||||
CTRSMKERNEL_LN = ztrsm_kernel_LT.S
|
||||
endif
|
||||
|
||||
ifndef CTRSMKERNEL_LT
|
||||
CTRSMKERNEL_LT = ztrsm_kernel_LT.S
|
||||
endif
|
||||
|
||||
ifndef CTRSMKERNEL_RN
|
||||
CTRSMKERNEL_RN = ztrsm_kernel_LT.S
|
||||
endif
|
||||
|
||||
ifndef CTRSMKERNEL_RT
|
||||
CTRSMKERNEL_RT = ztrsm_kernel_RT.S
|
||||
endif
|
||||
|
||||
ifndef ZTRSMKERNEL_LN
|
||||
ZTRSMKERNEL_LN = ztrsm_kernel_LT.S
|
||||
|
|
|
@ -1,19 +1,25 @@
|
|||
SAXPYKERNEL=axpy_loongson3a.S
|
||||
DAXPYKERNEL=daxpy_loongson3a_simd.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_loongson3a.S
|
||||
SGEMMKERNEL = sgemm_kernel_loongson3a_4x4.S
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
|
||||
DGEMMKERNEL = gemm_kernel_loongson3a.S
|
||||
DGEMMKERNEL = dgemm_kernel_loongson3a_4x4.S
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_loongson3a.S
|
||||
CGEMMKERNEL = cgemm_kernel_loongson3a_2x2.S
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_loongson3a_2x2.S
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
|
@ -29,6 +35,11 @@ DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
|||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1065,8 +1065,8 @@
|
|||
daddiu PREA, PREA, 4 * SIZE # 2mr*1kr*cmpx
|
||||
MADD2 c22, c22, a4, b1
|
||||
MADD4 c24, c24, a4, b2
|
||||
gsLQC1(R12, F1, F0, 0) # R:a1 I:a2 Unroll K=4
|
||||
|
||||
gsLQC1(R12, F1, F0, 0) # R:a1 I:a2 Unroll K=4
|
||||
gsLQC1(R12, F3, F2, 1) # R:a3 I:a4
|
||||
gsLQC1(R13, F5, F4, 0) # R:b1 I:b2
|
||||
|
10
param.h
10
param.h
|
@ -1486,25 +1486,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
#define CGEMM_DEFAULT_UNROLL_M 1
|
||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||
|
||||
#define SGEMM_DEFAULT_P 64
|
||||
#define DGEMM_DEFAULT_P 32
|
||||
#define CGEMM_DEFAULT_P 108
|
||||
#define CGEMM_DEFAULT_P 64
|
||||
#define ZGEMM_DEFAULT_P 32
|
||||
|
||||
#define SGEMM_DEFAULT_Q 116
|
||||
#define DGEMM_DEFAULT_Q 116
|
||||
#define CGEMM_DEFAULT_Q 144
|
||||
#define CGEMM_DEFAULT_Q 100
|
||||
#define ZGEMM_DEFAULT_Q 80
|
||||
|
||||
#define SGEMM_DEFAULT_R 1000
|
||||
#define DGEMM_DEFAULT_R 1000
|
||||
#define CGEMM_DEFAULT_R 2000
|
||||
#define CGEMM_DEFAULT_R 1000
|
||||
#define ZGEMM_DEFAULT_R 1000
|
||||
|
||||
#define SYMV_P 16
|
||||
|
|
Loading…
Reference in New Issue