This commit is contained in:
Martin Kroeker 2018-07-15 14:40:16 +00:00 committed by GitHub
commit 5e937b6022
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 327 additions and 10 deletions

168
KERNEL Normal file
View File

@ -0,0 +1,168 @@
# axpy, rot and swap kernels, taken from the C sources under ../mips.
# These are assigned unconditionally (no ifndef guard), so they replace
# any earlier value. Each complex C/Z pair points at the same z-prefixed
# file, and the real S/D rot pair shares rot.c.
CAXPYKERNEL = ../mips/zaxpy.c
ZAXPYKERNEL = ../mips/zaxpy.c
SROTKERNEL = ../mips/rot.c
DROTKERNEL = ../mips/rot.c
CROTKERNEL = ../mips/zrot.c
ZROTKERNEL = ../mips/zrot.c
CSWAPKERNEL = ../mips/zswap.c
# Previously spelled "zwap.c"; use the same source file as CSWAPKERNEL.
ZSWAPKERNEL = ../mips/zswap.c
# nrm2 kernels for the S, D, C and Z variants.
# Each variable is set to a same-named .S file (no directory prefix) only
# when it does not already hold a non-empty value, so an earlier
# assignment takes precedence over these defaults.
ifndef SNRM2KERNEL
SNRM2KERNEL = snrm2.S
endif
ifndef DNRM2KERNEL
DNRM2KERNEL = dnrm2.S
endif
ifndef CNRM2KERNEL
CNRM2KERNEL = cnrm2.S
endif
ifndef ZNRM2KERNEL
ZNRM2KERNEL = znrm2.S
endif
# cabs and lsame helpers: fall back to the sources in ../generic when the
# variable has no non-empty value yet. The S, D and Q cabs variants all
# default to the same file, ../generic/cabs.c.
ifndef SCABS_KERNEL
SCABS_KERNEL = ../generic/cabs.c
endif
ifndef DCABS_KERNEL
DCABS_KERNEL = ../generic/cabs.c
endif
ifndef QCABS_KERNEL
QCABS_KERNEL = ../generic/cabs.c
endif
ifndef LSAME_KERNEL
LSAME_KERNEL = ../generic/lsame.c
endif
# GEMM kernel groups for S, D, C and Z.
# Each ifndef tests only the xGEMMKERNEL variable, but guards the whole
# group: the kernel source, the four copy-routine sources
# (INCOPY/ITCOPY/ONCOPY/OTCOPY) and the four matching object names.
# If xGEMMKERNEL already has a value, none of the group is assigned here.
#
# Real variants (S, D): gemm_kernel.S with the generic *_2 and *_8 copies.
ifndef SGEMMKERNEL
SGEMMKERNEL = gemm_kernel.S
SGEMMINCOPY = ../generic/gemm_ncopy_2.c
SGEMMITCOPY = ../generic/gemm_tcopy_2.c
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
SGEMMINCOPYOBJ = sgemm_incopy.o
SGEMMITCOPYOBJ = sgemm_itcopy.o
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o
endif
ifndef DGEMMKERNEL
DGEMMKERNEL = gemm_kernel.S
DGEMMINCOPY = ../generic/gemm_ncopy_2.c
DGEMMITCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
DGEMMINCOPYOBJ = dgemm_incopy.o
DGEMMITCOPYOBJ = dgemm_itcopy.o
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
endif
# Complex variants (C, Z): zgemm_kernel.S with the generic zgemm *_1 and
# *_4 copies.
ifndef CGEMMKERNEL
CGEMMKERNEL = zgemm_kernel.S
CGEMMINCOPY = ../generic/zgemm_ncopy_1.c
CGEMMITCOPY = ../generic/zgemm_tcopy_1.c
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
CGEMMINCOPYOBJ = cgemm_incopy.o
CGEMMITCOPYOBJ = cgemm_itcopy.o
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o
endif
ifndef ZGEMMKERNEL
ZGEMMKERNEL = zgemm_kernel.S
ZGEMMINCOPY = ../generic/zgemm_ncopy_1.c
ZGEMMITCOPY = ../generic/zgemm_tcopy_1.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
ZGEMMINCOPYOBJ = zgemm_incopy.o
ZGEMMITCOPYOBJ = zgemm_itcopy.o
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
endif
# GEMM beta routines: default to the generic C sources when the variable
# has no non-empty value yet. S and D share gemm_beta.c; C and Z share
# zgemm_beta.c.
ifndef SGEMM_BETA
SGEMM_BETA = ../generic/gemm_beta.c
endif
ifndef DGEMM_BETA
DGEMM_BETA = ../generic/gemm_beta.c
endif
ifndef CGEMM_BETA
CGEMM_BETA = ../generic/zgemm_beta.c
endif
ifndef ZGEMM_BETA
ZGEMM_BETA = ../generic/zgemm_beta.c
endif
# TRSM kernels for the LN, LT, RN and RT variants of S, D, C and Z.
# Each variable is assigned only when it has no non-empty value yet.
#
# NOTE(review): in every precision the RN variant points at the *_LT.S
# source rather than a dedicated *_RN.S file - presumably intentional
# (one source built for both variants); confirm against the kernel
# directory and build rules.
ifndef STRSMKERNEL_LN
STRSMKERNEL_LN = trsm_kernel_LN.S
endif
ifndef STRSMKERNEL_LT
STRSMKERNEL_LT = trsm_kernel_LT.S
endif
ifndef STRSMKERNEL_RN
STRSMKERNEL_RN = trsm_kernel_LT.S
endif
ifndef STRSMKERNEL_RT
STRSMKERNEL_RT = trsm_kernel_RT.S
endif
ifndef DTRSMKERNEL_LN
DTRSMKERNEL_LN = trsm_kernel_LN.S
endif
ifndef DTRSMKERNEL_LT
DTRSMKERNEL_LT = trsm_kernel_LT.S
endif
ifndef DTRSMKERNEL_RN
DTRSMKERNEL_RN = trsm_kernel_LT.S
endif
ifndef DTRSMKERNEL_RT
DTRSMKERNEL_RT = trsm_kernel_RT.S
endif
# NOTE(review): unlike the real S/D variants above, the complex LN
# variants also use the *_LT.S source (ztrsm_kernel_LT.S), so LN, LT and
# RN all share one file - presumably intentional; confirm.
ifndef CTRSMKERNEL_LN
CTRSMKERNEL_LN = ztrsm_kernel_LT.S
endif
ifndef CTRSMKERNEL_LT
CTRSMKERNEL_LT = ztrsm_kernel_LT.S
endif
ifndef CTRSMKERNEL_RN
CTRSMKERNEL_RN = ztrsm_kernel_LT.S
endif
ifndef CTRSMKERNEL_RT
CTRSMKERNEL_RT = ztrsm_kernel_RT.S
endif
ifndef ZTRSMKERNEL_LN
ZTRSMKERNEL_LN = ztrsm_kernel_LT.S
endif
ifndef ZTRSMKERNEL_LT
ZTRSMKERNEL_LT = ztrsm_kernel_LT.S
endif
ifndef ZTRSMKERNEL_RN
ZTRSMKERNEL_RN = ztrsm_kernel_LT.S
endif
ifndef ZTRSMKERNEL_RT
ZTRSMKERNEL_RT = ztrsm_kernel_RT.S
endif
# GEMM3M kernels: assigned unconditionally (no ifndef guard), and the C
# and Z variants use the same source file.
CGEMM3MKERNEL = zgemm3m_kernel.S
ZGEMM3MKERNEL = zgemm3m_kernel.S

View File

@ -103,35 +103,83 @@
.align 3 .align 3
.L12: .L12:
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1
#endif
LD a1, 4 * SIZE(X) LD a1, 4 * SIZE(X)
LD b1, 4 * SIZE(Y) LD b1, 4 * SIZE(Y)
#ifdef DSDOT
cvt.d.s a2, a2
cvt.d.s b2, b2
madd.d s2, s2, a2, b2
#else
MADD s2, s2, a2, b2 MADD s2, s2, a2, b2
#endif
LD a2, 5 * SIZE(X) LD a2, 5 * SIZE(X)
LD b2, 5 * SIZE(Y) LD b2, 5 * SIZE(Y)
#ifdef DSDOT
cvt.d.s a3, a3
cvt.d.s b3, b3
madd.d s1, s1, a3, b3
#else
MADD s1, s1, a3, b3 MADD s1, s1, a3, b3
#endif
LD a3, 6 * SIZE(X) LD a3, 6 * SIZE(X)
LD b3, 6 * SIZE(Y) LD b3, 6 * SIZE(Y)
#ifdef DSDOT
cvt.d.s a4, a4
cvt.d.s b4, b4
madd.d s2, s2, a4, b4
#else
MADD s2, s2, a4, b4 MADD s2, s2, a4, b4
#endif
LD a4, 7 * SIZE(X) LD a4, 7 * SIZE(X)
LD b4, 7 * SIZE(Y) LD b4, 7 * SIZE(Y)
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1
#endif
LD a1, 8 * SIZE(X) LD a1, 8 * SIZE(X)
LD b1, 8 * SIZE(Y) LD b1, 8 * SIZE(Y)
#ifdef DSDOT
cvt.d.s a2, a2
cvt.d.s b2, b2
madd.d s2, s2, a2, b2
#else
MADD s2, s2, a2, b2 MADD s2, s2, a2, b2
#endif
LD a2, 9 * SIZE(X) LD a2, 9 * SIZE(X)
LD b2, 9 * SIZE(Y) LD b2, 9 * SIZE(Y)
#ifdef DSDOT
cvt.d.s a3, a3
cvt.d.s b3, b3
madd.d s1, s1, a3, b3
#else
MADD s1, s1, a3, b3 MADD s1, s1, a3, b3
#endif
LD a3, 10 * SIZE(X) LD a3, 10 * SIZE(X)
LD b3, 10 * SIZE(Y) LD b3, 10 * SIZE(Y)
#ifdef DSDOT
cvt.d.s a4, a4
cvt.d.s b4, b4
madd.d s2, s2, a4, b4
#else
MADD s2, s2, a4, b4 MADD s2, s2, a4, b4
#endif
LD a4, 11 * SIZE(X) LD a4, 11 * SIZE(X)
LD b4, 11 * SIZE(Y) LD b4, 11 * SIZE(Y)
@ -143,29 +191,77 @@
.align 3 .align 3
.L13: .L13:
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1
#endif
LD a1, 4 * SIZE(X) LD a1, 4 * SIZE(X)
LD b1, 4 * SIZE(Y) LD b1, 4 * SIZE(Y)
#ifdef DSDOT
cvt.d.s a2, a2
cvt.d.s b2, b2
madd.d s2, s2, a2, b2
#else
MADD s2, s2, a2, b2 MADD s2, s2, a2, b2
#endif
LD a2, 5 * SIZE(X) LD a2, 5 * SIZE(X)
LD b2, 5 * SIZE(Y) LD b2, 5 * SIZE(Y)
#ifdef DSDOT
cvt.d.s a3, a3
cvt.d.s b3, b3
madd.d s1, s1, a3, b3
#else
MADD s1, s1, a3, b3 MADD s1, s1, a3, b3
#endif
LD a3, 6 * SIZE(X) LD a3, 6 * SIZE(X)
LD b3, 6 * SIZE(Y) LD b3, 6 * SIZE(Y)
#ifdef DSDOT
cvt.d.s a4, a4
cvt.d.s b4, b4
madd.d s2, s2, a4, b4
#else
MADD s2, s2, a4, b4 MADD s2, s2, a4, b4
#endif
LD a4, 7 * SIZE(X) LD a4, 7 * SIZE(X)
LD b4, 7 * SIZE(Y) LD b4, 7 * SIZE(Y)
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1
#endif
daddiu X, X, 8 * SIZE daddiu X, X, 8 * SIZE
#ifdef DSDOT
cvt.d.s a2, a2
cvt.d.s b2, b2
madd.d s2, s2, a2, b2
#else
MADD s2, s2, a2, b2 MADD s2, s2, a2, b2
#endif
daddiu Y, Y, 8 * SIZE daddiu Y, Y, 8 * SIZE
#ifdef DSDOT
cvt.d.s a3, a3
cvt.d.s b3, b3
madd.d s1, s1, a3, b3
#else
MADD s1, s1, a3, b3 MADD s1, s1, a3, b3
#endif
#ifdef DSDOT
cvt.d.s a4, a4
cvt.d.s b4, b4
madd.d s2, s2, a4, b4
#else
MADD s2, s2, a4, b4 MADD s2, s2, a4, b4
#endif
.align 3 .align 3
.L15: .L15:
@ -179,8 +275,13 @@
LD a1, 0 * SIZE(X) LD a1, 0 * SIZE(X)
LD b1, 0 * SIZE(Y) LD b1, 0 * SIZE(Y)
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1
#endif
daddiu I, I, -1 daddiu I, I, -1
daddiu X, X, SIZE daddiu X, X, SIZE
@ -225,50 +326,85 @@
LD b1, 0 * SIZE(Y) LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY dadd Y, Y, INCY
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1
#endif
LD a1, 0 * SIZE(X) LD a1, 0 * SIZE(X)
dadd X, X, INCX dadd X, X, INCX
LD b1, 0 * SIZE(Y) LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY dadd Y, Y, INCY
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s2, s2, a1, b1
#else
MADD s2, s2, a1, b1 MADD s2, s2, a1, b1
#endif
LD a1, 0 * SIZE(X) LD a1, 0 * SIZE(X)
dadd X, X, INCX dadd X, X, INCX
LD b1, 0 * SIZE(Y) LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY dadd Y, Y, INCY
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1
#endif
LD a1, 0 * SIZE(X) LD a1, 0 * SIZE(X)
dadd X, X, INCX dadd X, X, INCX
LD b1, 0 * SIZE(Y) LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY dadd Y, Y, INCY
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s2, s2, a1, b1
#else
MADD s2, s2, a1, b1 MADD s2, s2, a1, b1
#endif
LD a1, 0 * SIZE(X) LD a1, 0 * SIZE(X)
dadd X, X, INCX dadd X, X, INCX
LD b1, 0 * SIZE(Y) LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY dadd Y, Y, INCY
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1
#endif
LD a1, 0 * SIZE(X) LD a1, 0 * SIZE(X)
dadd X, X, INCX dadd X, X, INCX
LD b1, 0 * SIZE(Y) LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY dadd Y, Y, INCY
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s2, s2, a1, b1
#else
MADD s2, s2, a1, b1 MADD s2, s2, a1, b1
#endif
LD a1, 0 * SIZE(X) LD a1, 0 * SIZE(X)
dadd X, X, INCX dadd X, X, INCX
LD b1, 0 * SIZE(Y) LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY dadd Y, Y, INCY
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1
#endif
LD a1, 0 * SIZE(X) LD a1, 0 * SIZE(X)
dadd X, X, INCX dadd X, X, INCX
LD b1, 0 * SIZE(Y) LD b1, 0 * SIZE(Y)
@ -277,7 +413,13 @@
daddiu I, I, -1 daddiu I, I, -1
bgtz I, .L23 bgtz I, .L23
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s2, s2, a1, b1
#else
MADD s2, s2, a1, b1 MADD s2, s2, a1, b1
#endif
.align 3 .align 3
.L25: .L25:
@ -296,13 +438,20 @@
daddiu I, I, -1 daddiu I, I, -1
bgtz I, .L26 bgtz I, .L26
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1
#endif
.align 3 .align 3
.L999: .L999:
ADD s1, s1, s2
#ifdef DSDOT #ifdef DSDOT
cvt.d.s s1, s1 add.d s1, s1, s2
#else
ADD s1, s1, s2
#endif #endif
j $31 j $31
NOP NOP