add v2x8 kernel + fix sve dtrmm

This commit is contained in:
Bine Brank 2021-11-07 20:37:51 +01:00
parent 7093372e32
commit ab7917910d
4 changed files with 1682 additions and 29 deletions

View File

@@ -143,34 +143,22 @@ endif
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
ifeq ($(DGEMM_UNROLL_M), 8)
DGEMMINCOPY = dgemm_ncopy_$(DGEMM_UNROLL_M).S
DGEMMITCOPY = dgemm_tcopy_$(DGEMM_UNROLL_M).S
else
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
endif
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(DGEMM_UNROLL_N), 4)
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
else
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
endif
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))

File diff suppressed because it is too large. [Load Diff]

View File

@@ -344,21 +344,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
prfm PLDL2KEEP, [pCRow0, #C_PRE_SIZE]
add pCRow1, pCRow0, LDC
fmla z16.d, p1/m, z16.d, alphaZ
fmul z16.d, p1/m, z16.d, alphaZ
st1d z16.d, p1, [pCRow0]
prfm PLDL2KEEP, [pCRow1, #C_PRE_SIZE]
add pCRow2, pCRow1, LDC
fmla z17.d, p1/m, z17.d, alphaZ
fmul z17.d, p1/m, z17.d, alphaZ
st1d z17.d, p1, [pCRow1]
prfm PLDL2KEEP, [pCRow2, #C_PRE_SIZE]
add pCRow1, pCRow2, LDC
fmla z18.d, p1/m, z18.d, alphaZ
fmul z18.d, p1/m, z18.d, alphaZ
st1d z18.d, p1, [pCRow2]
prfm PLDL2KEEP, [pCRow1, #C_PRE_SIZE]
fmla z19.d, p1/m, z19.d, alphaZ
fmul z19.d, p1/m, z19.d, alphaZ
st1d z19.d, p1, [pCRow1]
add pCRow0, pCRow0, lanes, lsl #3 // pC = pC + lanes * 8
@@ -392,11 +392,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
prfm PLDL2KEEP, [pCRow0, #C_PRE_SIZE]
add pCRow1, pCRow0, LDC
fmla z16.d, p1/m, z16.d, alphaZ
fmul z16.d, p1/m, z16.d, alphaZ
st1d z16.d, p1, [pCRow0]
prfm PLDL2KEEP, [pCRow1, #C_PRE_SIZE]
fmla z17.d, p1/m, z17.d, alphaZ
fmul z17.d, p1/m, z17.d, alphaZ
st1d z17.d, p1, [pCRow1]
add pCRow0, pCRow0, lanes, lsl #3 // pC = pC + lanes * 8
@@ -426,7 +426,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
prfm PLDL2KEEP, [pCRow0, #C_PRE_SIZE]
fmla z16.d, p1/m, z16.d, alphaZ
fmul z16.d, p1/m, z16.d, alphaZ
st1d z16.d, p1, [pCRow0]

View File

@@ -3328,8 +3328,8 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_N 8
#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_N 4