add v2x8 kernel + fix sve dtrmm
This commit is contained in:
parent
7093372e32
commit
ab7917910d
|
@ -143,34 +143,22 @@ endif
|
|||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
||||
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
||||
|
||||
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
|
||||
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
||||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
||||
|
||||
ifeq ($(DGEMM_UNROLL_M), 8)
|
||||
DGEMMINCOPY = dgemm_ncopy_$(DGEMM_UNROLL_M).S
|
||||
DGEMMITCOPY = dgemm_tcopy_$(DGEMM_UNROLL_M).S
|
||||
else
|
||||
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
|
||||
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
|
||||
endif
|
||||
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
|
||||
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
|
||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
ifeq ($(DGEMM_UNROLL_N), 4)
|
||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
else
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
|
||||
endif
|
||||
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
||||
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
||||
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -344,21 +344,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
prfm PLDL2KEEP, [pCRow0, #C_PRE_SIZE]
|
||||
|
||||
add pCRow1, pCRow0, LDC
|
||||
fmla z16.d, p1/m, z16.d, alphaZ
|
||||
fmul z16.d, p1/m, z16.d, alphaZ
|
||||
st1d z16.d, p1, [pCRow0]
|
||||
prfm PLDL2KEEP, [pCRow1, #C_PRE_SIZE]
|
||||
|
||||
add pCRow2, pCRow1, LDC
|
||||
fmla z17.d, p1/m, z17.d, alphaZ
|
||||
fmul z17.d, p1/m, z17.d, alphaZ
|
||||
st1d z17.d, p1, [pCRow1]
|
||||
prfm PLDL2KEEP, [pCRow2, #C_PRE_SIZE]
|
||||
|
||||
add pCRow1, pCRow2, LDC
|
||||
fmla z18.d, p1/m, z18.d, alphaZ
|
||||
fmul z18.d, p1/m, z18.d, alphaZ
|
||||
st1d z18.d, p1, [pCRow2]
|
||||
prfm PLDL2KEEP, [pCRow1, #C_PRE_SIZE]
|
||||
|
||||
fmla z19.d, p1/m, z19.d, alphaZ
|
||||
fmul z19.d, p1/m, z19.d, alphaZ
|
||||
st1d z19.d, p1, [pCRow1]
|
||||
|
||||
add pCRow0, pCRow0, lanes, lsl #3 // pC = pC + lanes * 8
|
||||
|
@ -392,11 +392,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
prfm PLDL2KEEP, [pCRow0, #C_PRE_SIZE]
|
||||
|
||||
add pCRow1, pCRow0, LDC
|
||||
fmla z16.d, p1/m, z16.d, alphaZ
|
||||
fmul z16.d, p1/m, z16.d, alphaZ
|
||||
st1d z16.d, p1, [pCRow0]
|
||||
prfm PLDL2KEEP, [pCRow1, #C_PRE_SIZE]
|
||||
|
||||
fmla z17.d, p1/m, z17.d, alphaZ
|
||||
fmul z17.d, p1/m, z17.d, alphaZ
|
||||
st1d z17.d, p1, [pCRow1]
|
||||
|
||||
add pCRow0, pCRow0, lanes, lsl #3 // pC = pC + lanes * 8
|
||||
|
@ -426,7 +426,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
prfm PLDL2KEEP, [pCRow0, #C_PRE_SIZE]
|
||||
|
||||
fmla z16.d, p1/m, z16.d, alphaZ
|
||||
fmul z16.d, p1/m, z16.d, alphaZ
|
||||
st1d z16.d, p1, [pCRow0]
|
||||
|
||||
|
||||
|
|
4
param.h
4
param.h
|
@ -3328,8 +3328,8 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
|
|||
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
#define DGEMM_DEFAULT_UNROLL_M 8
|
||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_N 8
|
||||
|
||||
#define CGEMM_DEFAULT_UNROLL_M 8
|
||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||
|
|
Loading…
Reference in New Issue