add v2x8 kernel + fix sve dtrmm
This commit is contained in:
@@ -143,34 +143,22 @@ endif
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
||||
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
||||
|
||||
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
|
||||
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
||||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
||||
|
||||
ifeq ($(DGEMM_UNROLL_M), 8)
|
||||
DGEMMINCOPY = dgemm_ncopy_$(DGEMM_UNROLL_M).S
|
||||
DGEMMITCOPY = dgemm_tcopy_$(DGEMM_UNROLL_M).S
|
||||
else
|
||||
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
|
||||
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
|
||||
endif
|
||||
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
|
||||
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
|
||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
ifeq ($(DGEMM_UNROLL_N), 4)
|
||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
else
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
|
||||
endif
|
||||
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
||||
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
||||
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
|
||||
|
||||
1665
kernel/arm64/dgemm_kernel_sve_v2x8.S
Normal file
1665
kernel/arm64/dgemm_kernel_sve_v2x8.S
Normal file
File diff suppressed because it is too large
Load Diff
@@ -344,21 +344,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
prfm PLDL2KEEP, [pCRow0, #C_PRE_SIZE]
|
||||
|
||||
add pCRow1, pCRow0, LDC
|
||||
fmla z16.d, p1/m, z16.d, alphaZ
|
||||
fmul z16.d, p1/m, z16.d, alphaZ
|
||||
st1d z16.d, p1, [pCRow0]
|
||||
prfm PLDL2KEEP, [pCRow1, #C_PRE_SIZE]
|
||||
|
||||
add pCRow2, pCRow1, LDC
|
||||
fmla z17.d, p1/m, z17.d, alphaZ
|
||||
fmul z17.d, p1/m, z17.d, alphaZ
|
||||
st1d z17.d, p1, [pCRow1]
|
||||
prfm PLDL2KEEP, [pCRow2, #C_PRE_SIZE]
|
||||
|
||||
add pCRow1, pCRow2, LDC
|
||||
fmla z18.d, p1/m, z18.d, alphaZ
|
||||
fmul z18.d, p1/m, z18.d, alphaZ
|
||||
st1d z18.d, p1, [pCRow2]
|
||||
prfm PLDL2KEEP, [pCRow1, #C_PRE_SIZE]
|
||||
|
||||
fmla z19.d, p1/m, z19.d, alphaZ
|
||||
fmul z19.d, p1/m, z19.d, alphaZ
|
||||
st1d z19.d, p1, [pCRow1]
|
||||
|
||||
add pCRow0, pCRow0, lanes, lsl #3 // pC = pC + lanes * 8
|
||||
@@ -392,11 +392,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
prfm PLDL2KEEP, [pCRow0, #C_PRE_SIZE]
|
||||
|
||||
add pCRow1, pCRow0, LDC
|
||||
fmla z16.d, p1/m, z16.d, alphaZ
|
||||
fmul z16.d, p1/m, z16.d, alphaZ
|
||||
st1d z16.d, p1, [pCRow0]
|
||||
prfm PLDL2KEEP, [pCRow1, #C_PRE_SIZE]
|
||||
|
||||
fmla z17.d, p1/m, z17.d, alphaZ
|
||||
fmul z17.d, p1/m, z17.d, alphaZ
|
||||
st1d z17.d, p1, [pCRow1]
|
||||
|
||||
add pCRow0, pCRow0, lanes, lsl #3 // pC = pC + lanes * 8
|
||||
@@ -426,7 +426,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
prfm PLDL2KEEP, [pCRow0, #C_PRE_SIZE]
|
||||
|
||||
fmla z16.d, p1/m, z16.d, alphaZ
|
||||
fmul z16.d, p1/m, z16.d, alphaZ
|
||||
st1d z16.d, p1, [pCRow0]
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user