fix UNROLL_MN and add to targets for SVE
This commit is contained in:
parent
774267fdac
commit
a8f62a347b
|
@ -114,8 +114,8 @@ DSDOTKERNEL = dot.S
|
|||
DGEMM_BETA = dgemm_beta.S
|
||||
SGEMM_BETA = sgemm_beta.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||
STRMMKERNEL = strmm_kernel_8x$(SGEMM_UNROLL_N).S
|
||||
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
||||
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
||||
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
||||
|
@ -127,6 +127,11 @@ SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
|||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
SSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
SSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
|
|
|
@ -114,35 +114,27 @@ DSDOTKERNEL = dot.S
|
|||
DGEMM_BETA = dgemm_beta.S
|
||||
SGEMM_BETA = sgemm_beta.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
||||
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
||||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
||||
ifeq ($(SGEMM_UNROLL_M), 16)
|
||||
SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S
|
||||
else
|
||||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
|
||||
endif
|
||||
ifeq ($(SGEMM_UNROLL_M), 4)
|
||||
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S
|
||||
else
|
||||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
|
||||
endif
|
||||
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
||||
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
||||
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
||||
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
ifeq ($(SGEMM_UNROLL_N), 16)
|
||||
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
|
||||
else
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
|
||||
endif
|
||||
ifeq ($(SGEMM_UNROLL_N), 4)
|
||||
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
|
||||
else
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
|
||||
endif
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
SSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
SSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
||||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
||||
|
||||
|
|
8
param.h
8
param.h
|
@ -3296,14 +3296,22 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
|
|||
|
||||
#elif defined(ARMV8SVE) || defined(A64FX)
|
||||
|
||||
/* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
|
||||
Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
#define SGEMM_DEFAULT_UNROLL_N 8
|
||||
/* SGEMM_UNROLL_MN is calculated as max(SGEMM_UNROLL_M, SGEMM_UNROLL_N)
|
||||
* Since we don't define SGEMM_UNROLL_M correctly we have to manually set this macro.
|
||||
* If SVE size is ever more than 1024, this should be increased also. */
|
||||
#define SGEMM_DEFAULT_UNROLL_MN 32
|
||||
|
||||
/* When all BLAS3 routines are implemented with SVE, DGEMM_DEFAULT_UNROLL_M should be "sve_vl".
|
||||
Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
|
||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||
#define DGEMM_DEFAULT_UNROLL_N 8
|
||||
|
||||
#define DGEMM_DEFAULT_UNROLL_MN 32
|
||||
|
||||
#define CGEMM_DEFAULT_UNROLL_M 8
|
||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
|
|
Loading…
Reference in New Issue