fix UNROLL_MN and add to targets for SVE
This commit is contained in:
parent
774267fdac
commit
a8f62a347b
|
@ -114,8 +114,8 @@ DSDOTKERNEL = dot.S
|
||||||
DGEMM_BETA = dgemm_beta.S
|
DGEMM_BETA = dgemm_beta.S
|
||||||
SGEMM_BETA = sgemm_beta.S
|
SGEMM_BETA = sgemm_beta.S
|
||||||
|
|
||||||
SGEMMKERNEL = sgemm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
||||||
STRMMKERNEL = strmm_kernel_8x$(SGEMM_UNROLL_N).S
|
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||||
|
|
||||||
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
||||||
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
||||||
|
@ -127,6 +127,11 @@ SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||||
|
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||||
|
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||||
|
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||||
|
|
||||||
SSYMMUCOPY_M = symm_ucopy_sve.c
|
SSYMMUCOPY_M = symm_ucopy_sve.c
|
||||||
SSYMMLCOPY_M = symm_lcopy_sve.c
|
SSYMMLCOPY_M = symm_lcopy_sve.c
|
||||||
|
|
||||||
|
|
|
@ -114,35 +114,27 @@ DSDOTKERNEL = dot.S
|
||||||
DGEMM_BETA = dgemm_beta.S
|
DGEMM_BETA = dgemm_beta.S
|
||||||
SGEMM_BETA = sgemm_beta.S
|
SGEMM_BETA = sgemm_beta.S
|
||||||
|
|
||||||
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
||||||
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
|
||||||
ifeq ($(SGEMM_UNROLL_M), 16)
|
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
||||||
SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S
|
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
||||||
else
|
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
|
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||||
endif
|
|
||||||
ifeq ($(SGEMM_UNROLL_M), 4)
|
|
||||||
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S
|
|
||||||
else
|
|
||||||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
|
|
||||||
endif
|
|
||||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
endif
|
|
||||||
ifeq ($(SGEMM_UNROLL_N), 16)
|
|
||||||
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
|
|
||||||
else
|
|
||||||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
|
|
||||||
endif
|
|
||||||
ifeq ($(SGEMM_UNROLL_N), 4)
|
|
||||||
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
|
|
||||||
else
|
|
||||||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
|
|
||||||
endif
|
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||||
|
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||||
|
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||||
|
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||||
|
|
||||||
|
SSYMMUCOPY_M = symm_ucopy_sve.c
|
||||||
|
SSYMMLCOPY_M = symm_lcopy_sve.c
|
||||||
|
|
||||||
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
||||||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
|
8
param.h
8
param.h
|
@ -3296,14 +3296,22 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
|
||||||
|
|
||||||
#elif defined(ARMV8SVE) || defined(A64FX)
|
#elif defined(ARMV8SVE) || defined(A64FX)
|
||||||
|
|
||||||
|
/* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
|
||||||
|
Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 8
|
#define SGEMM_DEFAULT_UNROLL_N 8
|
||||||
|
/* SGEMM_UNROLL_MN is calculated as max(SGEMM_UNROLL_M, SGEMM_UNROLL_N)
|
||||||
|
* Since we don't define SGEMM_UNROLL_M correctly we have to manually set this macro.
|
||||||
|
* If SVE size is ever more than 1024, this should be increased also. */
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_MN 32
|
||||||
|
|
||||||
/* When all BLAS3 routines are implemented with SVE, DGEMM_DEFAULT_UNROLL_M should be "sve_vl".
|
/* When all BLAS3 routines are implemented with SVE, DGEMM_DEFAULT_UNROLL_M should be "sve_vl".
|
||||||
Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
|
Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
|
||||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||||
#define DGEMM_DEFAULT_UNROLL_N 8
|
#define DGEMM_DEFAULT_UNROLL_N 8
|
||||||
|
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_MN 32
|
||||||
|
|
||||||
#define CGEMM_DEFAULT_UNROLL_M 8
|
#define CGEMM_DEFAULT_UNROLL_M 8
|
||||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue