configure SVE Makefile
This commit is contained in:
parent
b58d4f31ab
commit
9388f05a3c
|
@ -1207,7 +1207,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8"
|
||||
#define LIBNAME "armv8sve"
|
||||
#define CORENAME "ARMV8SVE"
|
||||
#endif
|
||||
|
@ -1450,7 +1450,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DL2_SIZE=8388608 -DL2_LINESIZE=256 -DL2_ASSOCIATIVE=8 " \
|
||||
"-DL3_SIZE=0 -DL3_LINESIZE=0 -DL3_ASSOCIATIVE=0 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8"
|
||||
#define LIBNAME "a64fx"
|
||||
#define CORENAME "A64FX"
|
||||
#else
|
||||
|
|
|
@ -77,6 +77,14 @@ ifeq ($(CORE), Z14)
|
|||
USE_TRMM = 1
|
||||
endif
|
||||
|
||||
# Flag the A64FX core as SVE-capable. HAVE_SVE is later tested with
# `ifdef HAVE_SVE` to pick the arm64 SVE sources for copy kernels.
ifeq ($(CORE), A64FX)
|
||||
HAVE_SVE = 1
|
||||
endif
|
||||
|
||||
# Flag the generic ARMV8SVE core as SVE-capable. HAVE_SVE is later tested with
# `ifdef HAVE_SVE` to pick the arm64 SVE sources for copy kernels.
ifeq ($(CORE), ARMV8SVE)
|
||||
HAVE_SVE = 1
|
||||
endif
|
||||
|
||||
ifdef USE_DIRECT_SGEMM
|
||||
ifndef SGEMMDIRECTKERNEL
|
||||
SGEMMDIRECTKERNEL = sgemm_direct_skylakex.c
|
||||
|
@ -1531,6 +1539,31 @@ $(KDIR)strmm_oltucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(SGEMM_UNROLL_N
|
|||
$(KDIR)strmm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(SGEMM_UNROLL_N).c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@
|
||||
|
||||
# SVE builds: compile the double-precision inner (-UOUTER) TRMM copy kernels
# from the arm64 SVE sources. Each source file is built twice: once with
# -DUNIT (the *ucopy target) and once with -UUNIT (the *ncopy target).
# The iln*/ilt* targets are built with -DLOWER, the iun*/iut* targets with
# -ULOWER.
ifdef HAVE_SVE
|
||||
$(KDIR)dtrmm_iunucopy$(TSUFFIX).$(SUFFIX) : arm64/trmm_uncopy_sve_v1.c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@
|
||||
|
||||
$(KDIR)dtrmm_iunncopy$(TSUFFIX).$(SUFFIX) : arm64/trmm_uncopy_sve_v1.c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@
|
||||
|
||||
$(KDIR)dtrmm_ilnucopy$(TSUFFIX).$(SUFFIX) : arm64/trmm_lncopy_sve_v1.c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@
|
||||
|
||||
$(KDIR)dtrmm_ilnncopy$(TSUFFIX).$(SUFFIX) : arm64/trmm_lncopy_sve_v1.c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@
|
||||
|
||||
$(KDIR)dtrmm_iutucopy$(TSUFFIX).$(SUFFIX) : arm64/trmm_utcopy_sve_v1.c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@
|
||||
|
||||
$(KDIR)dtrmm_iutncopy$(TSUFFIX).$(SUFFIX) : arm64/trmm_utcopy_sve_v1.c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@
|
||||
|
||||
$(KDIR)dtrmm_iltucopy$(TSUFFIX).$(SUFFIX) : arm64/trmm_ltcopy_sve_v1.c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@
|
||||
|
||||
$(KDIR)dtrmm_iltncopy$(TSUFFIX).$(SUFFIX) : arm64/trmm_ltcopy_sve_v1.c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@
|
||||
else
|
||||
$(KDIR)dtrmm_iunucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(DGEMM_UNROLL_M).c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@
|
||||
|
||||
|
@ -1554,6 +1587,7 @@ $(KDIR)dtrmm_iltucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(DGEMM_UNROLL_M
|
|||
|
||||
$(KDIR)dtrmm_iltncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(DGEMM_UNROLL_M).c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@
|
||||
endif
|
||||
|
||||
$(KDIR)dtrmm_ounucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(DGEMM_UNROLL_N).c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@
|
||||
|
@ -1789,11 +1823,19 @@ $(KDIR)dsymm_outcopy$(TSUFFIX).$(SUFFIX) : generic/symm_ucopy_$(DGEMM_UNROLL_N).
|
|||
$(KDIR)dsymm_oltcopy$(TSUFFIX).$(SUFFIX) : generic/symm_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER $< -o $@
|
||||
|
||||
# Double-precision inner (-UOUTER) SYMM copy kernels.
# With HAVE_SVE defined they are built from the arm64 SVE sources; otherwise
# from the generic sources whose file name is selected by DGEMM_UNROLL_M.
# In both branches the iutcopy target is compiled with -ULOWER and the
# iltcopy target with -DLOWER.
ifdef HAVE_SVE
|
||||
$(KDIR)dsymm_iutcopy$(TSUFFIX).$(SUFFIX) : arm64/symm_ucopy_sve.c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER $< -o $@
|
||||
|
||||
$(KDIR)dsymm_iltcopy$(TSUFFIX).$(SUFFIX) : arm64/symm_lcopy_sve.c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER $< -o $@
|
||||
else
|
||||
$(KDIR)dsymm_iutcopy$(TSUFFIX).$(SUFFIX) : generic/symm_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER $< -o $@
|
||||
|
||||
$(KDIR)dsymm_iltcopy$(TSUFFIX).$(SUFFIX) : generic/symm_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER $< -o $@
|
||||
endif
|
||||
|
||||
$(KDIR)qsymm_outcopy$(TSUFFIX).$(SUFFIX) : generic/symm_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER $< -o $@
|
||||
|
|
6
param.h
6
param.h
|
@ -3294,12 +3294,14 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
|
|||
#define CGEMM_DEFAULT_R 4096
|
||||
#define ZGEMM_DEFAULT_R 4096
|
||||
|
||||
#elif defined(ARMV8SVE)
|
||||
#elif defined(ARMV8SVE) || defined(A64FX)
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||
/* When all BLAS3 routines are implemented with SVE, DGEMM_DEFAULT_UNROLL_M should be "sve_vl".
|
||||
Until then, just keep it different from DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
|
||||
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_N 8
|
||||
|
||||
#define CGEMM_DEFAULT_UNROLL_M 8
|
||||
|
|
Loading…
Reference in New Issue