configure SVE Makefile
This commit is contained in:
parent
b58d4f31ab
commit
9388f05a3c
|
@ -1207,7 +1207,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
||||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8"
|
||||||
#define LIBNAME "armv8sve"
|
#define LIBNAME "armv8sve"
|
||||||
#define CORENAME "ARMV8SVE"
|
#define CORENAME "ARMV8SVE"
|
||||||
#endif
|
#endif
|
||||||
|
@ -1450,7 +1450,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DL2_SIZE=8388608 -DL2_LINESIZE=256 -DL2_ASSOCIATIVE=8 " \
|
"-DL2_SIZE=8388608 -DL2_LINESIZE=256 -DL2_ASSOCIATIVE=8 " \
|
||||||
"-DL3_SIZE=0 -DL3_LINESIZE=0 -DL3_ASSOCIATIVE=0 " \
|
"-DL3_SIZE=0 -DL3_LINESIZE=0 -DL3_ASSOCIATIVE=0 " \
|
||||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8"
|
||||||
#define LIBNAME "a64fx"
|
#define LIBNAME "a64fx"
|
||||||
#define CORENAME "A64FX"
|
#define CORENAME "A64FX"
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -77,6 +77,14 @@ ifeq ($(CORE), Z14)
|
||||||
USE_TRMM = 1
|
USE_TRMM = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CORE), A64FX)
|
||||||
|
HAVE_SVE = 1
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CORE), ARMV8SVE)
|
||||||
|
HAVE_SVE = 1
|
||||||
|
endif
|
||||||
|
|
||||||
ifdef USE_DIRECT_SGEMM
|
ifdef USE_DIRECT_SGEMM
|
||||||
ifndef SGEMMDIRECTKERNEL
|
ifndef SGEMMDIRECTKERNEL
|
||||||
SGEMMDIRECTKERNEL = sgemm_direct_skylakex.c
|
SGEMMDIRECTKERNEL = sgemm_direct_skylakex.c
|
||||||
|
@ -1531,6 +1539,31 @@ $(KDIR)strmm_oltucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(SGEMM_UNROLL_N
|
||||||
$(KDIR)strmm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(SGEMM_UNROLL_N).c
|
$(KDIR)strmm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(SGEMM_UNROLL_N).c
|
||||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@
|
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@
|
||||||
|
|
||||||
|
ifdef HAVE_SVE
|
||||||
|
$(KDIR)dtrmm_iunucopy$(TSUFFIX).$(SUFFIX) : arm64/trmm_uncopy_sve_v1.c
|
||||||
|
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@
|
||||||
|
|
||||||
|
$(KDIR)dtrmm_iunncopy$(TSUFFIX).$(SUFFIX) : arm64/trmm_uncopy_sve_v1.c
|
||||||
|
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@
|
||||||
|
|
||||||
|
$(KDIR)dtrmm_ilnucopy$(TSUFFIX).$(SUFFIX) : arm64/trmm_lncopy_sve_v1.c
|
||||||
|
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@
|
||||||
|
|
||||||
|
$(KDIR)dtrmm_ilnncopy$(TSUFFIX).$(SUFFIX) : arm64/trmm_lncopy_sve_v1.c
|
||||||
|
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@
|
||||||
|
|
||||||
|
$(KDIR)dtrmm_iutucopy$(TSUFFIX).$(SUFFIX) : arm64/trmm_utcopy_sve_v1.c
|
||||||
|
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@
|
||||||
|
|
||||||
|
$(KDIR)dtrmm_iutncopy$(TSUFFIX).$(SUFFIX) : arm64/trmm_utcopy_sve_v1.c
|
||||||
|
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@
|
||||||
|
|
||||||
|
$(KDIR)dtrmm_iltucopy$(TSUFFIX).$(SUFFIX) : arm64/trmm_ltcopy_sve_v1.c
|
||||||
|
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@
|
||||||
|
|
||||||
|
$(KDIR)dtrmm_iltncopy$(TSUFFIX).$(SUFFIX) : arm64/trmm_ltcopy_sve_v1.c
|
||||||
|
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@
|
||||||
|
else
|
||||||
$(KDIR)dtrmm_iunucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(DGEMM_UNROLL_M).c
|
$(KDIR)dtrmm_iunucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(DGEMM_UNROLL_M).c
|
||||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@
|
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@
|
||||||
|
|
||||||
|
@ -1554,6 +1587,7 @@ $(KDIR)dtrmm_iltucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(DGEMM_UNROLL_M
|
||||||
|
|
||||||
$(KDIR)dtrmm_iltncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(DGEMM_UNROLL_M).c
|
$(KDIR)dtrmm_iltncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(DGEMM_UNROLL_M).c
|
||||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@
|
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@
|
||||||
|
endif
|
||||||
|
|
||||||
$(KDIR)dtrmm_ounucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(DGEMM_UNROLL_N).c
|
$(KDIR)dtrmm_ounucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(DGEMM_UNROLL_N).c
|
||||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@
|
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@
|
||||||
|
@ -1789,11 +1823,19 @@ $(KDIR)dsymm_outcopy$(TSUFFIX).$(SUFFIX) : generic/symm_ucopy_$(DGEMM_UNROLL_N).
|
||||||
$(KDIR)dsymm_oltcopy$(TSUFFIX).$(SUFFIX) : generic/symm_lcopy_$(DGEMM_UNROLL_N).c
|
$(KDIR)dsymm_oltcopy$(TSUFFIX).$(SUFFIX) : generic/symm_lcopy_$(DGEMM_UNROLL_N).c
|
||||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER $< -o $@
|
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -DLOWER $< -o $@
|
||||||
|
|
||||||
|
ifdef HAVE_SVE
|
||||||
|
$(KDIR)dsymm_iutcopy$(TSUFFIX).$(SUFFIX) : arm64/symm_ucopy_sve.c
|
||||||
|
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER $< -o $@
|
||||||
|
|
||||||
|
$(KDIR)dsymm_iltcopy$(TSUFFIX).$(SUFFIX) : arm64/symm_lcopy_sve.c
|
||||||
|
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER $< -o $@
|
||||||
|
else
|
||||||
$(KDIR)dsymm_iutcopy$(TSUFFIX).$(SUFFIX) : generic/symm_ucopy_$(DGEMM_UNROLL_M).c
|
$(KDIR)dsymm_iutcopy$(TSUFFIX).$(SUFFIX) : generic/symm_ucopy_$(DGEMM_UNROLL_M).c
|
||||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER $< -o $@
|
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -ULOWER $< -o $@
|
||||||
|
|
||||||
$(KDIR)dsymm_iltcopy$(TSUFFIX).$(SUFFIX) : generic/symm_lcopy_$(DGEMM_UNROLL_M).c
|
$(KDIR)dsymm_iltcopy$(TSUFFIX).$(SUFFIX) : generic/symm_lcopy_$(DGEMM_UNROLL_M).c
|
||||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER $< -o $@
|
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -UOUTER -DLOWER $< -o $@
|
||||||
|
endif
|
||||||
|
|
||||||
$(KDIR)qsymm_outcopy$(TSUFFIX).$(SUFFIX) : generic/symm_ucopy_$(QGEMM_UNROLL_N).c
|
$(KDIR)qsymm_outcopy$(TSUFFIX).$(SUFFIX) : generic/symm_ucopy_$(QGEMM_UNROLL_N).c
|
||||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER $< -o $@
|
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -UCOMPLEX -DOUTER -ULOWER $< -o $@
|
||||||
|
|
4
param.h
4
param.h
|
@ -3294,11 +3294,13 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
|
||||||
#define CGEMM_DEFAULT_R 4096
|
#define CGEMM_DEFAULT_R 4096
|
||||||
#define ZGEMM_DEFAULT_R 4096
|
#define ZGEMM_DEFAULT_R 4096
|
||||||
|
|
||||||
#elif defined(ARMV8SVE)
|
#elif defined(ARMV8SVE) || defined(A64FX)
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
|
||||||
|
/* When all BLAS3 routines are implemeted with SVE, DGEMM_DEFAULT_UNROLL_M should be "sve_vl".
|
||||||
|
Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions seperated. */
|
||||||
#define DGEMM_DEFAULT_UNROLL_M 4
|
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||||
#define DGEMM_DEFAULT_UNROLL_N 8
|
#define DGEMM_DEFAULT_UNROLL_N 8
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue