Merge pull request #1876 from rengolin/armv8-cleanup

Simplifying ARMv8 build parameters
This commit is contained in:
Martin Kroeker 2018-11-25 20:51:24 +01:00 committed by GitHub
commit 910050985a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 256 additions and 207 deletions

View File

@ -4,22 +4,37 @@ CCOMMON_OPT += -march=armv8-a
FCOMMON_OPT += -march=armv8-a FCOMMON_OPT += -march=armv8-a
endif endif
ifeq ($(CORE), CORTEXA57) ifeq ($(CORE), CORTEXA53)
CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
endif endif
ifeq ($(CORE), VULCAN) ifeq ($(CORE), CORTEXA57)
CCOMMON_OPT += -mtune=vulcan -mcpu=vulcan CCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
FCOMMON_OPT += -mtune=vulcan -mcpu=vulcan FCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
endif
ifeq ($(CORE), CORTEXA72)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
ifeq ($(CORE), CORTEXA73)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
endif endif
ifeq ($(CORE), THUNDERX) ifeq ($(CORE), THUNDERX)
CCOMMON_OPT += -mtune=thunderx -mcpu=thunderx CCOMMON_OPT += -march=armv8-a -mtune=thunderx
FCOMMON_OPT += -mtune=thunderx -mcpu=thunderx FCOMMON_OPT += -march=armv8-a -mtune=thunderx
endif
ifeq ($(CORE), FALKOR)
CCOMMON_OPT += -march=armv8.1-a -mtune=falkor
FCOMMON_OPT += -march=armv8.1-a -mtune=falkor
endif endif
ifeq ($(CORE), THUNDERX2T99) ifeq ($(CORE), THUNDERX2T99)
CCOMMON_OPT += -mtune=thunderx2t99 -mcpu=thunderx2t99 CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
FCOMMON_OPT += -mtune=thunderx2t99 -mcpu=thunderx2t99 FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
endif endif

View File

@ -83,8 +83,11 @@ ARMV5
8.ARM 64-bit CPU: 8.ARM 64-bit CPU:
ARMV8 ARMV8
CORTEXA53
CORTEXA57 CORTEXA57
VULCAN CORTEXA72
CORTEXA73
FALKOR
THUNDERX THUNDERX
THUNDERX2T99 THUNDERX2T99

View File

@ -29,16 +29,25 @@
#define CPU_UNKNOWN 0 #define CPU_UNKNOWN 0
#define CPU_ARMV8 1 #define CPU_ARMV8 1
#define CPU_CORTEXA57 2 // Arm
#define CPU_VULCAN 3 #define CPU_CORTEXA53 2
#define CPU_THUNDERX 4 #define CPU_CORTEXA57 3
#define CPU_THUNDERX2T99 5 #define CPU_CORTEXA72 4
#define CPU_CORTEXA73 5
// Qualcomm
#define CPU_FALKOR 6
// Cavium
#define CPU_THUNDERX 7
#define CPU_THUNDERX2T99 8
static char *cpuname[] = { static char *cpuname[] = {
"UNKNOWN", "UNKNOWN",
"ARMV8" , "ARMV8" ,
"CORTEXA53",
"CORTEXA57", "CORTEXA57",
"VULCAN", "CORTEXA72",
"CORTEXA73",
"FALKOR",
"THUNDERX", "THUNDERX",
"THUNDERX2T99" "THUNDERX2T99"
}; };
@ -46,8 +55,11 @@ static char *cpuname[] = {
static char *cpuname_lower[] = { static char *cpuname_lower[] = {
"unknown", "unknown",
"armv8", "armv8",
"cortexa53",
"cortexa57", "cortexa57",
"vulcan", "cortexa72",
"cortexa73",
"falkor",
"thunderx", "thunderx",
"thunderx2t99" "thunderx2t99"
}; };
@ -114,14 +126,24 @@ int detect(void)
fclose(infile); fclose(infile);
if(cpu_part != NULL && cpu_implementer != NULL) { if(cpu_part != NULL && cpu_implementer != NULL) {
if (strstr(cpu_implementer, "0x41") && // Arm
(strstr(cpu_part, "0xd07") || strstr(cpu_part,"0xd08"))) if (strstr(cpu_implementer, "0x41")) {
return CPU_CORTEXA57; //or compatible, ex. A72 if (strstr(cpu_part, "0xd03"))
else if (strstr(cpu_part, "0x516") && strstr(cpu_implementer, "0x42")) return CPU_CORTEXA53;
return CPU_VULCAN; else if (strstr(cpu_part, "0xd07"))
else if (strstr(cpu_part, "0x0a1") && strstr(cpu_implementer, "0x43")) return CPU_CORTEXA57;
else if (strstr(cpu_part, "0xd08"))
return CPU_CORTEXA72;
else if (strstr(cpu_part, "0xd09"))
return CPU_CORTEXA73;
}
// Qualcomm
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
return CPU_FALKOR;
// Cavium
else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0a1"))
return CPU_THUNDERX; return CPU_THUNDERX;
else if (strstr(cpu_part, "0x0af") && strstr(cpu_implementer, "0x43")) else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0af"))
return CPU_THUNDERX2T99; return CPU_THUNDERX2T99;
} }
@ -180,12 +202,20 @@ void get_subdirname(void)
void get_cpuconfig(void) void get_cpuconfig(void)
{ {
// All arches should define ARMv8
printf("#define ARMV8\n");
printf("#define HAVE_NEON\n"); // This shouldn't be necessary
printf("#define HAVE_VFPV4\n"); // This shouldn't be necessary
int d = detect(); int d = detect();
switch (d) switch (d)
{ {
case CPU_CORTEXA53:
printf("#define %s\n", cpuname[d]);
// Fall-through
case CPU_ARMV8: case CPU_ARMV8:
printf("#define ARMV8\n"); // Minimum parameters for ARMv8 (based on A53)
printf("#define L1_DATA_SIZE 32768\n"); printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 64\n"); printf("#define L1_DATA_LINESIZE 64\n");
printf("#define L2_SIZE 262144\n"); printf("#define L2_SIZE 262144\n");
@ -195,47 +225,39 @@ void get_cpuconfig(void)
printf("#define L2_ASSOCIATIVE 4\n"); printf("#define L2_ASSOCIATIVE 4\n");
break; break;
case CPU_VULCAN:
printf("#define VULCAN \n");
printf("#define HAVE_VFP \n");
printf("#define HAVE_VFPV3 \n");
printf("#define HAVE_NEON \n");
printf("#define HAVE_VFPV4 \n");
printf("#define L1_CODE_SIZE 32768 \n");
printf("#define L1_CODE_LINESIZE 64 \n");
printf("#define L1_CODE_ASSOCIATIVE 8 \n");
printf("#define L1_DATA_SIZE 32768 \n");
printf("#define L1_DATA_LINESIZE 64 \n");
printf("#define L1_DATA_ASSOCIATIVE 8 \n");
printf("#define L2_SIZE 262144 \n");
printf("#define L2_LINESIZE 64 \n");
printf("#define L2_ASSOCIATIVE 8 \n");
printf("#define L3_SIZE 33554432 \n");
printf("#define L3_LINESIZE 64 \n");
printf("#define L3_ASSOCIATIVE 32 \n");
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
printf("#define DTB_SIZE 4096 \n");
break;
case CPU_CORTEXA57: case CPU_CORTEXA57:
printf("#define CORTEXA57\n"); case CPU_CORTEXA72:
printf("#define HAVE_VFP\n"); case CPU_CORTEXA73:
printf("#define HAVE_VFPV3\n"); // Common minimum settings for these Arm cores
printf("#define HAVE_NEON\n"); // Can change a lot, but we need to be conservative
printf("#define HAVE_VFPV4\n"); // TODO: detect info from /sys if possible
printf("#define %s\n", cpuname[d]);
printf("#define L1_CODE_SIZE 49152\n"); printf("#define L1_CODE_SIZE 49152\n");
printf("#define L1_CODE_LINESIZE 64\n"); printf("#define L1_CODE_LINESIZE 64\n");
printf("#define L1_CODE_ASSOCIATIVE 3\n"); printf("#define L1_CODE_ASSOCIATIVE 3\n");
printf("#define L1_DATA_SIZE 32768\n"); printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 64\n"); printf("#define L1_DATA_LINESIZE 64\n");
printf("#define L1_DATA_ASSOCIATIVE 2\n"); printf("#define L1_DATA_ASSOCIATIVE 2\n");
printf("#define L2_SIZE 2097152\n"); printf("#define L2_SIZE 524288\n");
printf("#define L2_LINESIZE 64\n"); printf("#define L2_LINESIZE 64\n");
printf("#define L2_ASSOCIATIVE 16\n"); printf("#define L2_ASSOCIATIVE 16\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n"); printf("#define DTB_SIZE 4096\n");
break; break;
case CPU_FALKOR:
printf("#define FALKOR\n");
printf("#define L1_CODE_SIZE 65536\n");
printf("#define L1_CODE_LINESIZE 64\n");
printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 128\n");
printf("#define L2_SIZE 524288\n");
printf("#define L2_LINESIZE 64\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 16\n");
break;
case CPU_THUNDERX: case CPU_THUNDERX:
printf("#define THUNDERX\n"); printf("#define THUNDERX\n");
printf("#define L1_DATA_SIZE 32768\n"); printf("#define L1_DATA_SIZE 32768\n");
@ -249,10 +271,6 @@ void get_cpuconfig(void)
case CPU_THUNDERX2T99: case CPU_THUNDERX2T99:
printf("#define VULCAN \n"); printf("#define VULCAN \n");
printf("#define HAVE_VFP \n");
printf("#define HAVE_VFPV3 \n");
printf("#define HAVE_NEON \n");
printf("#define HAVE_VFPV4 \n");
printf("#define L1_CODE_SIZE 32768 \n"); printf("#define L1_CODE_SIZE 32768 \n");
printf("#define L1_CODE_LINESIZE 64 \n"); printf("#define L1_CODE_LINESIZE 64 \n");
printf("#define L1_CODE_ASSOCIATIVE 8 \n"); printf("#define L1_CODE_ASSOCIATIVE 8 \n");

View File

@ -927,11 +927,28 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ARCHCONFIG "-DARMV8 " \ #define ARCHCONFIG "-DARMV8 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "armv8" #define LIBNAME "armv8"
#define CORENAME "ARMV8" #define CORENAME "ARMV8"
#endif #endif
#ifdef FORCE_CORTEXA53
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "CORTEXA53"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DCORTEXA53 " \
"-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa53"
#define CORENAME "CORTEXA53"
#else
#endif
#ifdef FORCE_CORTEXA57 #ifdef FORCE_CORTEXA57
#define FORCE #define FORCE
#define ARCHITECTURE "ARM64" #define ARCHITECTURE "ARM64"
@ -942,26 +959,57 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \
"-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON" "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa57" #define LIBNAME "cortexa57"
#define CORENAME "CORTEXA57" #define CORENAME "CORTEXA57"
#else #else
#endif #endif
#ifdef FORCE_VULCAN #ifdef FORCE_CORTEXA72
#define FORCE #define FORCE
#define ARCHITECTURE "ARM64" #define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "VULCAN" #define SUBARCHITECTURE "CORTEXA72"
#define SUBDIRNAME "arm64" #define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DVULCAN " \ #define ARCHCONFIG "-DCORTEXA72 " \
"-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \ "-DL1_CODE_SIZE=49152 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \ "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \
"-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON" "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "vulcan" #define LIBNAME "cortexa72"
#define CORENAME "VULCAN" #define CORENAME "CORTEXA72"
#else
#endif
#ifdef FORCE_CORTEXA73
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "CORTEXA73"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DCORTEXA73 " \
"-DL1_CODE_SIZE=49152 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \
"-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa73"
#define CORENAME "CORTEXA73"
#else
#endif
#ifdef FORCE_FALKOR
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "FALKOR"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DFALKOR " \
"-DL1_CODE_SIZE=49152 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \
"-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "falkor"
#define CORENAME "FALKOR"
#else #else
#endif #endif
@ -973,13 +1021,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ARCHCONFIG "-DTHUNDERX " \ #define ARCHCONFIG "-DTHUNDERX " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
"-DL2_SIZE=16777216 -DL2_LINESIZE=128 -DL2_ASSOCIATIVE=16 " \ "-DL2_SIZE=16777216 -DL2_LINESIZE=128 -DL2_ASSOCIATIVE=16 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "thunderx" #define LIBNAME "thunderx"
#define CORENAME "THUNDERX" #define CORENAME "THUNDERX"
#else #else
#endif #endif
#ifdef FORCE_THUNDERX2T99 #ifdef FORCE_THUNDERX2T99
#define ARMV8
#define FORCE #define FORCE
#define ARCHITECTURE "ARM64" #define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "THUNDERX2T99" #define SUBARCHITECTURE "THUNDERX2T99"
@ -990,7 +1040,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
"-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \ "-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON" "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "thunderx2t99" #define LIBNAME "thunderx2t99"
#define CORENAME "THUNDERX2T99" #define CORENAME "THUNDERX2T99"
#else #else

View File

@ -46,7 +46,7 @@ CAMAXKERNEL = zamax.S
ZAMAXKERNEL = zamax.S ZAMAXKERNEL = zamax.S
SAXPYKERNEL = axpy.S SAXPYKERNEL = axpy.S
DAXPYKERNEL = daxpy_thunderx2t99.S DAXPYKERNEL = axpy.S
CAXPYKERNEL = zaxpy.S CAXPYKERNEL = zaxpy.S
ZAXPYKERNEL = zaxpy.S ZAXPYKERNEL = zaxpy.S
@ -71,39 +71,37 @@ CGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t.S ZGEMVTKERNEL = zgemv_t.S
SASUMKERNEL = sasum_thunderx2t99.c SASUMKERNEL = asum.S
DASUMKERNEL = dasum_thunderx2t99.c DASUMKERNEL = asum.S
CASUMKERNEL = casum_thunderx2t99.c CASUMKERNEL = casum.S
ZASUMKERNEL = zasum_thunderx2t99.c ZASUMKERNEL = zasum.S
SCOPYKERNEL = copy_thunderx2t99.c SCOPYKERNEL = copy.S
DCOPYKERNEL = copy_thunderx2t99.c DCOPYKERNEL = copy.S
CCOPYKERNEL = copy_thunderx2t99.c CCOPYKERNEL = copy.S
ZCOPYKERNEL = copy_thunderx2t99.c ZCOPYKERNEL = copy.S
SSWAPKERNEL = swap_thunderx2t99.S SSWAPKERNEL = swap.S
DSWAPKERNEL = swap_thunderx2t99.S DSWAPKERNEL = swap.S
CSWAPKERNEL = swap_thunderx2t99.S CSWAPKERNEL = swap.S
ZSWAPKERNEL = swap_thunderx2t99.S ZSWAPKERNEL = swap.S
ISAMAXKERNEL = iamax_thunderx2t99.c ISAMAXKERNEL = iamax.S
IDAMAXKERNEL = iamax_thunderx2t99.c IDAMAXKERNEL = iamax.S
ICAMAXKERNEL = izamax_thunderx2t99.c ICAMAXKERNEL = izamax.S
IZAMAXKERNEL = izamax_thunderx2t99.c IZAMAXKERNEL = izamax.S
ifneq ($(OS_DARWIN)$(CROSS),11) ifneq ($(OS_DARWIN)$(CROSS),11)
SNRM2KERNEL = scnrm2_thunderx2t99.c SNRM2KERNEL = nrm2.S
CNRM2KERNEL = scnrm2_thunderx2t99.c CNRM2KERNEL = nrm2.S
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c DNRM2KERNEL = znrm2.S
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c ZNRM2KERNEL = znrm2.S
DNRM2KERNEL = dznrm2_thunderx2t99.c
ZNRM2KERNEL = dznrm2_thunderx2t99.c
endif endif
DDOTKERNEL = dot_thunderx2t99.c DDOTKERNEL = dot.S
SDOTKERNEL = dot_thunderx2t99.c SDOTKERNEL = dot.S
CDOTKERNEL = zdot_thunderx2t99.c CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot_thunderx2t99.c ZDOTKERNEL = zdot.S
DSDOTKERNEL = dot.S DSDOTKERNEL = dot.S
ifneq ($(OS_DARWIN)$(CROSS),11) ifneq ($(OS_DARWIN)$(CROSS),11)
@ -175,22 +173,6 @@ ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4)
DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S
endif
ifeq ($(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N), 16x4)
SGEMMKERNEL = sgemm_kernel_16x4_thunderx2t99.S
endif
ifeq ($(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N), 8x4)
CGEMMKERNEL = cgemm_kernel_8x4_thunderx2t99.S
endif
ifeq ($(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N), 4x4)
ZGEMMKERNEL = zgemm_kernel_4x4_thunderx2t99.S
endif
else else
STRMMKERNEL = ../generic/trmmkernel_2x2.c STRMMKERNEL = ../generic/trmmkernel_2x2.c

View File

@ -0,0 +1,3 @@
include $(KERNELDIR)/KERNEL.ARMV8

View File

@ -0,0 +1,3 @@
include $(KERNELDIR)/KERNEL.CORTEXA57

View File

@ -0,0 +1,3 @@
include $(KERNELDIR)/KERNEL.CORTEXA57

View File

@ -0,0 +1,3 @@
include $(KERNELDIR)/KERNEL.CORTEXA57

View File

@ -1,3 +0,0 @@
include $(KERNELDIR)/KERNEL.THUNDERX2T99

136
param.h
View File

@ -2543,49 +2543,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SYMV_P 16 #define SYMV_P 16
#endif #endif
// Common ARMv8 parameters
#if defined(CORTEXA57)
#define SNUMOPT 2
#define DNUMOPT 2
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_P 512
#define DGEMM_DEFAULT_P 256
#define CGEMM_DEFAULT_P 256
#define ZGEMM_DEFAULT_P 128
#define SGEMM_DEFAULT_Q 1024
#define DGEMM_DEFAULT_Q 512
#define CGEMM_DEFAULT_Q 512
#define ZGEMM_DEFAULT_Q 512
#define SGEMM_DEFAULT_R 4096
#define DGEMM_DEFAULT_R 4096
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 2048
#define SYMV_P 16
#endif
#if defined(ARMV8) #if defined(ARMV8)
#if defined(OS_DARWIN) && defined(CROSS)
#define SNUMOPT 2 #define SNUMOPT 2
#define DNUMOPT 2 #define DNUMOPT 2
@ -2593,6 +2553,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_B 0 #define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL #define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SYMV_P 16
// Darwin / Cross
#if defined(OS_DARWIN) && defined(CROSS)
#define SGEMM_DEFAULT_UNROLL_M 2 #define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL_N 2 #define SGEMM_DEFAULT_UNROLL_N 2
@ -2620,15 +2585,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CGEMM_DEFAULT_R 4096 #define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 4096
#define SYMV_P 16 #else // Linux / Native
#else
#define SNUMOPT 2 #if defined(CORTEXA53) || defined(CORTEXA57) || \
#define DNUMOPT 2 defined(CORTEXA72) || defined(CORTEXA73) || \
defined(FALKOR)
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4 #define SGEMM_DEFAULT_UNROLL_N 4
@ -2642,33 +2603,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_UNROLL_M 4 #define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4 #define ZGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_P 128 #define SGEMM_DEFAULT_P 512
#define DGEMM_DEFAULT_P 160 #define DGEMM_DEFAULT_P 256
#define CGEMM_DEFAULT_P 128 #define CGEMM_DEFAULT_P 256
#define ZGEMM_DEFAULT_P 128 #define ZGEMM_DEFAULT_P 128
#define SGEMM_DEFAULT_Q 352 #define SGEMM_DEFAULT_Q 1024
#define DGEMM_DEFAULT_Q 128 #define DGEMM_DEFAULT_Q 512
#define CGEMM_DEFAULT_Q 224 #define CGEMM_DEFAULT_Q 512
#define ZGEMM_DEFAULT_Q 112 #define ZGEMM_DEFAULT_Q 512
#define SGEMM_DEFAULT_R 4096 #define SGEMM_DEFAULT_R 4096
#define DGEMM_DEFAULT_R 4096 #define DGEMM_DEFAULT_R 4096
#define CGEMM_DEFAULT_R 4096 #define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 2048
#define SYMV_P 16 #elif defined(THUNDERX)
#endif
#endif
#if defined(THUNDERX)
#define SNUMOPT 2
#define DNUMOPT 2
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 4 #define SGEMM_DEFAULT_UNROLL_N 4
@ -2697,17 +2647,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CGEMM_DEFAULT_R 4096 #define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 4096
#elif defined(THUNDERX2T99)
#define SYMV_P 16
#endif
#if defined(THUNDERX2T99) || defined(VULCAN)
#define SNUMOPT 2
#define DNUMOPT 2
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4 #define SGEMM_DEFAULT_UNROLL_N 4
@ -2736,8 +2676,40 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CGEMM_DEFAULT_R 4096 #define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 4096
#define SYMV_P 16 #else // Other/undetected ARMv8 cores
#endif
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_P 128
#define DGEMM_DEFAULT_P 160
#define CGEMM_DEFAULT_P 128
#define ZGEMM_DEFAULT_P 128
#define SGEMM_DEFAULT_Q 352
#define DGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q 224
#define ZGEMM_DEFAULT_Q 112
#define SGEMM_DEFAULT_R 4096
#define DGEMM_DEFAULT_R 4096
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096
#endif // Cores
#endif // Linux / Darwin
#endif // ARMv8
#if defined(ARMV5) #if defined(ARMV5)
#define SNUMOPT 2 #define SNUMOPT 2