Add basic autodetection support for Fujitsu A64FX

This commit is contained in:
Martin Kroeker 2021-10-18 00:27:54 +02:00 committed by GitHub
parent b57acdf2d3
commit 3cb1ec2a96
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 183 additions and 188 deletions

View File

@ -153,6 +153,15 @@ endif
endif endif
endif endif
# Fujitsu A64FX: add the armv8.2-a / a64fx tuning flags, but only when
# GCCVERSIONGTEQ11 or ISCLANG evaluates to 1 (the two guards are independent,
# so the core check is done first here).
ifeq ($(CORE), A64FX)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.2-a -mtune=a64fx
# The Fortran flags are skipped when F_COMPILER is NAG.
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=a64fx
endif
endif
endif
endif endif
endif endif

View File

@ -55,6 +55,8 @@ size_t length64=sizeof(value64);
#define CPU_EMAG8180 10 #define CPU_EMAG8180 10
// Apple // Apple
#define CPU_VORTEX 13 #define CPU_VORTEX 13
// Fujitsu
// The CPU_* ids are used as indices into cpuname[] / cpuname_lower[].
// "A64FX" sits right after "CORTEXA55", which follows "VORTEX" (id 13),
// so its slot is 15; using 14 would select "CORTEXA55" instead.
#define CPU_A64FX 15
static char *cpuname[] = { static char *cpuname[] = {
"UNKNOWN", "UNKNOWN",
@ -71,7 +73,8 @@ static char *cpuname[] = {
"NEOVERSEN1", "NEOVERSEN1",
"THUNDERX3T110", "THUNDERX3T110",
"VORTEX", "VORTEX",
"CORTEXA55" "CORTEXA55",
"A64FX"
}; };
static char *cpuname_lower[] = { static char *cpuname_lower[] = {
@ -89,7 +92,8 @@ static char *cpuname_lower[] = {
"neoversen1", "neoversen1",
"thunderx3t110", "thunderx3t110",
"vortex", "vortex",
"cortexa55" "cortexa55",
"a64fx"
}; };
int get_feature(char *search) int get_feature(char *search)
@ -185,6 +189,9 @@ int detect(void)
// Ampere // Ampere
else if (strstr(cpu_implementer, "0x50") && strstr(cpu_part, "0x000")) else if (strstr(cpu_implementer, "0x50") && strstr(cpu_part, "0x000"))
return CPU_EMAG8180; return CPU_EMAG8180;
// Fujitsu
else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001"))
return CPU_A64FX;
} }
p = (char *) NULL ; p = (char *) NULL ;
@ -287,156 +294,166 @@ void get_cpuconfig(void)
switch (d) switch (d)
{ {
case CPU_CORTEXA53: case CPU_CORTEXA53:
case CPU_CORTEXA55: case CPU_CORTEXA55:
printf("#define %s\n", cpuname[d]); printf("#define %s\n", cpuname[d]);
// Fall-through // Fall-through
case CPU_ARMV8: case CPU_ARMV8:
// Minimum parameters for ARMv8 (based on A53) // Minimum parameters for ARMv8 (based on A53)
printf("#define L1_DATA_SIZE 32768\n"); printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 64\n"); printf("#define L1_DATA_LINESIZE 64\n");
printf("#define L2_SIZE 262144\n"); printf("#define L2_SIZE 262144\n");
printf("#define L2_LINESIZE 64\n"); printf("#define L2_LINESIZE 64\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n"); printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 4\n"); printf("#define L2_ASSOCIATIVE 4\n");
break; break;
case CPU_CORTEXA57: case CPU_CORTEXA57:
case CPU_CORTEXA72: case CPU_CORTEXA72:
case CPU_CORTEXA73: case CPU_CORTEXA73:
// Common minimum settings for these Arm cores // Common minimum settings for these Arm cores
// Can change a lot, but we need to be conservative // Can change a lot, but we need to be conservative
// TODO: detect info from /sys if possible // TODO: detect info from /sys if possible
printf("#define %s\n", cpuname[d]); printf("#define %s\n", cpuname[d]);
printf("#define L1_CODE_SIZE 49152\n"); printf("#define L1_CODE_SIZE 49152\n");
printf("#define L1_CODE_LINESIZE 64\n"); printf("#define L1_CODE_LINESIZE 64\n");
printf("#define L1_CODE_ASSOCIATIVE 3\n"); printf("#define L1_CODE_ASSOCIATIVE 3\n");
printf("#define L1_DATA_SIZE 32768\n"); printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 64\n"); printf("#define L1_DATA_LINESIZE 64\n");
printf("#define L1_DATA_ASSOCIATIVE 2\n"); printf("#define L1_DATA_ASSOCIATIVE 2\n");
printf("#define L2_SIZE 524288\n"); printf("#define L2_SIZE 524288\n");
printf("#define L2_LINESIZE 64\n"); printf("#define L2_LINESIZE 64\n");
printf("#define L2_ASSOCIATIVE 16\n"); printf("#define L2_ASSOCIATIVE 16\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n"); printf("#define DTB_SIZE 4096\n");
break; break;
case CPU_NEOVERSEN1: case CPU_NEOVERSEN1:
printf("#define %s\n", cpuname[d]); printf("#define %s\n", cpuname[d]);
printf("#define L1_CODE_SIZE 65536\n"); printf("#define L1_CODE_SIZE 65536\n");
printf("#define L1_CODE_LINESIZE 64\n"); printf("#define L1_CODE_LINESIZE 64\n");
printf("#define L1_CODE_ASSOCIATIVE 4\n"); printf("#define L1_CODE_ASSOCIATIVE 4\n");
printf("#define L1_DATA_SIZE 65536\n"); printf("#define L1_DATA_SIZE 65536\n");
printf("#define L1_DATA_LINESIZE 64\n"); printf("#define L1_DATA_LINESIZE 64\n");
printf("#define L1_DATA_ASSOCIATIVE 4\n"); printf("#define L1_DATA_ASSOCIATIVE 4\n");
printf("#define L2_SIZE 1048576\n"); printf("#define L2_SIZE 1048576\n");
printf("#define L2_LINESIZE 64\n"); printf("#define L2_LINESIZE 64\n");
printf("#define L2_ASSOCIATIVE 16\n"); printf("#define L2_ASSOCIATIVE 16\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n"); printf("#define DTB_SIZE 4096\n");
break; break;
case CPU_FALKOR: case CPU_FALKOR:
printf("#define FALKOR\n"); printf("#define FALKOR\n");
printf("#define L1_CODE_SIZE 65536\n"); printf("#define L1_CODE_SIZE 65536\n");
printf("#define L1_CODE_LINESIZE 64\n"); printf("#define L1_CODE_LINESIZE 64\n");
printf("#define L1_DATA_SIZE 32768\n"); printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 128\n"); printf("#define L1_DATA_LINESIZE 128\n");
printf("#define L2_SIZE 524288\n"); printf("#define L2_SIZE 524288\n");
printf("#define L2_LINESIZE 64\n"); printf("#define L2_LINESIZE 64\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n"); printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 16\n"); printf("#define L2_ASSOCIATIVE 16\n");
break; break;
case CPU_THUNDERX: case CPU_THUNDERX:
printf("#define THUNDERX\n"); printf("#define THUNDERX\n");
printf("#define L1_DATA_SIZE 32768\n"); printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 128\n"); printf("#define L1_DATA_LINESIZE 128\n");
printf("#define L2_SIZE 16777216\n"); printf("#define L2_SIZE 16777216\n");
printf("#define L2_LINESIZE 128\n"); printf("#define L2_LINESIZE 128\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n"); printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 16\n"); printf("#define L2_ASSOCIATIVE 16\n");
break; break;
case CPU_THUNDERX2T99: case CPU_THUNDERX2T99:
printf("#define THUNDERX2T99 \n"); printf("#define THUNDERX2T99 \n");
printf("#define L1_CODE_SIZE 32768 \n"); printf("#define L1_CODE_SIZE 32768 \n");
printf("#define L1_CODE_LINESIZE 64 \n"); printf("#define L1_CODE_LINESIZE 64 \n");
printf("#define L1_CODE_ASSOCIATIVE 8 \n"); printf("#define L1_CODE_ASSOCIATIVE 8 \n");
printf("#define L1_DATA_SIZE 32768 \n"); printf("#define L1_DATA_SIZE 32768 \n");
printf("#define L1_DATA_LINESIZE 64 \n"); printf("#define L1_DATA_LINESIZE 64 \n");
printf("#define L1_DATA_ASSOCIATIVE 8 \n"); printf("#define L1_DATA_ASSOCIATIVE 8 \n");
printf("#define L2_SIZE 262144 \n"); printf("#define L2_SIZE 262144 \n");
printf("#define L2_LINESIZE 64 \n"); printf("#define L2_LINESIZE 64 \n");
printf("#define L2_ASSOCIATIVE 8 \n"); printf("#define L2_ASSOCIATIVE 8 \n");
printf("#define L3_SIZE 33554432 \n"); printf("#define L3_SIZE 33554432 \n");
printf("#define L3_LINESIZE 64 \n"); printf("#define L3_LINESIZE 64 \n");
printf("#define L3_ASSOCIATIVE 32 \n"); printf("#define L3_ASSOCIATIVE 32 \n");
printf("#define DTB_DEFAULT_ENTRIES 64 \n"); printf("#define DTB_DEFAULT_ENTRIES 64 \n");
printf("#define DTB_SIZE 4096 \n"); printf("#define DTB_SIZE 4096 \n");
break; break;
case CPU_TSV110: case CPU_TSV110:
printf("#define TSV110 \n"); printf("#define TSV110 \n");
printf("#define L1_CODE_SIZE 65536 \n"); printf("#define L1_CODE_SIZE 65536 \n");
printf("#define L1_CODE_LINESIZE 64 \n"); printf("#define L1_CODE_LINESIZE 64 \n");
printf("#define L1_CODE_ASSOCIATIVE 4 \n"); printf("#define L1_CODE_ASSOCIATIVE 4 \n");
printf("#define L1_DATA_SIZE 65536 \n"); printf("#define L1_DATA_SIZE 65536 \n");
printf("#define L1_DATA_LINESIZE 64 \n"); printf("#define L1_DATA_LINESIZE 64 \n");
printf("#define L1_DATA_ASSOCIATIVE 4 \n"); printf("#define L1_DATA_ASSOCIATIVE 4 \n");
printf("#define L2_SIZE 524228 \n"); printf("#define L2_SIZE 524228 \n");
printf("#define L2_LINESIZE 64 \n"); printf("#define L2_LINESIZE 64 \n");
printf("#define L2_ASSOCIATIVE 8 \n"); printf("#define L2_ASSOCIATIVE 8 \n");
printf("#define DTB_DEFAULT_ENTRIES 64 \n"); printf("#define DTB_DEFAULT_ENTRIES 64 \n");
printf("#define DTB_SIZE 4096 \n"); printf("#define DTB_SIZE 4096 \n");
break; break;
case CPU_EMAG8180: case CPU_EMAG8180:
// Minimum parameters for ARMv8 (based on A53) // Minimum parameters for ARMv8 (based on A53)
printf("#define EMAG8180\n"); printf("#define EMAG8180\n");
printf("#define L1_CODE_SIZE 32768\n"); printf("#define L1_CODE_SIZE 32768\n");
printf("#define L1_DATA_SIZE 32768\n"); printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 64\n"); printf("#define L1_DATA_LINESIZE 64\n");
printf("#define L2_SIZE 262144\n"); printf("#define L2_SIZE 262144\n");
printf("#define L2_LINESIZE 64\n"); printf("#define L2_LINESIZE 64\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n"); printf("#define DTB_SIZE 4096\n");
break; break;
case CPU_THUNDERX3T110: case CPU_THUNDERX3T110:
printf("#define THUNDERX3T110 \n"); printf("#define THUNDERX3T110 \n");
printf("#define L1_CODE_SIZE 65536 \n"); printf("#define L1_CODE_SIZE 65536 \n");
printf("#define L1_CODE_LINESIZE 64 \n"); printf("#define L1_CODE_LINESIZE 64 \n");
printf("#define L1_CODE_ASSOCIATIVE 8 \n"); printf("#define L1_CODE_ASSOCIATIVE 8 \n");
printf("#define L1_DATA_SIZE 32768 \n"); printf("#define L1_DATA_SIZE 32768 \n");
printf("#define L1_DATA_LINESIZE 64 \n"); printf("#define L1_DATA_LINESIZE 64 \n");
printf("#define L1_DATA_ASSOCIATIVE 8 \n"); printf("#define L1_DATA_ASSOCIATIVE 8 \n");
printf("#define L2_SIZE 524288 \n"); printf("#define L2_SIZE 524288 \n");
printf("#define L2_LINESIZE 64 \n"); printf("#define L2_LINESIZE 64 \n");
printf("#define L2_ASSOCIATIVE 8 \n"); printf("#define L2_ASSOCIATIVE 8 \n");
printf("#define L3_SIZE 94371840 \n"); printf("#define L3_SIZE 94371840 \n");
printf("#define L3_LINESIZE 64 \n"); printf("#define L3_LINESIZE 64 \n");
printf("#define L3_ASSOCIATIVE 32 \n"); printf("#define L3_ASSOCIATIVE 32 \n");
printf("#define DTB_DEFAULT_ENTRIES 64 \n"); printf("#define DTB_DEFAULT_ENTRIES 64 \n");
printf("#define DTB_SIZE 4096 \n"); printf("#define DTB_SIZE 4096 \n");
break; break;
#ifdef __APPLE__ #ifdef __APPLE__
case CPU_VORTEX: case CPU_VORTEX:
printf("#define VORTEX \n"); printf("#define VORTEX \n");
sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0); sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0);
printf("#define L1_CODE_SIZE %lld \n",value64); printf("#define L1_CODE_SIZE %lld \n",value64);
sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0); sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0);
printf("#define L1_CODE_LINESIZE %lld \n",value64); printf("#define L1_CODE_LINESIZE %lld \n",value64);
sysctlbyname("hw.l1dcachesize",&value64,&length64,NULL,0); sysctlbyname("hw.l1dcachesize",&value64,&length64,NULL,0);
printf("#define L1_DATA_SIZE %lld \n",value64); printf("#define L1_DATA_SIZE %lld \n",value64);
sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0); sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0);
printf("#define L2_SIZE %lld \n",value64); printf("#define L2_SIZE %lld \n",value64);
printf("#define DTB_DEFAULT_ENTRIES 64 \n"); printf("#define DTB_DEFAULT_ENTRIES 64 \n");
printf("#define DTB_SIZE 4096 \n"); printf("#define DTB_SIZE 4096 \n");
break; break;
#endif #endif
case CPU_A64FX:
    // Fujitsu A64FX: emit the cache/TLB parameters for the generated config.
    // L1 instruction and data caches are 64 KiB each, i.e. 65536 bytes
    // (a power of two, not 65535); L2 is 8 MiB with 256-byte lines.
    printf("#define A64FX\n");
    printf("#define L1_CODE_SIZE 65536\n");
    printf("#define L1_DATA_SIZE 65536\n");
    printf("#define L1_DATA_LINESIZE 256\n");
    printf("#define L2_SIZE 8388608\n");
    printf("#define L2_LINESIZE 256\n");
    printf("#define DTB_DEFAULT_ENTRIES 64\n");
    printf("#define DTB_SIZE 4096\n");
    break;
} }
get_cpucount(); get_cpucount();
} }

View File

@ -469,55 +469,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#ifdef FORCE_SAPPHIRERAPIDS
/*
 * Forced target: Intel Sapphire Rapids.
 * The NO_AVX512 / NO_AVX2 / NO_AVX switches progressively downgrade the
 * selected configuration: SAPPHIRERAPIDS -> HASWELL -> SANDYBRIDGE -> NEHALEM.
 * A lower level is only reached when every higher-level NO_* macro is defined.
 */
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
#if !defined(NO_AVX512)
/* Full feature set. */
#define SUBARCHITECTURE "SAPPHIRERAPIDS"
#define ARCHCONFIG "-DSAPPHIRERAPIDS " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
"-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3 -DHAVE_AVX512VL -DHAVE_AVX512BF16 -march=sapphirerapids"
#define LIBNAME "sapphirerapids"
#define CORENAME "SAPPHIRERAPIDS"
#elif !defined(NO_AVX2)
/* AVX512 disabled, AVX2 still allowed. */
#define SUBARCHITECTURE "HASWELL"
#define ARCHCONFIG "-DHASWELL " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
"-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3"
#define LIBNAME "haswell"
#define CORENAME "HASWELL"
#elif !defined(NO_AVX)
/* AVX512 and AVX2 disabled, AVX still allowed. */
#define SUBARCHITECTURE "SANDYBRIDGE"
#define ARCHCONFIG "-DSANDYBRIDGE " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX"
#define LIBNAME "sandybridge"
#define CORENAME "SANDYBRIDGE"
#else
/* All AVX levels disabled. */
#define SUBARCHITECTURE "NEHALEM"
#define ARCHCONFIG "-DNEHALEM " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2"
#define LIBNAME "nehalem"
#define CORENAME "NEHALEM"
#endif
#endif
#ifdef FORCE_ATOM #ifdef FORCE_ATOM
#define FORCE #define FORCE
#define FORCE_INTEL #define FORCE_INTEL
@ -1424,6 +1375,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "VORTEX" #define CORENAME "VORTEX"
#endif #endif
#ifdef FORCE_A64FX
/*
 * Forced target: Fujitsu A64FX (ARM64).
 * L1 instruction and data caches are both 64 KiB (65536 bytes) with
 * 256-byte lines; L2 is 8 MiB; there is no L3, hence the zeroed L3 values.
 * NOTE(review): the associativity values are carried over unchanged --
 * confirm them against the A64FX microarchitecture manual.
 */
#define ARMV8
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "A64FX"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DA64FX " \
"-DL1_CODE_SIZE=65536 -DL1_CODE_LINESIZE=256 -DL1_CODE_ASSOCIATIVE=8 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=256 -DL1_DATA_ASSOCIATIVE=8 " \
"-DL2_SIZE=8388608 -DL2_LINESIZE=256 -DL2_ASSOCIATIVE=8 " \
"-DL3_SIZE=0 -DL3_LINESIZE=0 -DL3_ASSOCIATIVE=0 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "a64fx"
#define CORENAME "A64FX"
#endif
#ifdef FORCE_ZARCH_GENERIC #ifdef FORCE_ZARCH_GENERIC
#define FORCE #define FORCE
#define ARCHITECTURE "ZARCH" #define ARCHITECTURE "ZARCH"