diff --git a/Makefile.arm64 b/Makefile.arm64 index 3e3466de8..e9ae23366 100644 --- a/Makefile.arm64 +++ b/Makefile.arm64 @@ -153,6 +153,15 @@ endif endif endif +ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) +ifeq ($(CORE), A64FX) +CCOMMON_OPT += -march=armv8.2-a -mtune=a64fx +ifneq ($(F_COMPILER), NAG) +FCOMMON_OPT += -march=armv8.2-a -mtune=a64fx +endif +endif +endif + endif endif \ No newline at end of file diff --git a/cpuid_arm64.c b/cpuid_arm64.c index 73a82d188..fddb9e6bf 100644 --- a/cpuid_arm64.c +++ b/cpuid_arm64.c @@ -55,6 +55,8 @@ size_t length64=sizeof(value64); #define CPU_EMAG8180 10 // Apple #define CPU_VORTEX 13 +// Fujitsu +#define CPU_A64FX 14 static char *cpuname[] = { "UNKNOWN", @@ -71,7 +73,8 @@ static char *cpuname[] = { "NEOVERSEN1", "THUNDERX3T110", "VORTEX", - "CORTEXA55" + "CORTEXA55", + "A64FX" }; static char *cpuname_lower[] = { @@ -89,7 +92,8 @@ static char *cpuname_lower[] = { "neoversen1", "thunderx3t110", "vortex", - "cortexa55" + "cortexa55", + "a64fx" }; int get_feature(char *search) @@ -185,6 +189,9 @@ int detect(void) // Ampere else if (strstr(cpu_implementer, "0x50") && strstr(cpu_part, "0x000")) return CPU_EMAG8180; + // Fujitsu + else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001")) + return CPU_A64FX; } p = (char *) NULL ; @@ -287,156 +294,166 @@ void get_cpuconfig(void) switch (d) { - case CPU_CORTEXA53: - case CPU_CORTEXA55: - printf("#define %s\n", cpuname[d]); - // Fall-through - case CPU_ARMV8: - // Minimum parameters for ARMv8 (based on A53) - printf("#define L1_DATA_SIZE 32768\n"); - printf("#define L1_DATA_LINESIZE 64\n"); - printf("#define L2_SIZE 262144\n"); - printf("#define L2_LINESIZE 64\n"); - printf("#define DTB_DEFAULT_ENTRIES 64\n"); - printf("#define DTB_SIZE 4096\n"); - printf("#define L2_ASSOCIATIVE 4\n"); + case CPU_CORTEXA53: + case CPU_CORTEXA55: + printf("#define %s\n", cpuname[d]); + // Fall-through + case CPU_ARMV8: + // Minimum parameters for ARMv8 (based on A53) + 
printf("#define L1_DATA_SIZE 32768\n"); + printf("#define L1_DATA_LINESIZE 64\n"); + printf("#define L2_SIZE 262144\n"); + printf("#define L2_LINESIZE 64\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); + printf("#define DTB_SIZE 4096\n"); + printf("#define L2_ASSOCIATIVE 4\n"); break; - case CPU_CORTEXA57: - case CPU_CORTEXA72: - case CPU_CORTEXA73: + case CPU_CORTEXA57: + case CPU_CORTEXA72: + case CPU_CORTEXA73: // Common minimum settings for these Arm cores // Can change a lot, but we need to be conservative // TODO: detect info from /sys if possible - printf("#define %s\n", cpuname[d]); - printf("#define L1_CODE_SIZE 49152\n"); - printf("#define L1_CODE_LINESIZE 64\n"); - printf("#define L1_CODE_ASSOCIATIVE 3\n"); - printf("#define L1_DATA_SIZE 32768\n"); - printf("#define L1_DATA_LINESIZE 64\n"); - printf("#define L1_DATA_ASSOCIATIVE 2\n"); - printf("#define L2_SIZE 524288\n"); - printf("#define L2_LINESIZE 64\n"); - printf("#define L2_ASSOCIATIVE 16\n"); - printf("#define DTB_DEFAULT_ENTRIES 64\n"); - printf("#define DTB_SIZE 4096\n"); - break; - case CPU_NEOVERSEN1: - printf("#define %s\n", cpuname[d]); - printf("#define L1_CODE_SIZE 65536\n"); - printf("#define L1_CODE_LINESIZE 64\n"); - printf("#define L1_CODE_ASSOCIATIVE 4\n"); - printf("#define L1_DATA_SIZE 65536\n"); - printf("#define L1_DATA_LINESIZE 64\n"); - printf("#define L1_DATA_ASSOCIATIVE 4\n"); - printf("#define L2_SIZE 1048576\n"); - printf("#define L2_LINESIZE 64\n"); - printf("#define L2_ASSOCIATIVE 16\n"); - printf("#define DTB_DEFAULT_ENTRIES 64\n"); - printf("#define DTB_SIZE 4096\n"); - break; + printf("#define %s\n", cpuname[d]); + printf("#define L1_CODE_SIZE 49152\n"); + printf("#define L1_CODE_LINESIZE 64\n"); + printf("#define L1_CODE_ASSOCIATIVE 3\n"); + printf("#define L1_DATA_SIZE 32768\n"); + printf("#define L1_DATA_LINESIZE 64\n"); + printf("#define L1_DATA_ASSOCIATIVE 2\n"); + printf("#define L2_SIZE 524288\n"); + printf("#define L2_LINESIZE 64\n"); + printf("#define 
L2_ASSOCIATIVE 16\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); + printf("#define DTB_SIZE 4096\n"); + break; + case CPU_NEOVERSEN1: + printf("#define %s\n", cpuname[d]); + printf("#define L1_CODE_SIZE 65536\n"); + printf("#define L1_CODE_LINESIZE 64\n"); + printf("#define L1_CODE_ASSOCIATIVE 4\n"); + printf("#define L1_DATA_SIZE 65536\n"); + printf("#define L1_DATA_LINESIZE 64\n"); + printf("#define L1_DATA_ASSOCIATIVE 4\n"); + printf("#define L2_SIZE 1048576\n"); + printf("#define L2_LINESIZE 64\n"); + printf("#define L2_ASSOCIATIVE 16\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); + printf("#define DTB_SIZE 4096\n"); + break; - case CPU_FALKOR: - printf("#define FALKOR\n"); - printf("#define L1_CODE_SIZE 65536\n"); - printf("#define L1_CODE_LINESIZE 64\n"); - printf("#define L1_DATA_SIZE 32768\n"); - printf("#define L1_DATA_LINESIZE 128\n"); - printf("#define L2_SIZE 524288\n"); - printf("#define L2_LINESIZE 64\n"); - printf("#define DTB_DEFAULT_ENTRIES 64\n"); - printf("#define DTB_SIZE 4096\n"); - printf("#define L2_ASSOCIATIVE 16\n"); - break; + case CPU_FALKOR: + printf("#define FALKOR\n"); + printf("#define L1_CODE_SIZE 65536\n"); + printf("#define L1_CODE_LINESIZE 64\n"); + printf("#define L1_DATA_SIZE 32768\n"); + printf("#define L1_DATA_LINESIZE 128\n"); + printf("#define L2_SIZE 524288\n"); + printf("#define L2_LINESIZE 64\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); + printf("#define DTB_SIZE 4096\n"); + printf("#define L2_ASSOCIATIVE 16\n"); + break; - case CPU_THUNDERX: - printf("#define THUNDERX\n"); - printf("#define L1_DATA_SIZE 32768\n"); - printf("#define L1_DATA_LINESIZE 128\n"); - printf("#define L2_SIZE 16777216\n"); - printf("#define L2_LINESIZE 128\n"); - printf("#define DTB_DEFAULT_ENTRIES 64\n"); - printf("#define DTB_SIZE 4096\n"); - printf("#define L2_ASSOCIATIVE 16\n"); - break; + case CPU_THUNDERX: + printf("#define THUNDERX\n"); + printf("#define L1_DATA_SIZE 32768\n"); + printf("#define L1_DATA_LINESIZE 128\n"); + 
printf("#define L2_SIZE 16777216\n"); + printf("#define L2_LINESIZE 128\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); + printf("#define DTB_SIZE 4096\n"); + printf("#define L2_ASSOCIATIVE 16\n"); + break; - case CPU_THUNDERX2T99: - printf("#define THUNDERX2T99 \n"); - printf("#define L1_CODE_SIZE 32768 \n"); - printf("#define L1_CODE_LINESIZE 64 \n"); - printf("#define L1_CODE_ASSOCIATIVE 8 \n"); - printf("#define L1_DATA_SIZE 32768 \n"); - printf("#define L1_DATA_LINESIZE 64 \n"); - printf("#define L1_DATA_ASSOCIATIVE 8 \n"); - printf("#define L2_SIZE 262144 \n"); - printf("#define L2_LINESIZE 64 \n"); - printf("#define L2_ASSOCIATIVE 8 \n"); - printf("#define L3_SIZE 33554432 \n"); - printf("#define L3_LINESIZE 64 \n"); - printf("#define L3_ASSOCIATIVE 32 \n"); - printf("#define DTB_DEFAULT_ENTRIES 64 \n"); - printf("#define DTB_SIZE 4096 \n"); - break; + case CPU_THUNDERX2T99: + printf("#define THUNDERX2T99 \n"); + printf("#define L1_CODE_SIZE 32768 \n"); + printf("#define L1_CODE_LINESIZE 64 \n"); + printf("#define L1_CODE_ASSOCIATIVE 8 \n"); + printf("#define L1_DATA_SIZE 32768 \n"); + printf("#define L1_DATA_LINESIZE 64 \n"); + printf("#define L1_DATA_ASSOCIATIVE 8 \n"); + printf("#define L2_SIZE 262144 \n"); + printf("#define L2_LINESIZE 64 \n"); + printf("#define L2_ASSOCIATIVE 8 \n"); + printf("#define L3_SIZE 33554432 \n"); + printf("#define L3_LINESIZE 64 \n"); + printf("#define L3_ASSOCIATIVE 32 \n"); + printf("#define DTB_DEFAULT_ENTRIES 64 \n"); + printf("#define DTB_SIZE 4096 \n"); + break; - case CPU_TSV110: - printf("#define TSV110 \n"); - printf("#define L1_CODE_SIZE 65536 \n"); - printf("#define L1_CODE_LINESIZE 64 \n"); - printf("#define L1_CODE_ASSOCIATIVE 4 \n"); - printf("#define L1_DATA_SIZE 65536 \n"); - printf("#define L1_DATA_LINESIZE 64 \n"); - printf("#define L1_DATA_ASSOCIATIVE 4 \n"); - printf("#define L2_SIZE 524228 \n"); - printf("#define L2_LINESIZE 64 \n"); - printf("#define L2_ASSOCIATIVE 8 \n"); - printf("#define 
DTB_DEFAULT_ENTRIES 64 \n"); - printf("#define DTB_SIZE 4096 \n"); - break; + case CPU_TSV110: + printf("#define TSV110 \n"); + printf("#define L1_CODE_SIZE 65536 \n"); + printf("#define L1_CODE_LINESIZE 64 \n"); + printf("#define L1_CODE_ASSOCIATIVE 4 \n"); + printf("#define L1_DATA_SIZE 65536 \n"); + printf("#define L1_DATA_LINESIZE 64 \n"); + printf("#define L1_DATA_ASSOCIATIVE 4 \n"); + printf("#define L2_SIZE 524288 \n"); /* 512 KiB */ + printf("#define L2_LINESIZE 64 \n"); + printf("#define L2_ASSOCIATIVE 8 \n"); + printf("#define DTB_DEFAULT_ENTRIES 64 \n"); + printf("#define DTB_SIZE 4096 \n"); + break; - case CPU_EMAG8180: - // Minimum parameters for ARMv8 (based on A53) - printf("#define EMAG8180\n"); - printf("#define L1_CODE_SIZE 32768\n"); - printf("#define L1_DATA_SIZE 32768\n"); - printf("#define L1_DATA_LINESIZE 64\n"); - printf("#define L2_SIZE 262144\n"); - printf("#define L2_LINESIZE 64\n"); - printf("#define DTB_DEFAULT_ENTRIES 64\n"); - printf("#define DTB_SIZE 4096\n"); - break; + case CPU_EMAG8180: + // Minimum parameters for ARMv8 (based on A53) + printf("#define EMAG8180\n"); + printf("#define L1_CODE_SIZE 32768\n"); + printf("#define L1_DATA_SIZE 32768\n"); + printf("#define L1_DATA_LINESIZE 64\n"); + printf("#define L2_SIZE 262144\n"); + printf("#define L2_LINESIZE 64\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); + printf("#define DTB_SIZE 4096\n"); + break; - case CPU_THUNDERX3T110: - printf("#define THUNDERX3T110 \n"); - printf("#define L1_CODE_SIZE 65536 \n"); - printf("#define L1_CODE_LINESIZE 64 \n"); - printf("#define L1_CODE_ASSOCIATIVE 8 \n"); - printf("#define L1_DATA_SIZE 32768 \n"); - printf("#define L1_DATA_LINESIZE 64 \n"); - printf("#define L1_DATA_ASSOCIATIVE 8 \n"); - printf("#define L2_SIZE 524288 \n"); - printf("#define L2_LINESIZE 64 \n"); - printf("#define L2_ASSOCIATIVE 8 \n"); - printf("#define L3_SIZE 94371840 \n"); - printf("#define L3_LINESIZE 64 \n"); - printf("#define L3_ASSOCIATIVE 32 \n"); - printf("#define 
DTB_DEFAULT_ENTRIES 64 \n"); - printf("#define DTB_SIZE 4096 \n"); - break; + case CPU_THUNDERX3T110: + printf("#define THUNDERX3T110 \n"); + printf("#define L1_CODE_SIZE 65536 \n"); + printf("#define L1_CODE_LINESIZE 64 \n"); + printf("#define L1_CODE_ASSOCIATIVE 8 \n"); + printf("#define L1_DATA_SIZE 32768 \n"); + printf("#define L1_DATA_LINESIZE 64 \n"); + printf("#define L1_DATA_ASSOCIATIVE 8 \n"); + printf("#define L2_SIZE 524288 \n"); + printf("#define L2_LINESIZE 64 \n"); + printf("#define L2_ASSOCIATIVE 8 \n"); + printf("#define L3_SIZE 94371840 \n"); + printf("#define L3_LINESIZE 64 \n"); + printf("#define L3_ASSOCIATIVE 32 \n"); + printf("#define DTB_DEFAULT_ENTRIES 64 \n"); + printf("#define DTB_SIZE 4096 \n"); + break; #ifdef __APPLE__ - case CPU_VORTEX: - printf("#define VORTEX \n"); - sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0); - printf("#define L1_CODE_SIZE %lld \n",value64); - sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0); - printf("#define L1_CODE_LINESIZE %lld \n",value64); - sysctlbyname("hw.l1dcachesize",&value64,&length64,NULL,0); - printf("#define L1_DATA_SIZE %lld \n",value64); - sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0); - printf("#define L2_SIZE %lld \n",value64); - printf("#define DTB_DEFAULT_ENTRIES 64 \n"); - printf("#define DTB_SIZE 4096 \n"); - break; + case CPU_VORTEX: + printf("#define VORTEX \n"); + sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0); + printf("#define L1_CODE_SIZE %lld \n",value64); + sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0); + printf("#define L1_CODE_LINESIZE %lld \n",value64); + sysctlbyname("hw.l1dcachesize",&value64,&length64,NULL,0); + printf("#define L1_DATA_SIZE %lld \n",value64); + sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0); + printf("#define L2_SIZE %lld \n",value64); + printf("#define DTB_DEFAULT_ENTRIES 64 \n"); + printf("#define DTB_SIZE 4096 \n"); + break; #endif + case CPU_A64FX: + printf("#define A64FX\n"); + 
printf("#define L1_CODE_SIZE 65536\n"); /* 64 KiB */ + printf("#define L1_DATA_SIZE 65536\n"); /* 64 KiB */ + printf("#define L1_DATA_LINESIZE 256\n"); + printf("#define L2_SIZE 8388608\n"); + printf("#define L2_LINESIZE 256\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); + printf("#define DTB_SIZE 4096\n"); + break; } get_cpucount(); } diff --git a/getarch.c b/getarch.c index d095472a6..cea49c2c1 100644 --- a/getarch.c +++ b/getarch.c @@ -469,55 +469,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #endif -#ifdef FORCE_SAPPHIRERAPIDS -#define FORCE -#define FORCE_INTEL -#define ARCHITECTURE "X86" -#ifdef NO_AVX512 -#ifdef NO_AVX2 -#ifdef NO_AVX -#define SUBARCHITECTURE "NEHALEM" -#define ARCHCONFIG "-DNEHALEM " \ - "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ - "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ - "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ - "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2" -#define LIBNAME "nehalem" -#define CORENAME "NEHALEM" -#else -#define SUBARCHITECTURE "SANDYBRIDGE" -#define ARCHCONFIG "-DSANDYBRIDGE " \ - "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ - "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ - "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ - "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX" -#define LIBNAME "sandybridge" -#define CORENAME "SANDYBRIDGE" -#endif -#else -#define SUBARCHITECTURE "HASWELL" -#define ARCHCONFIG "-DHASWELL " \ - "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ - "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ - "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ - "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \ - "-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3" -#define LIBNAME "haswell" -#define CORENAME "HASWELL" -#endif -#else -#define SUBARCHITECTURE "SAPPHIRERAPIDS" -#define ARCHCONFIG "-DSAPPHIRERAPIDS " \ - "-DL1_DATA_SIZE=32768 
-DL1_DATA_LINESIZE=64 " \ - "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ - "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ - "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \ - "-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3 -DHAVE_AVX512VL -DHAVE_AVX512BF16 -march=sapphirerapids" -#define LIBNAME "sapphirerapids" -#define CORENAME "SAPPHIRERAPIDS" -#endif -#endif - #ifdef FORCE_ATOM #define FORCE #define FORCE_INTEL @@ -1424,6 +1375,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CORENAME "VORTEX" #endif +#ifdef FORCE_A64FX +#define ARMV8 +#define FORCE +#define ARCHITECTURE "ARM64" +#define SUBARCHITECTURE "A64FX" +#define SUBDIRNAME "arm64" +#define ARCHCONFIG "-DA64FX " \ + "-DL1_CODE_SIZE=65536 -DL1_CODE_LINESIZE=256 -DL1_CODE_ASSOCIATIVE=8 " \ + "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=256 -DL1_DATA_ASSOCIATIVE=8 " \ + "-DL2_SIZE=8388608 -DL2_LINESIZE=256 -DL2_ASSOCIATIVE=8 " \ + "-DL3_SIZE=0 -DL3_LINESIZE=0 -DL3_ASSOCIATIVE=0 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" +#define LIBNAME "a64fx" +#define CORENAME "A64FX" +#else +#endif + #ifdef FORCE_ZARCH_GENERIC #define FORCE #define ARCHITECTURE "ZARCH"