Add A64FX to the list of CPUs supported by DYNAMIC_ARCH
This commit is contained in:
parent
a815594fd1
commit
821ef34635
|
@ -689,6 +689,7 @@ ifneq ($(NO_SVE), 1)
|
|||
DYNAMIC_CORE += NEOVERSEV1
|
||||
DYNAMIC_CORE += NEOVERSEN2
|
||||
DYNAMIC_CORE += ARMV8SVE
|
||||
DYNAMIC_CORE += A64FX
|
||||
endif
|
||||
DYNAMIC_CORE += THUNDERX
|
||||
DYNAMIC_CORE += THUNDERX2T99
|
||||
|
|
|
@ -46,7 +46,7 @@ if (DYNAMIC_ARCH)
|
|||
if (ARM64)
|
||||
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
|
||||
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 9.99)
|
||||
set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE)
|
||||
set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE A64FX)
|
||||
endif ()
|
||||
if (DYNAMIC_LIST)
|
||||
set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST})
|
||||
|
|
|
@ -1218,6 +1218,37 @@ endif ()
|
|||
set(ZGEMM_UNROLL_M 4)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "A64FX")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_CODE_SIZE\t65536\n"
|
||||
"#define L1_CODE_LINESIZE\t256\n"
|
||||
"#define L1_CODE_ASSOCIATIVE\t8\n"
|
||||
"#define L1_DATA_SIZE\t32768\n"
|
||||
"#define L1_DATA_LINESIZE\t256\n"
|
||||
"#define L1_DATA_ASSOCIATIVE\t8\n"
|
||||
"#define L2_SIZE\t8388608\n\n"
|
||||
"#define L2_LINESIZE\t256\n"
|
||||
"#define L2_ASSOCIATIVE\t8\n"
|
||||
"#define L3_SIZE\t0\n\n"
|
||||
"#define L3_LINESIZE\t0\n\n"
|
||||
"#define L3_ASSOCIATIVE\t0\n\n"
|
||||
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||
"#define DTB_SIZE\t4096\n"
|
||||
"#define HAVE_VFPV4\n"
|
||||
"#define HAVE_VFPV3\n"
|
||||
"#define HAVE_VFP\n"
|
||||
"#define HAVE_NEON\n"
|
||||
"#define HAVE_SVE\n"
|
||||
"#define ARMV8\n")
|
||||
set(SGEMM_UNROLL_M 4)
|
||||
set(SGEMM_UNROLL_N 8)
|
||||
set(DGEMM_UNROLL_M 2)
|
||||
set(DGEMM_UNROLL_N 8)
|
||||
set(CGEMM_UNROLL_M 2)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 2)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "P5600")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L2_SIZE 1048576\n"
|
||||
|
|
|
@ -310,6 +310,18 @@ if (${TARGET} STREQUAL NEOVERSEV1)
|
|||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve")
|
||||
endif()
|
||||
endif()
|
||||
# A64FX: append the SVE-enabled architecture/tuning flags to KERNEL_DEFINITIONS.
#  - PGI compiler with SVE not disabled (NO_SVE unset/false): additionally pass
#    -Msve-intrinsics.
#  - Any other case: query the compiler with -dumpversion and require the
#    reported version to be at least 10.4; otherwise abort configuration.
if (${TARGET} STREQUAL A64FX)
  if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve-intrinsics -march=armv8.2-a+sve -mtune=a64fx")
  else ()
    # Strip the trailing newline emitted by -dumpversion so the version can be
    # embedded cleanly in the diagnostic below.
    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
                    OUTPUT_VARIABLE GCC_VERSION
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
    # Quote the expansion: if the query produced no output, the comparison
    # evaluates to false and we reach the FATAL_ERROR instead of tripping a
    # CMake syntax error on an empty operand.
    if ("${GCC_VERSION}" VERSION_GREATER 10.4 OR "${GCC_VERSION}" VERSION_EQUAL 10.4)
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve -mtune=a64fx")
    else ()
      # Report both the compiler path and the version that was detected.
      message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support A64FX.")
    endif()
  endif()
endif()
|
||||
|
||||
endif()
|
||||
|
||||
|
|
|
@ -120,6 +120,11 @@ extern gotoblas_t gotoblas_CORTEXA55;
|
|||
#else
|
||||
#define gotoblas_CORTEXA55 gotoblas_ARMV8
|
||||
#endif
|
||||
#ifdef DYN_A64FX
|
||||
extern gotoblas_t gotoblas_A64FX;
|
||||
#else
|
||||
#define gotoblas_A64FX gotoblas_ARMV8
|
||||
#endif
|
||||
#else
|
||||
extern gotoblas_t gotoblas_CORTEXA53;
|
||||
#define gotoblas_CORTEXA55 gotoblas_CORTEXA53
|
||||
|
@ -136,10 +141,12 @@ extern gotoblas_t gotoblas_NEOVERSEN1;
|
|||
extern gotoblas_t gotoblas_NEOVERSEV1;
|
||||
extern gotoblas_t gotoblas_NEOVERSEN2;
|
||||
extern gotoblas_t gotoblas_ARMV8SVE;
|
||||
extern gotoblas_t gotoblas_A64FX;
|
||||
#else
|
||||
#define gotoblas_NEOVERSEV1 gotoblas_ARMV8
|
||||
#define gotoblas_NEOVERSEN2 gotoblas_ARMV8
|
||||
#define gotoblas_ARMV8SVE gotoblas_ARMV8
|
||||
#define gotoblas_A64FX gotoblas_ARMV8
|
||||
#endif
|
||||
extern gotoblas_t gotoblas_THUNDERX3T110;
|
||||
#endif
|
||||
|
@ -149,7 +156,7 @@ extern void openblas_warning(int verbose, const char * msg);
|
|||
#define FALLBACK_VERBOSE 1
|
||||
#define NEOVERSEN1_FALLBACK "OpenBLAS : Your OS does not support SVE instructions. OpenBLAS is using Neoverse N1 kernels as a fallback, which may give poorer performance.\n"
|
||||
|
||||
#define NUM_CORETYPES 17
|
||||
#define NUM_CORETYPES 18
|
||||
|
||||
/*
|
||||
* In case asm/hwcap.h is outdated on the build system, make sure
|
||||
|
@ -184,6 +191,7 @@ static char *corename[] = {
|
|||
"thunderx3t110",
|
||||
"cortexa55",
|
||||
"armv8sve",
|
||||
"a64fx",
|
||||
"unknown"
|
||||
};
|
||||
|
||||
|
@ -205,6 +213,7 @@ char *gotoblas_corename(void) {
|
|||
if (gotoblas == &gotoblas_THUNDERX3T110) return corename[14];
|
||||
if (gotoblas == &gotoblas_CORTEXA55) return corename[15];
|
||||
if (gotoblas == &gotoblas_ARMV8SVE) return corename[16];
|
||||
if (gotoblas == &gotoblas_A64FX) return corename[17];
|
||||
return corename[NUM_CORETYPES];
|
||||
}
|
||||
|
||||
|
@ -241,6 +250,7 @@ static gotoblas_t *force_coretype(char *coretype) {
|
|||
case 14: return (&gotoblas_THUNDERX3T110);
|
||||
case 15: return (&gotoblas_CORTEXA55);
|
||||
case 16: return (&gotoblas_ARMV8SVE);
|
||||
case 17: return (&gotoblas_A64FX);
|
||||
}
|
||||
snprintf(message, 128, "Core not found: %s\n", coretype);
|
||||
openblas_warning(1, message);
|
||||
|
@ -346,6 +356,15 @@ static gotoblas_t *get_coretype(void) {
|
|||
return &gotoblas_THUNDERX3T110;
|
||||
}
|
||||
break;
|
||||
case 0x46: // Fujitsu
|
||||
switch (part)
|
||||
{
|
||||
#ifndef NO_SVE
|
||||
case 0x001: // A64FX
|
||||
return &gotoblas_A64FX;
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
case 0x48: // HiSilicon
|
||||
switch (part)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue