Update DYNAMIC_ARCH support for ARM64 and PPC (#2332)

* Update DYNAMIC_ARCH list of ARM64 targets for gmake
* Update arm64 cpu list for runtime detection
* Update DYNAMIC_ARCH list of ARM64 targets for cmake and add POWERPC targets
This commit is contained in:
Martin Kroeker 2019-12-04 11:06:03 +01:00 committed by GitHub
parent 9d5079008f
commit a4896b5538
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 138 additions and 10 deletions

View File

@ -39,7 +39,10 @@ CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
endif endif
ifeq ($(GCCVERSIONGTEQ9), 1)
ifeq ($(CORE), TSV110) ifeq ($(CORE), TSV110)
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110 CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110 FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
endif endif
endif

View File

@ -326,6 +326,7 @@ ifeq ($(C_COMPILER), GCC)
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4) GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4) GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5) GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7) GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
ifeq ($(GCCVERSIONGT4), 1) ifeq ($(GCCVERSIONGT4), 1)
# GCC Major version > 4 # GCC Major version > 4
@ -547,9 +548,14 @@ endif
ifeq ($(ARCH), arm64) ifeq ($(ARCH), arm64)
DYNAMIC_CORE = ARMV8 DYNAMIC_CORE = ARMV8
DYNAMIC_CORE += CORTEXA53
DYNAMIC_CORE += CORTEXA57 DYNAMIC_CORE += CORTEXA57
DYNAMIC_CORE += CORTEXA72
DYNAMIC_CORE += CORTEXA73
DYNAMIC_CORE += FALKOR
DYNAMIC_CORE += THUNDERX DYNAMIC_CORE += THUNDERX
DYNAMIC_CORE += THUNDERX2T99 DYNAMIC_CORE += THUNDERX2T99
DYNAMIC_CORE += TSV110
endif endif
ifeq ($(ARCH), power) ifeq ($(ARCH), power)

View File

@ -45,7 +45,11 @@ endif ()
if (DYNAMIC_ARCH) if (DYNAMIC_ARCH)
if (ARM64) if (ARM64)
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99) set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110)
endif ()
if (POWER)
set(DYNAMIC_CORE POWER6 POWER8 POWER9)
endif () endif ()
if (X86) if (X86)

View File

@ -309,6 +309,83 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
set(ZGEMM_UNROLL_M 4) set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4) set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16) set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "TSV110")
file(APPEND ${TARGET_CONF_TEMP}
"#define ARMV8\n"
"#define L1_CODE_SIZE\t65536\n"
"#define L1_CODE_LINESIZE\t64\n"
"#define L1_CODE_ASSOCIATIVE\t4\n"
"#define L1_DATA_SIZE\t65536\n"
"#define L1_DATA_LINESIZE\t64\n"
"#define L1_DATA_ASSOCIATIVE\t4\n"
"#define L2_SIZE\t524288\n"
"#define L2_LINESIZE\t64\n"
"#define L2_ASSOCIATIVE\t8\n"
"#define DTB_DEFAULT_ENTRIES\t64\n"
"#define DTB_SIZE\t4096\n")
set(SGEMM_UNROLL_M 16)
set(SGEMM_UNROLL_N 4)
set(DGEMM_UNROLL_M 8)
set(DGEMM_UNROLL_N 4)
set(CGEMM_UNROLL_M 8)
set(CGEMM_UNROLL_N 4)
set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "POWER6")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_DATA_SIZE 32768\n"
"#define L1_DATA_LINESIZE 128\n"
"#define L2_SIZE 524288\n"
"#define L2_LINESIZE 128 \n"
"#define DTB_DEFAULT_ENTRIES 128\n"
"#define DTB_SIZE 4096\n"
"#define L2_ASSOCIATIVE 8\n")
set(SGEMM_UNROLL_M 4)
set(SGEMM_UNROLL_N 4)
set(DGEMM_UNROLL_M 4)
set(DGEMM_UNROLL_N 4)
set(CGEMM_UNROLL_M 2)
set(CGEMM_UNROLL_N 4)
set(ZGEMM_UNROLL_M 2)
set(ZGEMM_UNROLL_N 4)
set(SYMV_P 8)
elseif ("${TCORE}" STREQUAL "POWER8")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_DATA_SIZE 32768\n"
"#define L1_DATA_LINESIZE 128\n"
"#define L2_SIZE 524288\n"
"#define L2_LINESIZE 128 \n"
"#define DTB_DEFAULT_ENTRIES 128\n"
"#define DTB_SIZE 4096\n"
"#define L2_ASSOCIATIVE 8\n")
set(SGEMM_UNROLL_M 16)
set(SGEMM_UNROLL_N 8)
set(DGEMM_UNROLL_M 16)
set(DGEMM_UNROLL_N 4)
set(CGEMM_UNROLL_M 8)
set(CGEMM_UNROLL_N 4)
set(ZGEMM_UNROLL_M 8)
set(ZGEMM_UNROLL_N 2)
set(SYMV_P 8)
elseif ("${TCORE}" STREQUAL "POWER9")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_DATA_SIZE 32768\n"
"#define L1_DATA_LINESIZE 128\n"
"#define L2_SIZE 524288\n"
"#define L2_LINESIZE 128 \n"
"#define DTB_DEFAULT_ENTRIES 128\n"
"#define DTB_SIZE 4096\n"
"#define L2_ASSOCIATIVE 8\n")
set(SGEMM_UNROLL_M 16)
set(SGEMM_UNROLL_N 8)
set(DGEMM_UNROLL_M 16)
set(DGEMM_UNROLL_N 4)
set(CGEMM_UNROLL_M 8)
set(CGEMM_UNROLL_N 4)
set(ZGEMM_UNROLL_M 8)
set(ZGEMM_UNROLL_N 2)
set(SYMV_P 8)
endif() endif()
# Or should this actually be NUM_CORES? # Or should this actually be NUM_CORES?

View File

@ -43,13 +43,18 @@
#endif #endif
extern gotoblas_t gotoblas_ARMV8; extern gotoblas_t gotoblas_ARMV8;
extern gotoblas_t gotoblas_CORTEXA53;
extern gotoblas_t gotoblas_CORTEXA57; extern gotoblas_t gotoblas_CORTEXA57;
extern gotoblas_t gotoblas_CORTEXA72;
extern gotoblas_t gotoblas_CORTEXA73;
extern gotoblas_t gotoblas_FALKOR;
extern gotoblas_t gotoblas_THUNDERX; extern gotoblas_t gotoblas_THUNDERX;
extern gotoblas_t gotoblas_THUNDERX2T99; extern gotoblas_t gotoblas_THUNDERX2T99;
extern gotoblas_t gotoblas_TSV110;
extern void openblas_warning(int verbose, const char * msg); extern void openblas_warning(int verbose, const char * msg);
#define NUM_CORETYPES 4 #define NUM_CORETYPES 9
/* /*
* In case asm/hwcap.h is outdated on the build system, make sure * In case asm/hwcap.h is outdated on the build system, make sure
@ -65,17 +70,27 @@ extern void openblas_warning(int verbose, const char * msg);
static char *corename[] = { static char *corename[] = {
"armv8", "armv8",
"cortexa53",
"cortexa57", "cortexa57",
"cortexa72",
"cortexa73",
"falkor",
"thunderx", "thunderx",
"thunderx2t99", "thunderx2t99",
"tsv110",
"unknown" "unknown"
}; };
char *gotoblas_corename(void) { char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_ARMV8) return corename[ 0]; if (gotoblas == &gotoblas_ARMV8) return corename[ 0];
if (gotoblas == &gotoblas_CORTEXA57) return corename[ 1]; if (gotoblas == &gotoblas_CORTEXA53) return corename[ 1];
if (gotoblas == &gotoblas_THUNDERX) return corename[ 2]; if (gotoblas == &gotoblas_CORTEXA57) return corename[ 2];
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 3]; if (gotoblas == &gotoblas_CORTEXA72) return corename[ 3];
if (gotoblas == &gotoblas_CORTEXA73) return corename[ 4];
if (gotoblas == &gotoblas_FALKOR) return corename[ 5];
if (gotoblas == &gotoblas_THUNDERX) return corename[ 6];
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 7];
if (gotoblas == &gotoblas_TSV110) return corename[ 8];
return corename[NUM_CORETYPES]; return corename[NUM_CORETYPES];
} }
@ -96,9 +111,14 @@ static gotoblas_t *force_coretype(char *coretype) {
switch (found) switch (found)
{ {
case 0: return (&gotoblas_ARMV8); case 0: return (&gotoblas_ARMV8);
case 1: return (&gotoblas_CORTEXA57); case 1: return (&gotoblas_CORTEXA53);
case 2: return (&gotoblas_THUNDERX); case 2: return (&gotoblas_CORTEXA57);
case 3: return (&gotoblas_THUNDERX2T99); case 3: return (&gotoblas_CORTEXA72);
case 4: return (&gotoblas_CORTEXA73);
case 5: return (&gotoblas_FALKOR);
case 6: return (&gotoblas_THUNDERX);
case 7: return (&gotoblas_THUNDERX2T99);
case 8: return (&gotoblas_TSV110);
} }
snprintf(message, 128, "Core not found: %s\n", coretype); snprintf(message, 128, "Core not found: %s\n", coretype);
openblas_warning(1, message); openblas_warning(1, message);
@ -136,10 +156,14 @@ static gotoblas_t *get_coretype(void) {
case 0x41: // ARM case 0x41: // ARM
switch (part) switch (part)
{ {
case 0xd07: // Cortex A57
case 0xd08: // Cortex A72
case 0xd03: // Cortex A53 case 0xd03: // Cortex A53
return &gotoblas_CORTEXA53;
case 0xd07: // Cortex A57
return &gotoblas_CORTEXA57; return &gotoblas_CORTEXA57;
case 0xd08: // Cortex A72
return &gotoblas_CORTEXA72;
case 0xd09: // Cortex A73
return &gotoblas_CORTEXA73;
} }
break; break;
case 0x42: // Broadcom case 0x42: // Broadcom
@ -158,6 +182,20 @@ static gotoblas_t *get_coretype(void) {
return &gotoblas_THUNDERX2T99; return &gotoblas_THUNDERX2T99;
} }
break; break;
case 0x48: // HiSilicon
switch (part)
{
case 0xd01: // tsv110
return &gotoblas_TSV110;
}
break;
case 0x51: // Qualcomm
switch (part)
{
case 0xc00: // Falkor
return &gotoblas_FALKOR;
}
break;
} }
return NULL; return NULL;
} }