From 75b1f3becc236f269a332e6233f2eab35d46f683 Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Sat, 19 Dec 2020 23:17:40 +0100
Subject: [PATCH 1/3] Limit POWERPC DYNAMIC_CORE list to P8 and P9 for NVIDIA compilers

---
 Makefile.system | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/Makefile.system b/Makefile.system
index 45d02ba5c..ce3a819a8 100644
--- a/Makefile.system
+++ b/Makefile.system
@@ -663,6 +663,7 @@ endif
 endif # ARCH zarch
 
 ifeq ($(ARCH), power)
+ifneq ($(C_COMPILER), PGI)
 DYNAMIC_CORE = POWER6
 DYNAMIC_CORE += POWER8
 ifneq ($(C_COMPILER), GCC)
@@ -689,6 +690,10 @@ else
 $(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.)
 endif
 endif
+else
+DYNAMIC_CORE = POWER8
+DYNAMIC_CORE += POWER9
+endif
 endif
 
 # If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
@@ -1039,12 +1044,18 @@ ifeq ($(ARCH), x86_64)
 FCOMMON_OPT += -tp p7-64
 else
 ifeq ($(ARCH), power)
+ifeq ($(CORE), POWER6)
+$(warning NVIDIA HPC compilers do not support POWER6.)
+endif
 ifeq ($(CORE), POWER8)
 FCOMMON_OPT += -tp pwr8
 endif
 ifeq ($(CORE), POWER9)
 FCOMMON_OPT += -tp pwr9
 endif
+ifeq ($(CORE), POWER10)
+$(warning NVIDIA HPC compilers do not support POWER10.)
+endif
 endif
 endif
 else
From 91c3f86c2bc47a8ebecbcea8af5cca6e38d5295b Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Sat, 19 Dec 2020 23:19:05 +0100
Subject: [PATCH 2/3] NVIDIA compiler does not yet support POWER10

---
 Makefile.power | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Makefile.power b/Makefile.power
index c7e972290..946f55232 100644
--- a/Makefile.power
+++ b/Makefile.power
@@ -10,9 +10,11 @@ USE_OPENMP = 1
 endif
 
 ifeq ($(CORE), POWER10)
+ifneq ($(C_COMPILER), PGI)
 CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
 FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
 endif
+endif
 
 ifeq ($(CORE), POWER9)
 ifneq ($(C_COMPILER), PGI)
From 17c16f2a71cf957f4a4c74050da0825f6ebe203f Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Sat, 19 Dec 2020 23:21:22 +0100
Subject: [PATCH 3/3] Implement builtin_cpu_is and limit cpu choices to P8 and P9 for NVIDIA compilers

---
 driver/others/dynamic_power.c | 151 ++++++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)

diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c
index a2f56d839..f9feeb6e8 100644
--- a/driver/others/dynamic_power.c
+++ b/driver/others/dynamic_power.c
@@ -27,7 +27,9 @@ static char *corename[] = {
 #define NUM_CORETYPES 4
 
 char *gotoblas_corename(void) {
+#ifndef C_PGI
   if (gotoblas == &gotoblas_POWER6) return corename[1];
+#endif
   if (gotoblas == &gotoblas_POWER8) return corename[2];
 #if (!defined __GNUC__) || ( __GNUC__ >= 6)
   if (gotoblas == &gotoblas_POWER9) return corename[3];
@@ -38,10 +40,157 @@ char *gotoblas_corename(void) {
   return corename[0];
 }
 
+#ifdef C_PGI
+/*
+ * NV HPC compilers do not yet implement __builtin_cpu_is().
+ * Fake a version here for use in the CPU detection code below.
+ *
+ * Strategy here is to first check the CPU to see what it actually is,
+ * and then test the input to see if what the CPU actually is matches
+ * what was requested.
+ */
+
+#include <string.h>
+
+/*
+ * Define POWER processor version table.
+ *
+ * NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time
+ */
+
+#define CPU_UNKNOWN 0
+#define CPU_POWER5 5
+#define CPU_POWER6 6
+#define CPU_POWER8 8
+#define CPU_POWER9 9
+#define CPU_POWER10 10
+
+static struct {
+  uint32_t pvr_mask;    /* PVR bits that take part in the comparison */
+  uint32_t pvr_value;   /* value the masked PVR must equal to match */
+  const char* cpu_name; /* label, only printed in DEBUG builds */
+  uint32_t cpu_type;    /* one of the CPU_* constants above */
+} pvrPOWER [] = {
+
+  { /* POWER6 in P5+ mode; 2.04-compliant processor */
+    .pvr_mask = 0xffffffff,
+    .pvr_value = 0x0f000001,
+    .cpu_name = "POWER5+",
+    .cpu_type = CPU_POWER5,
+  },
+
+  { /* Power6 aka POWER6X */
+    .pvr_mask = 0xffff0000,
+    .pvr_value = 0x003e0000,
+    .cpu_name = "POWER6 (raw)",
+    .cpu_type = CPU_POWER6,
+  },
+
+  { /* Power7 */
+    .pvr_mask = 0xffff0000,
+    .pvr_value = 0x003f0000,
+    .cpu_name = "POWER7 (raw)",
+    .cpu_type = CPU_POWER6,
+  },
+
+  { /* Power7+ */
+    .pvr_mask = 0xffff0000,
+    .pvr_value = 0x004A0000,
+    .cpu_name = "POWER7+ (raw)",
+    .cpu_type = CPU_POWER6,
+  },
+
+  { /* Power8E */
+    .pvr_mask = 0xffff0000,
+    .pvr_value = 0x004b0000,
+    .cpu_name = "POWER8E (raw)",
+    .cpu_type = CPU_POWER8,
+  },
+
+  { /* Power8NVL */
+    .pvr_mask = 0xffff0000,
+    .pvr_value = 0x004c0000,
+    .cpu_name = "POWER8NVL (raw)",
+    .cpu_type = CPU_POWER8,
+  },
+
+  { /* Power8 */
+    .pvr_mask = 0xffff0000,
+    .pvr_value = 0x004d0000,
+    .cpu_name = "POWER8 (raw)",
+    .cpu_type = CPU_POWER8,
+  },
+
+  { /* Power9 DD2.0 */
+    .pvr_mask = 0xffffefff,
+    .pvr_value = 0x004e0200,
+    .cpu_name = "POWER9 (raw)",
+    .cpu_type = CPU_POWER9,
+  },
+
+  { /* Power9 DD 2.1 */
+    .pvr_mask = 0xffffefff,
+    .pvr_value = 0x004e0201,
+    .cpu_name = "POWER9 (raw)",
+    .cpu_type = CPU_POWER9,
+  },
+
+  { /* Power9 DD2.2 or later */
+    .pvr_mask = 0xffff0000,
+    .pvr_value = 0x004e0000,
+    .cpu_name = "POWER9 (raw)",
+    .cpu_type = CPU_POWER9,
+  },
+
+  { /* Power10 */
+    .pvr_mask = 0xffff0000,
+    .pvr_value = 0x00800000,
+    .cpu_name = "POWER10 (raw)",
+    .cpu_type = CPU_POWER10,
+  },
+
+  { /* End of table, pvr_mask and pvr_value must be zero */
+    .pvr_mask = 0x0,
+    .pvr_value = 0x0,
+    .cpu_name = "Unknown",
+    .cpu_type = CPU_UNKNOWN,
+  },
+};
+
+/* Stand-in for __builtin_cpu_is(): nonzero only if cpu is "power8"/"power9" and the PVR agrees. */
+static int __builtin_cpu_is(const char *cpu) {
+  size_t i; /* unsigned, to match the sizeof-based loop bound */
+  uint32_t pvr, cpu_type;
+
+  asm("mfpvr %0" : "=r"(pvr)); /* fetch the processor version register */
+
+  for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) {
+    if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) {
+      break; /* always reached: the zero-mask end entry matches any PVR */
+    }
+  }
+
+#if defined(DEBUG)
+  printf("%s: returning CPU=%s, cpu_type=%u\n", __func__,
+         pvrPOWER[i].cpu_name, (unsigned)pvrPOWER[i].cpu_type);
+#endif
+  cpu_type = pvrPOWER[i].cpu_type;
+
+  if (!strcmp(cpu, "power8"))
+    return cpu_type == CPU_POWER8;
+  if (!strcmp(cpu, "power9"))
+    return cpu_type == CPU_POWER9;
+  return 0; /* any other name, e.g. "power6" or "power10", never matches */
+}
+
+#endif /* C_PGI */
+
 static gotoblas_t *get_coretype(void) {
 
+#ifndef C_PGI
   if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x"))
     return &gotoblas_POWER6;
+#endif
   if (__builtin_cpu_is("power8"))
     return &gotoblas_POWER8;
 #if (!defined __GNUC__) || ( __GNUC__ >= 6)
@@ -77,7 +226,9 @@ static gotoblas_t *force_coretype(char * coretype) {
 
   switch (found)
   {
+#ifndef C_PGI
   case 1: return (&gotoblas_POWER6);
+#endif
   case 2: return (&gotoblas_POWER8);
 #if (!defined __GNUC__) || ( __GNUC__ >= 6)
   case 3: return (&gotoblas_POWER9);