diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 40f00a634..0f5b06be5 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -13,27 +13,181 @@ extern gotoblas_t gotoblas_POWER10; extern void openblas_warning(int verbose, const char *msg); static char *corename[] = { - "unknown", - "POWER6", - "POWER8", - "POWER9", - "POWER10" + "unknown", + "POWER6", + "POWER8", + "POWER9", + "POWER10" }; #define NUM_CORETYPES 5 char *gotoblas_corename(void) { - if (gotoblas == &gotoblas_POWER6) return corename[1]; - if (gotoblas == &gotoblas_POWER8) return corename[2]; +#ifndef C_PGI + if (gotoblas == &gotoblas_POWER6) return corename[1]; +#endif + if (gotoblas == &gotoblas_POWER8) return corename[2]; #if (!defined __GNUC__) || ( __GNUC__ >= 6) - if (gotoblas == &gotoblas_POWER9) return corename[3]; + if (gotoblas == &gotoblas_POWER9) return corename[3]; #endif #ifdef HAVE_P10_SUPPORT - if (gotoblas == &gotoblas_POWER10) return corename[4]; + if (gotoblas == &gotoblas_POWER10) return corename[4]; #endif - return corename[0]; + return corename[0]; } +#if defined(__clang__) +static int __builtin_cpu_supports(char* arg) +{ + return 0; +} +#endif + +#if defined(C_PGI) || defined(__clang__) +/* + * NV HPC compilers do not yet implement __builtin_cpu_is(). + * Fake a version here for use in the CPU detection code below. + * + * Strategy here is to first check the CPU to see what it actually is, + * and then test the input to see if what the CPU actually is matches + * what was requested. + */ + +#include + +/* + * Define POWER processor version table. + * + * NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time + */ + +#define CPU_UNKNOWN 0 +#define CPU_POWER5 5 +#define CPU_POWER6 6 +#define CPU_POWER8 8 +#define CPU_POWER9 9 +#define CPU_POWER10 10 + +static struct { + uint32_t pvr_mask; + uint32_t pvr_value; + const char* cpu_name; + uint32_t cpu_type; +} pvrPOWER [] = { + + { /* POWER6 in P5+ mode; 2.04-compliant processor */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000001, + .cpu_name = "POWER5+", + .cpu_type = CPU_POWER5, + }, + + { /* Power6 aka POWER6X*/ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003e0000, + .cpu_name = "POWER6 (raw)", + .cpu_type = CPU_POWER6, + }, + + { /* Power7 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003f0000, + .cpu_name = "POWER7 (raw)", + .cpu_type = CPU_POWER6, + }, + + { /* Power7+ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004A0000, + .cpu_name = "POWER7+ (raw)", + .cpu_type = CPU_POWER6, + }, + + { /* Power8E */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004b0000, + .cpu_name = "POWER8E (raw)", + .cpu_type = CPU_POWER8, + }, + + { /* Power8NVL */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004c0000, + .cpu_name = "POWER8NVL (raw)", + .cpu_type = CPU_POWER8, + }, + + { /* Power8 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004d0000, + .cpu_name = "POWER8 (raw)", + .cpu_type = CPU_POWER8, + }, + + { /* Power9 DD2.0 */ + .pvr_mask = 0xffffefff, + .pvr_value = 0x004e0200, + .cpu_name = "POWER9 (raw)", + .cpu_type = CPU_POWER9, + }, + + { /* Power9 DD 2.1 */ + .pvr_mask = 0xffffefff, + .pvr_value = 0x004e0201, + .cpu_name = "POWER9 (raw)", + .cpu_type = CPU_POWER9, + }, + + { /* Power9 DD2.2 or later */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004e0000, + .cpu_name = "POWER9 (raw)", + .cpu_type = CPU_POWER9, + }, + + { /* Power10 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00800000, + .cpu_name = "POWER10 (raw)", + .cpu_type = CPU_POWER10, + }, + + { /* End of table, pvr_mask and pvr_value must be zero */ + .pvr_mask = 0x0, + .pvr_value = 0x0, + .cpu_name = "Unknown", + .cpu_type = CPU_UNKNOWN, + }, +}; + +static int __builtin_cpu_is(const char *cpu) { + int i; + uint32_t pvr; + uint32_t cpu_type; + + asm("mfpvr %0" : "=r"(pvr)); + + for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) { + if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) { + break; + } + } + +#if defined(DEBUG) + printf("%s: returning CPU=%s, cpu_type=%p\n", __func__, + pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type); +#endif + cpu_type = pvrPOWER[i].cpu_type; + + if (!strcmp(cpu, "power8")) + return cpu_type == CPU_POWER8; + if (!strcmp(cpu, "power9")) + return cpu_type == CPU_POWER9; + return 0; +} + +#endif /* C_PGI */ + #ifdef _AIX #include @@ -90,98 +244,102 @@ static int __builtin_cpu_supports(char* arg) static gotoblas_t *get_coretype(void) { - if (__builtin_cpu_supports("power6")) - return &gotoblas_POWER6; - if (__builtin_cpu_supports("power8")) - return &gotoblas_POWER8; +#ifndef C_PGI + if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x")) + return &gotoblas_POWER6; +#endif + if (__builtin_cpu_is("power8")) + return &gotoblas_POWER8; #if (!defined __GNUC__) || ( __GNUC__ >= 6) - if (__builtin_cpu_supports("power9")) - return &gotoblas_POWER9; + if (__builtin_cpu_is("power9")) + return &gotoblas_POWER9; #endif #ifdef HAVE_P10_SUPPORT #ifdef _AIX - if (__builtin_cpu_supports("power10")) + if (__builtin_cpu_supports("power10")) #else - if (__builtin_cpu_supports("arch_3_1") && __builtin_cpu_supports("mma")) + if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")) #endif - return &gotoblas_POWER10; + return &gotoblas_POWER10; #endif - /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ -#if (!defined __GNUC__) || ( __GNUC__ < 11) || (__GNUC__ == 10 && __GNUC_MINOR__ < 2) - if (__builtin_cpu_supports("power10")) - return &gotoblas_POWER9; -#endif - return NULL; + /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ +#if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2) + if (__builtin_cpu_is("power10")) + return &gotoblas_POWER9; +#endif + return NULL; } static gotoblas_t *force_coretype(char * coretype) { - int i ; - int found = -1; - char message[128]; + int i ; + int found = -1; + char message[128]; - for ( i = 0 ; i < NUM_CORETYPES; i++) - { - if (!strncasecmp(coretype, corename[i], 20)) - { - found = i; - break; - } - } + for ( i = 0 ; i < NUM_CORETYPES; i++) + { + if (!strncasecmp(coretype, corename[i], 20)) + { + found = i; + break; + } + } - switch (found) - { - case 1: return (&gotoblas_POWER6); - case 2: return (&gotoblas_POWER8); + switch (found) + { +#ifndef C_PGI + case 1: return (&gotoblas_POWER6); +#endif + case 2: return (&gotoblas_POWER8); #if (!defined __GNUC__) || ( __GNUC__ >= 6) - case 3: return (&gotoblas_POWER9); + case 3: return (&gotoblas_POWER9); #endif #ifdef HAVE_P10_SUPPORT - case 4: return (&gotoblas_POWER10); + case 4: return (&gotoblas_POWER10); #endif - default: return NULL; - } - snprintf(message, 128, "Core not found: %s\n", coretype); - openblas_warning(1, message); + default: return NULL; + } + snprintf(message, 128, "Core not found: %s\n", coretype); + openblas_warning(1, message); } void gotoblas_dynamic_init(void) { - char coremsg[128]; - char coren[22]; - char *p; + char coremsg[128]; + char coren[22]; + char *p; - if (gotoblas) return; + if (gotoblas) return; - p = getenv("OPENBLAS_CORETYPE"); - if ( p ) - { - gotoblas = force_coretype(p); - } - else - { - gotoblas = get_coretype(); - } + p = getenv("OPENBLAS_CORETYPE"); + if ( p ) + { + gotoblas = force_coretype(p); + } + else + { + gotoblas = get_coretype(); + } - if (gotoblas == NULL) - { - snprintf(coremsg, 128, "Falling back to POWER8 core\n"); - openblas_warning(1, coremsg); - gotoblas = &gotoblas_POWER8; - } + if (gotoblas == NULL) + { + snprintf(coremsg, 128, "Falling back to POWER8 core\n"); + openblas_warning(1, coremsg); + gotoblas = &gotoblas_POWER8; + } - if (gotoblas && gotoblas -> init) { - strncpy(coren,gotoblas_corename(),20); - sprintf(coremsg, "Core: %s\n",coren); - openblas_warning(2, coremsg); - gotoblas -> init(); - } else { - openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); - exit(1); - } + if (gotoblas && gotoblas -> init) { + strncpy(coren,gotoblas_corename(),20); + sprintf(coremsg, "Core: %s\n",coren); + openblas_warning(2, coremsg); + gotoblas -> init(); + } else { + openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); + exit(1); + } } void gotoblas_dynamic_quit(void) { - gotoblas = NULL; + gotoblas = NULL; }