From 3b1150fcee164922ed932c7d46b28a8ffec744a8 Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Mon, 2 Oct 2023 12:00:48 -0500 Subject: [PATCH] Fix CPU identification to work on AIX. --- driver/others/dynamic_power.c | 216 +++++++++------------------------- 1 file changed, 58 insertions(+), 158 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 2847ea9ae..7f8bfd5b9 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -20,12 +20,10 @@ static char *corename[] = { "POWER10" }; -#define NUM_CORETYPES 4 +#define NUM_CORETYPES 5 char *gotoblas_corename(void) { -#ifndef C_PGI if (gotoblas == &gotoblas_POWER6) return corename[1]; -#endif if (gotoblas == &gotoblas_POWER8) return corename[2]; #if (!defined __GNUC__) || ( __GNUC__ >= 6) if (gotoblas == &gotoblas_POWER9) return corename[3]; @@ -36,177 +34,81 @@ char *gotoblas_corename(void) { return corename[0]; } -#if defined(__clang__) -static int __builtin_cpu_supports(char* arg) -{ - return 0; -} -#endif +#ifdef _AIX +#include <sys/systemcfg.h> -#if defined(C_PGI) || defined(__clang__) -/* - * NV HPC compilers do not yet implement __builtin_cpu_is(). - * Fake a version here for use in the CPU detection code below. - * - * Strategy here is to first check the CPU to see what it actually is, - * and then test the input to see if what the CPU actually is matches - * what was requested. - */ - -#include <string.h> - -/* - * Define POWER processor version table. 
- * - * NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time - */ - -#define CPU_UNKNOWN 0 -#define CPU_POWER5 5 -#define CPU_POWER6 6 -#define CPU_POWER8 8 -#define CPU_POWER9 9 +#define CPU_UNKNOWN 0 +#define CPU_POWER6 6 +#define CPU_POWER7 7 +#define CPU_POWER8 8 +#define CPU_POWER9 9 #define CPU_POWER10 10 -static struct { - uint32_t pvr_mask; - uint32_t pvr_value; - const char* cpu_name; - uint32_t cpu_type; -} pvrPOWER [] = { - - { /* POWER6 in P5+ mode; 2.04-compliant processor */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x0f000001, - .cpu_name = "POWER5+", - .cpu_type = CPU_POWER5, - }, - - { /* Power6 aka POWER6X*/ - .pvr_mask = 0xffff0000, - .pvr_value = 0x003e0000, - .cpu_name = "POWER6 (raw)", - .cpu_type = CPU_POWER6, - }, - - { /* Power7 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x003f0000, - .cpu_name = "POWER7 (raw)", - .cpu_type = CPU_POWER6, - }, - - { /* Power7+ */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004A0000, - .cpu_name = "POWER7+ (raw)", - .cpu_type = CPU_POWER6, - }, - - { /* Power8E */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004b0000, - .cpu_name = "POWER8E (raw)", - .cpu_type = CPU_POWER8, - }, - - { /* Power8NVL */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004c0000, - .cpu_name = "POWER8NVL (raw)", - .cpu_type = CPU_POWER8, - }, - - { /* Power8 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004d0000, - .cpu_name = "POWER8 (raw)", - .cpu_type = CPU_POWER8, - }, - - { /* Power9 DD2.0 */ - .pvr_mask = 0xffffefff, - .pvr_value = 0x004e0200, - .cpu_name = "POWER9 (raw)", - .cpu_type = CPU_POWER9, - }, - - { /* Power9 DD 2.1 */ - .pvr_mask = 0xffffefff, - .pvr_value = 0x004e0201, - .cpu_name = "POWER9 (raw)", - .cpu_type = CPU_POWER9, - }, - - { /* Power9 DD2.2 or later */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x004e0000, - .cpu_name = "POWER9 (raw)", - .cpu_type = CPU_POWER9, - }, - - { /* Power10 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00800000, - .cpu_name = "POWER10 (raw)", - .cpu_type = CPU_POWER10, - 
}, - - { /* End of table, pvr_mask and pvr_value must be zero */ - .pvr_mask = 0x0, - .pvr_value = 0x0, - .cpu_name = "Unknown", - .cpu_type = CPU_UNKNOWN, - }, -}; - -static int __builtin_cpu_is(const char *cpu) { - int i; - uint32_t pvr; - uint32_t cpu_type; - - asm("mfpvr %0" : "=r"(pvr)); - - for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) { - if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) { - break; - } - } - -#if defined(DEBUG) - printf("%s: returning CPU=%s, cpu_type=%p\n", __func__, - pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type); +int cpuid() +{ + int arch = _system_configuration.implementation; +#ifdef POWER_6 + if (arch == POWER_6) return CPU_POWER6; #endif - cpu_type = pvrPOWER[i].cpu_type; - - if (!strcmp(cpu, "power8")) - return cpu_type == CPU_POWER8; - if (!strcmp(cpu, "power9")) - return cpu_type == CPU_POWER9; - return 0; +#ifdef POWER_7 + else if (arch == POWER_7) return CPU_POWER7; +#endif +#ifdef POWER_8 + else if (arch == POWER_8) return CPU_POWER8; +#endif +#ifdef POWER_9 + else if (arch == POWER_9) return CPU_POWER9; +#endif +#ifdef POWER_10 + else if (arch == POWER_10) return CPU_POWER10; +#endif + return CPU_UNKNOWN; } -#endif /* C_PGI */ +#ifndef __BUILTIN_CPU_SUPPORTS__ +static int __builtin_cpu_supports(char* arg) +{ + static int ipinfo = -1; + if (ipinfo < 0) { + ipinfo = cpuid(); + } + if (ipinfo >= CPU_POWER10) { + if (!strcmp(arg, "power10")) return 1; + } + if (ipinfo >= CPU_POWER9) { + if (!strcmp(arg, "power9")) return 1; + } + if (ipinfo >= CPU_POWER8) { + if (!strcmp(arg, "power8")) return 1; + } + if (ipinfo >= CPU_POWER6) { + if (!strcmp(arg, "power6")) return 1; + } + return 0; +} +#endif static gotoblas_t *get_coretype(void) { -#ifndef C_PGI - if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x")) + if (__builtin_cpu_supports("power6")) return &gotoblas_POWER6; -#endif - if (__builtin_cpu_is("power8")) + if (__builtin_cpu_supports("power8")) return &gotoblas_POWER8; #if (!defined __GNUC__) || ( 
__GNUC__ >= 6) - if (__builtin_cpu_is("power9")) + if (__builtin_cpu_supports("power9")) return &gotoblas_POWER9; #endif #ifdef HAVE_P10_SUPPORT - if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")) +#ifdef _AIX + if (__builtin_cpu_supports("power10")) +#else + if (__builtin_cpu_supports("arch_3_1") && __builtin_cpu_supports("mma")) +#endif return &gotoblas_POWER10; #endif /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ -#if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2) - if (__builtin_cpu_is("power10")) +#if (!defined __GNUC__) || ( __GNUC__ < 11) || (__GNUC__ == 10 && __GNUC_MINOR__ < 2) + if (__builtin_cpu_supports("power10")) return &gotoblas_POWER9; #endif return NULL; @@ -229,9 +131,7 @@ static gotoblas_t *force_coretype(char * coretype) { switch (found) { -#ifndef C_PGI case 1: return (&gotoblas_POWER6); -#endif case 2: return (&gotoblas_POWER8); #if (!defined __GNUC__) || ( __GNUC__ >= 6) case 3: return (&gotoblas_POWER9);