Merge pull request #4280 from ChipKerchner/dynamicDispatchAIXandClang

Add dynamic dispatch to AIX and clang for Power
This commit is contained in:
Martin Kroeker 2023-11-02 23:43:59 +01:00 committed by GitHub
commit 59444415ac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 75 additions and 28 deletions

View File

@ -3,7 +3,7 @@
extern gotoblas_t gotoblas_POWER6; extern gotoblas_t gotoblas_POWER6;
extern gotoblas_t gotoblas_POWER8; extern gotoblas_t gotoblas_POWER8;
#if (!defined __GNUC__) || ( __GNUC__ >= 6) #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
extern gotoblas_t gotoblas_POWER9; extern gotoblas_t gotoblas_POWER9;
#endif #endif
#ifdef HAVE_P10_SUPPORT #ifdef HAVE_P10_SUPPORT
@ -20,14 +20,14 @@ static char *corename[] = {
"POWER10" "POWER10"
}; };
#define NUM_CORETYPES 4 #define NUM_CORETYPES 5
char *gotoblas_corename(void) { char *gotoblas_corename(void) {
#ifndef C_PGI #ifndef C_PGI
if (gotoblas == &gotoblas_POWER6) return corename[1]; if (gotoblas == &gotoblas_POWER6) return corename[1];
#endif #endif
if (gotoblas == &gotoblas_POWER8) return corename[2]; if (gotoblas == &gotoblas_POWER8) return corename[2];
#if (!defined __GNUC__) || ( __GNUC__ >= 6) #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
if (gotoblas == &gotoblas_POWER9) return corename[3]; if (gotoblas == &gotoblas_POWER9) return corename[3];
#endif #endif
#ifdef HAVE_P10_SUPPORT #ifdef HAVE_P10_SUPPORT
@ -36,13 +36,37 @@ char *gotoblas_corename(void) {
return corename[0]; return corename[0];
} }
#if defined(__clang__) #define CPU_UNKNOWN 0
static int __builtin_cpu_supports(char* arg) #define CPU_POWER5 5
{ #define CPU_POWER6 6
return 0; #define CPU_POWER8 8
} #define CPU_POWER9 9
#endif #define CPU_POWER10 10
#ifdef _AIX
#include <sys/systemcfg.h>
/*
 * Identify the running POWER processor on AIX.
 *
 * Reads the `implementation` field of `_system_configuration` and maps it
 * onto the CPU_* codes defined earlier in this file.
 *
 * Returns one of CPU_POWER6, CPU_POWER8, CPU_POWER9 or CPU_POWER10, or
 * CPU_UNKNOWN when the value matches none of the POWER_n constants that the
 * system header provides.
 *
 * Each comparison is guarded by its own #ifdef because older system headers
 * may not define the newer POWER_n constants.  The tests are written as
 * independent `if` statements (every one of them returns) rather than an
 * `else if` chain, so the function still compiles when an earlier POWER_n
 * macro is missing; a chain would leave a dangling `else` in that case.
 */
static int cpuid(void)
{
    int arch = _system_configuration.implementation;

#ifdef POWER_6
    if (arch == POWER_6)
        return CPU_POWER6;
#endif
#ifdef POWER_7
    /* There is no CPU_POWER7 code in this file; POWER7 is reported as POWER6. */
    if (arch == POWER_7)
        return CPU_POWER6;
#endif
#ifdef POWER_8
    if (arch == POWER_8)
        return CPU_POWER8;
#endif
#ifdef POWER_9
    if (arch == POWER_9)
        return CPU_POWER9;
#endif
#ifdef POWER_10
    if (arch == POWER_10)
        return CPU_POWER10;
#endif
    return CPU_UNKNOWN;
}
#else
#if defined(C_PGI) || defined(__clang__) #if defined(C_PGI) || defined(__clang__)
/* /*
* NV HPC compilers do not yet implement __builtin_cpu_is(). * NV HPC compilers do not yet implement __builtin_cpu_is().
@ -53,21 +77,12 @@ static int __builtin_cpu_supports(char* arg)
* what was requested. * what was requested.
*/ */
#include <string.h>
/* /*
* Define POWER processor version table. * Define POWER processor version table.
* *
* NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time * NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time
*/ */
#define CPU_UNKNOWN 0
#define CPU_POWER5 5
#define CPU_POWER6 6
#define CPU_POWER8 8
#define CPU_POWER9 9
#define CPU_POWER10 10
static struct { static struct {
uint32_t pvr_mask; uint32_t pvr_mask;
uint32_t pvr_value; uint32_t pvr_value;
@ -160,7 +175,8 @@ static struct {
}, },
}; };
static int __builtin_cpu_is(const char *cpu) { static int cpuid(void)
{
int i; int i;
uint32_t pvr; uint32_t pvr;
uint32_t cpu_type; uint32_t cpu_type;
@ -178,15 +194,42 @@ static int __builtin_cpu_is(const char *cpu) {
pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type); pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type);
#endif #endif
cpu_type = pvrPOWER[i].cpu_type; cpu_type = pvrPOWER[i].cpu_type;
return (int)(cpu_type);
}
#endif /* C_PGI */
#endif /* _AIX */
if (!strcmp(cpu, "power8")) #ifndef __BUILTIN_CPU_SUPPORTS__
return cpu_type == CPU_POWER8; #include <string.h>
if (!strcmp(cpu, "power9"))
return cpu_type == CPU_POWER9; static int __builtin_cpu_is(const char *arg)
{
static int ipinfo = -1;
if (ipinfo < 0) {
ipinfo = cpuid();
}
#ifdef HAVE_P10_SUPPORT
if (ipinfo == CPU_POWER10) {
if (!strcmp(arg, "power10")) return 1;
}
#endif
if (ipinfo == CPU_POWER9) {
if (!strcmp(arg, "power9")) return 1;
} else if (ipinfo == CPU_POWER8) {
if (!strcmp(arg, "power8")) return 1;
#ifndef C_PGI
} else if (ipinfo == CPU_POWER6) {
if (!strcmp(arg, "power6")) return 1;
#endif
}
return 0; return 0;
} }
#endif /* C_PGI */ static int __builtin_cpu_supports(const char *arg)
{
return 0;
}
#endif
static gotoblas_t *get_coretype(void) { static gotoblas_t *get_coretype(void) {
@ -196,12 +239,16 @@ static gotoblas_t *get_coretype(void) {
#endif #endif
if (__builtin_cpu_is("power8")) if (__builtin_cpu_is("power8"))
return &gotoblas_POWER8; return &gotoblas_POWER8;
#if (!defined __GNUC__) || ( __GNUC__ >= 6) #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
if (__builtin_cpu_is("power9")) if (__builtin_cpu_is("power9"))
return &gotoblas_POWER9; return &gotoblas_POWER9;
#endif #endif
#ifdef HAVE_P10_SUPPORT #ifdef HAVE_P10_SUPPORT
#if defined(_AIX) || defined(__clang__)
if (__builtin_cpu_is("power10"))
#else
if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")) if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma"))
#endif
return &gotoblas_POWER10; return &gotoblas_POWER10;
#endif #endif
/* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */
@ -233,7 +280,7 @@ static gotoblas_t *force_coretype(char * coretype) {
case 1: return (&gotoblas_POWER6); case 1: return (&gotoblas_POWER6);
#endif #endif
case 2: return (&gotoblas_POWER8); case 2: return (&gotoblas_POWER8);
#if (!defined __GNUC__) || ( __GNUC__ >= 6) #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
case 3: return (&gotoblas_POWER9); case 3: return (&gotoblas_POWER9);
#endif #endif
#ifdef HAVE_P10_SUPPORT #ifdef HAVE_P10_SUPPORT