Merge pull request #3046 from martin-frbg/nvidiasdk-ppc

Support NVIDIA HPC SDK on POWERPC
This commit is contained in:
Martin Kroeker 2020-12-20 11:55:53 +01:00 committed by GitHub
commit 85e5165e98
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 164 additions and 0 deletions

View File

@ -10,9 +10,11 @@ USE_OPENMP = 1
endif
ifeq ($(CORE), POWER10)
ifneq ($(C_COMPILER), PGI)
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
endif
endif
ifeq ($(CORE), POWER9)
ifneq ($(C_COMPILER), PGI)

View File

@ -663,6 +663,7 @@ endif
endif # ARCH zarch
ifeq ($(ARCH), power)
ifneq ($(C_COMPILER), PGI)
DYNAMIC_CORE = POWER6
DYNAMIC_CORE += POWER8
ifneq ($(C_COMPILER), GCC)
@ -689,6 +690,10 @@ else
$(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.)
endif
endif
else
DYNAMIC_CORE = POWER8
DYNAMIC_CORE += POWER9
endif
endif
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
@ -1039,12 +1044,18 @@ ifeq ($(ARCH), x86_64)
FCOMMON_OPT += -tp p7-64
else
ifeq ($(ARCH), power)
ifeq ($(CORE), POWER6)
$(warning NVIDIA HPC compilers do not support POWER6.)
endif
ifeq ($(CORE), POWER8)
FCOMMON_OPT += -tp pwr8
endif
ifeq ($(CORE), POWER9)
FCOMMON_OPT += -tp pwr9
endif
ifeq ($(CORE), POWER10)
$(warning NVIDIA HPC compilers do not support POWER10.)
endif
endif
endif
else

View File

@ -27,7 +27,9 @@ static char *corename[] = {
#define NUM_CORETYPES 4
char *gotoblas_corename(void) {
#ifndef C_PGI
if (gotoblas == &gotoblas_POWER6) return corename[1];
#endif
if (gotoblas == &gotoblas_POWER8) return corename[2];
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
if (gotoblas == &gotoblas_POWER9) return corename[3];
@ -38,10 +40,157 @@ char *gotoblas_corename(void) {
return corename[0];
}
#ifdef C_PGI
/*
* NV HPC compilers do not yet implement __builtin_cpu_is().
* Fake a version here for use in the CPU detection code below.
*
* Strategy here is to first check the CPU to see what it actually is,
* and then test the input to see if what the CPU actually is matches
* what was requested.
*/
#include <string.h>
/*
* Define POWER processor version table.
*
* NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time
*/
#define CPU_UNKNOWN 0
#define CPU_POWER5 5
#define CPU_POWER6 6
#define CPU_POWER8 8
#define CPU_POWER9 9
#define CPU_POWER10 10
static struct {
uint32_t pvr_mask;
uint32_t pvr_value;
const char* cpu_name;
uint32_t cpu_type;
} pvrPOWER [] = {
{ /* POWER6 in P5+ mode; 2.04-compliant processor */
.pvr_mask = 0xffffffff,
.pvr_value = 0x0f000001,
.cpu_name = "POWER5+",
.cpu_type = CPU_POWER5,
},
{ /* Power6 aka POWER6X*/
.pvr_mask = 0xffff0000,
.pvr_value = 0x003e0000,
.cpu_name = "POWER6 (raw)",
.cpu_type = CPU_POWER6,
},
{ /* Power7 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x003f0000,
.cpu_name = "POWER7 (raw)",
.cpu_type = CPU_POWER6,
},
{ /* Power7+ */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004A0000,
.cpu_name = "POWER7+ (raw)",
.cpu_type = CPU_POWER6,
},
{ /* Power8E */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004b0000,
.cpu_name = "POWER8E (raw)",
.cpu_type = CPU_POWER8,
},
{ /* Power8NVL */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004c0000,
.cpu_name = "POWER8NVL (raw)",
.cpu_type = CPU_POWER8,
},
{ /* Power8 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004d0000,
.cpu_name = "POWER8 (raw)",
.cpu_type = CPU_POWER8,
},
{ /* Power9 DD2.0 */
.pvr_mask = 0xffffefff,
.pvr_value = 0x004e0200,
.cpu_name = "POWER9 (raw)",
.cpu_type = CPU_POWER9,
},
{ /* Power9 DD 2.1 */
.pvr_mask = 0xffffefff,
.pvr_value = 0x004e0201,
.cpu_name = "POWER9 (raw)",
.cpu_type = CPU_POWER9,
},
{ /* Power9 DD2.2 or later */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004e0000,
.cpu_name = "POWER9 (raw)",
.cpu_type = CPU_POWER9,
},
{ /* Power10 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00800000,
.cpu_name = "POWER10 (raw)",
.cpu_type = CPU_POWER10,
},
{ /* End of table, pvr_mask and pvr_value must be zero */
.pvr_mask = 0x0,
.pvr_value = 0x0,
.cpu_name = "Unknown",
.cpu_type = CPU_UNKNOWN,
},
};
static int __builtin_cpu_is(const char *cpu) {
int i;
uint32_t pvr;
uint32_t cpu_type;
asm("mfpvr %0" : "=r"(pvr));
for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) {
if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) {
break;
}
}
#if defined(DEBUG)
printf("%s: returning CPU=%s, cpu_type=%p\n", __func__,
pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type);
#endif
cpu_type = pvrPOWER[i].cpu_type;
if (!strcmp(cpu, "power8"))
return cpu_type == CPU_POWER8;
if (!strcmp(cpu, "power9"))
return cpu_type == CPU_POWER9;
return 0;
}
#endif /* C_PGI */
static gotoblas_t *get_coretype(void) {
#ifndef C_PGI
if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x"))
return &gotoblas_POWER6;
#endif
if (__builtin_cpu_is("power8"))
return &gotoblas_POWER8;
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
@ -77,7 +226,9 @@ static gotoblas_t *force_coretype(char * coretype) {
switch (found)
{
#ifndef C_PGI
case 1: return (&gotoblas_POWER6);
#endif
case 2: return (&gotoblas_POWER8);
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
case 3: return (&gotoblas_POWER9);