Implement builtin_cpu_is and limit cpu choices to P8 and P9 for NVIDIA compilers
This commit is contained in:
parent
91c3f86c2b
commit
17c16f2a71
|
@ -27,7 +27,9 @@ static char *corename[] = {
|
|||
#define NUM_CORETYPES 4
|
||||
|
||||
char *gotoblas_corename(void) {
|
||||
#ifndef C_PGI
|
||||
if (gotoblas == &gotoblas_POWER6) return corename[1];
|
||||
#endif
|
||||
if (gotoblas == &gotoblas_POWER8) return corename[2];
|
||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
if (gotoblas == &gotoblas_POWER9) return corename[3];
|
||||
|
@ -38,10 +40,157 @@ char *gotoblas_corename(void) {
|
|||
return corename[0];
|
||||
}
|
||||
|
||||
#ifdef C_PGI
|
||||
/*
|
||||
* NV HPC compilers do not yet implement __builtin_cpu_is().
|
||||
* Fake a version here for use in the CPU detection code below.
|
||||
*
|
||||
* The strategy is to first determine what the CPU actually is,
|
||||
* and then check whether that matches the CPU name
|
||||
* that was requested.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* Define POWER processor version table.
|
||||
*
|
||||
* NOTE: NV HPC SDK compilers only support POWER8 and POWER9 at this time.
|
||||
*/
|
||||
|
||||
/* CPU family identifiers stored in the cpu_type field of pvrPOWER[]. */
#define CPU_UNKNOWN 0
#define CPU_POWER5 5
#define CPU_POWER6 6
#define CPU_POWER8 8
#define CPU_POWER9 9
#define CPU_POWER10 10

/*
 * Processor version register (PVR) lookup table.
 *
 * An entry matches a PVR reading when (pvr & pvr_mask) == pvr_value.
 * The table is searched from the top and the first match wins, so entries
 * with a more specific mask must come before more general ones for the
 * same processor.  The final entry has a zero mask and value and therefore
 * matches any PVR; it must stay last.
 *
 * The table is read-only, hence const.
 *
 * NOTE(review): the POWER7 and POWER7+ entries are tagged CPU_POWER6.
 * This looks deliberate (no separate POWER7 identifier is defined here),
 * but confirm against the intended kernel selection.
 */
static const struct {
  uint32_t pvr_mask;    /* bits of the PVR that are significant */
  uint32_t pvr_value;   /* expected value of the masked PVR */
  const char *cpu_name; /* human-readable name, used in debug output */
  uint32_t cpu_type;    /* one of the CPU_* identifiers above */
} pvrPOWER[] = {

  { /* POWER6 in P5+ mode; 2.04-compliant processor */
    .pvr_mask = 0xffffffff,
    .pvr_value = 0x0f000001,
    .cpu_name = "POWER5+",
    .cpu_type = CPU_POWER5,
  },

  { /* Power6 aka POWER6X */
    .pvr_mask = 0xffff0000,
    .pvr_value = 0x003e0000,
    .cpu_name = "POWER6 (raw)",
    .cpu_type = CPU_POWER6,
  },

  { /* Power7 */
    .pvr_mask = 0xffff0000,
    .pvr_value = 0x003f0000,
    .cpu_name = "POWER7 (raw)",
    .cpu_type = CPU_POWER6,
  },

  { /* Power7+ */
    .pvr_mask = 0xffff0000,
    .pvr_value = 0x004A0000,
    .cpu_name = "POWER7+ (raw)",
    .cpu_type = CPU_POWER6,
  },

  { /* Power8E */
    .pvr_mask = 0xffff0000,
    .pvr_value = 0x004b0000,
    .cpu_name = "POWER8E (raw)",
    .cpu_type = CPU_POWER8,
  },

  { /* Power8NVL */
    .pvr_mask = 0xffff0000,
    .pvr_value = 0x004c0000,
    .cpu_name = "POWER8NVL (raw)",
    .cpu_type = CPU_POWER8,
  },

  { /* Power8 */
    .pvr_mask = 0xffff0000,
    .pvr_value = 0x004d0000,
    .cpu_name = "POWER8 (raw)",
    .cpu_type = CPU_POWER8,
  },

  { /* Power9 DD2.0 */
    .pvr_mask = 0xffffefff,
    .pvr_value = 0x004e0200,
    .cpu_name = "POWER9 (raw)",
    .cpu_type = CPU_POWER9,
  },

  { /* Power9 DD 2.1 */
    .pvr_mask = 0xffffefff,
    .pvr_value = 0x004e0201,
    .cpu_name = "POWER9 (raw)",
    .cpu_type = CPU_POWER9,
  },

  { /* Power9 DD2.2 or later */
    .pvr_mask = 0xffff0000,
    .pvr_value = 0x004e0000,
    .cpu_name = "POWER9 (raw)",
    .cpu_type = CPU_POWER9,
  },

  { /* Power10 */
    .pvr_mask = 0xffff0000,
    .pvr_value = 0x00800000,
    .cpu_name = "POWER10 (raw)",
    .cpu_type = CPU_POWER10,
  },

  { /* End of table, pvr_mask and pvr_value must be zero */
    .pvr_mask = 0x0,
    .pvr_value = 0x0,
    .cpu_name = "Unknown",
    .cpu_type = CPU_UNKNOWN,
  },
};
|
||||
|
||||
static int __builtin_cpu_is(const char *cpu) {
|
||||
int i;
|
||||
uint32_t pvr;
|
||||
uint32_t cpu_type;
|
||||
|
||||
asm("mfpvr %0" : "=r"(pvr));
|
||||
|
||||
for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) {
|
||||
if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(DEBUG)
|
||||
printf("%s: returning CPU=%s, cpu_type=%p\n", __func__,
|
||||
pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type);
|
||||
#endif
|
||||
cpu_type = pvrPOWER[i].cpu_type;
|
||||
|
||||
if (!strcmp(cpu, "power8"))
|
||||
return cpu_type == CPU_POWER8;
|
||||
if (!strcmp(cpu, "power9"))
|
||||
return cpu_type == CPU_POWER9;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* C_PGI */
|
||||
|
||||
static gotoblas_t *get_coretype(void) {
|
||||
|
||||
#ifndef C_PGI
|
||||
if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x"))
|
||||
return &gotoblas_POWER6;
|
||||
#endif
|
||||
if (__builtin_cpu_is("power8"))
|
||||
return &gotoblas_POWER8;
|
||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
|
@ -77,7 +226,9 @@ static gotoblas_t *force_coretype(char * coretype) {
|
|||
|
||||
switch (found)
|
||||
{
|
||||
#ifndef C_PGI
|
||||
case 1: return (&gotoblas_POWER6);
|
||||
#endif
|
||||
case 2: return (&gotoblas_POWER8);
|
||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
case 3: return (&gotoblas_POWER9);
|
||||
|
|
Loading…
Reference in New Issue