s390x: choose SIMD kernels at run-time based on OS and compiler support
Extend and simplify the run-time detection for dynamic architecture support on IBM Z (s390x): check the AT_HWCAP value and only use SIMD features if they are advertised by the OS. While at it, also honor the environment variable LD_HWCAP_MASK and do not use the CPU features masked there. Note that we can only use the SIMD features on z13 or newer (i.e., the Vector Facility or the Vector-Enhancements Facilities) when the operating system properly supports context-switching the vector registers. The OS advertises that support as a bit in the AT_HWCAP value in the auxiliary vector. While all recent Linux kernels have that support, we should maintain compatibility with older versions that may still be in use. Signed-off-by: Marius Hillenbrand <mhillen@linux.ibm.com>
This commit is contained in:
parent
62cf391cbb
commit
0dbe61a612
|
@ -13,6 +13,39 @@
|
||||||
#define HAVE_Z14_SUPPORT
|
#define HAVE_Z14_SUPPORT
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// getauxval() is only available on glibc version >= 2.16, so guard its use.
#ifdef __GLIBC__
#include <features.h>
#if __GLIBC_PREREQ(2, 16)
#include <sys/auxv.h>
#define HAVE_GETAUXVAL 1
#endif // __GLIBC_PREREQ(2, 16)
#endif // __GLIBC__

// The feature bits tested by the kernel selection are normally provided by
// <sys/auxv.h>. Define them here when that header is unavailable or too old,
// so that code testing these bits still compiles on the fallback path.
#ifndef HWCAP_S390_VX
#define HWCAP_S390_VX 2048
#endif
#ifndef HWCAP_S390_VXE
#define HWCAP_S390_VXE 8192
#endif

#ifdef HAVE_GETAUXVAL

/**
 * Retrieve the CPU features advertised by the OS in the auxiliary vector
 * entry AT_HWCAP, restricted by the environment variable LD_HWCAP_MASK.
 *
 * @return the AT_HWCAP value, ANDed with the numeric value of LD_HWCAP_MASK
 *         if that variable is set. A result of 0 means that no capabilities
 *         may be used.
 */
static unsigned long get_hwcap(void)
{
	// note that a missing auxval is interpreted as no capabilities
	// available, which is safe.
	unsigned long hwcap = getauxval(AT_HWCAP);
	const char *maskenv;

	// honor requests for not using specific CPU features in LD_HWCAP_MASK;
	// strtoul() yields 0 for a value it cannot parse, which masks out all
	// features and therefore errs on the safe side.
	maskenv = getenv("LD_HWCAP_MASK");
	if (maskenv)
		hwcap &= strtoul(maskenv, NULL, 0);

	return hwcap;
}

#else // HAVE_GETAUXVAL

#warning "Cannot detect SIMD support in Z13 or newer architectures since getauxval() is not available (requires glibc 2.16 or newer)"

/**
 * Fallback for C libraries without getauxval().
 *
 * @return always 0: treat missing support for getauxval() as no capabilities
 *         available, which is safe.
 */
static unsigned long get_hwcap(void)
{
	return 0;
}

#endif // HAVE_GETAUXVAL
|
||||||
|
|
||||||
extern gotoblas_t gotoblas_ZARCH_GENERIC;
|
extern gotoblas_t gotoblas_ZARCH_GENERIC;
|
||||||
#ifdef HAVE_Z13_SUPPORT
|
#ifdef HAVE_Z13_SUPPORT
|
||||||
extern gotoblas_t gotoblas_Z13;
|
extern gotoblas_t gotoblas_Z13;
|
||||||
|
@ -44,39 +77,34 @@ char* gotoblas_corename(void) {
|
||||||
return corename[0];
|
return corename[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
// __builtin_cpu_is is not supported by zarch
|
/**
|
||||||
|
* Detect the fitting set of kernels by retrieving the CPU features supported by
|
||||||
|
* OS from the auxiliary value AT_HWCAP and choosing the set of kernels
|
||||||
|
* ("coretype") that exploits most of the features and can be compiled with the
|
||||||
|
* available gcc version.
|
||||||
|
* Note that we cannot use vector registers on a z13 or newer unless supported
|
||||||
|
* by the OS kernel (which needs to handle them properly during context switch).
|
||||||
|
*/
|
||||||
static gotoblas_t* get_coretype(void) {
|
static gotoblas_t* get_coretype(void) {
|
||||||
FILE* infile;
|
|
||||||
char buffer[512], * p;
|
|
||||||
|
|
||||||
p = (char*)NULL;
|
unsigned long hwcap __attribute__((unused)) = get_hwcap();
|
||||||
infile = fopen("/proc/sysinfo", "r");
|
|
||||||
while (fgets(buffer, sizeof(buffer), infile)) {
|
|
||||||
if (!strncmp("Type", buffer, 4)) {
|
|
||||||
p = strchr(buffer, ':') + 2;
|
|
||||||
#if 0
|
|
||||||
fprintf(stderr, "%s\n", p);
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fclose(infile);
|
// z14 and z15 systems: exploit Vector Facility (SIMD) and
|
||||||
|
// Vector-Enhancements Facility 1 (float SIMD instructions), if present.
|
||||||
#ifdef HAVE_Z13_SUPPORT
|
|
||||||
if (strstr(p, "2964") || strstr(p, "2965")) return &gotoblas_Z13;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Z14 and Z15 systems
|
|
||||||
if (strstr(p, "3906") || strstr(p, "3907") || strstr(p, "8561") ||
|
|
||||||
strstr(p, "8562"))
|
|
||||||
#ifdef HAVE_Z14_SUPPORT
|
#ifdef HAVE_Z14_SUPPORT
|
||||||
|
if ((hwcap & HWCAP_S390_VX) && (hwcap & HWCAP_S390_VXE))
|
||||||
return &gotoblas_Z14;
|
return &gotoblas_Z14;
|
||||||
#else
|
#endif
|
||||||
|
|
||||||
|
// z13: Vector Facility (SIMD for double)
|
||||||
|
#ifdef HAVE_Z13_SUPPORT
|
||||||
|
if (hwcap & HWCAP_S390_VX)
|
||||||
return &gotoblas_Z13;
|
return &gotoblas_Z13;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// unknown system or compiler too old? use generic code for z architecture
|
// fallback in case of missing compiler support, systems before z13, or
|
||||||
|
// when the OS does not advertise support for the Vector Facility (e.g.,
|
||||||
|
// missing support in the OS kernel)
|
||||||
return &gotoblas_ZARCH_GENERIC;
|
return &gotoblas_ZARCH_GENERIC;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue