From 0dbe61a612708c1a689835dcf5fdb76b166e7729 Mon Sep 17 00:00:00 2001 From: Marius Hillenbrand Date: Mon, 11 May 2020 13:00:10 +0200 Subject: [PATCH] s390x: choose SIMD kernels at run-time based on OS and compiler support Extend and simplify the run-time detection for dynamic architecture support for z to check HW_CAP and only use SIMD features if advertised by the OS. While at it, also honor the env variable LD_HWCAP_MASK and do not use the CPU features masked there. Note that we can only use the SIMD features on z13 or newer (i.e., Vector Facility or Vector-Enhancements Facilities) when the operating system supports properly context-switching the vector registers. The OS advertises that support as a bit in the HW_CAP value in the auxiliary vector. While all recent Linux kernels have that support, we should maintain compatibility with older versions that may still be in use. Signed-off-by: Marius Hillenbrand --- driver/others/dynamic_zarch.c | 78 ++++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 25 deletions(-) diff --git a/driver/others/dynamic_zarch.c b/driver/others/dynamic_zarch.c index 8bcfcd004..403b34111 100644 --- a/driver/others/dynamic_zarch.c +++ b/driver/others/dynamic_zarch.c @@ -13,6 +13,39 @@ #define HAVE_Z14_SUPPORT #endif +// Guard the use of getauxval() on glibc version >= 2.16 +#ifdef __GLIBC__ +#include +#if __GLIBC_PREREQ(2, 16) +#include +#define HAVE_GETAUXVAL 1 + +static unsigned long get_hwcap(void) +{ + unsigned long hwcap = getauxval(AT_HWCAP); + char *maskenv; + + // honor requests for not using specific CPU features in LD_HWCAP_MASK + maskenv = getenv("LD_HWCAP_MASK"); + if (maskenv) + hwcap &= strtoul(maskenv, NULL, 0); + + return hwcap; + // note that a missing auxval is interpreted as no capabilities + // available, which is safe. +} + +#else // __GLIBC_PREREQ(2, 16) +#warn "Cannot detect SIMD support in Z13 or newer architectures since glibc is older than 2.16" + +static unsigned long get_hwcap(void) { + // treat missing support for getauxval() as no capabilities available, + // which is safe. + return 0; +} +#endif // __GLIBC_PREREQ(2, 16) +#endif // __GLIBC + extern gotoblas_t gotoblas_ZARCH_GENERIC; #ifdef HAVE_Z13_SUPPORT extern gotoblas_t gotoblas_Z13; @@ -44,39 +77,34 @@ char* gotoblas_corename(void) { return corename[0]; } -// __builtin_cpu_is is not supported by zarch +/** + * Detect the fitting set of kernels by retrieving the CPU features supported by + * OS from the auxiliary value AT_HWCAP and choosing the set of kernels + * ("coretype") that exploits most of the features and can be compiled with the + * available gcc version. + * Note that we cannot use vector registers on a z13 or newer unless supported + * by the OS kernel (which needs to handle them properly during context switch). + */ static gotoblas_t* get_coretype(void) { - FILE* infile; - char buffer[512], * p; - p = (char*)NULL; - infile = fopen("/proc/sysinfo", "r"); - while (fgets(buffer, sizeof(buffer), infile)) { - if (!strncmp("Type", buffer, 4)) { - p = strchr(buffer, ':') + 2; -#if 0 - fprintf(stderr, "%s\n", p); -#endif - break; - } - } + unsigned long hwcap __attribute__((unused)) = get_hwcap(); - fclose(infile); - -#ifdef HAVE_Z13_SUPPORT - if (strstr(p, "2964") || strstr(p, "2965")) return &gotoblas_Z13; -#endif - - // Z14 and Z15 systems - if (strstr(p, "3906") || strstr(p, "3907") || strstr(p, "8561") || - strstr(p, "8562")) + // z14 and z15 systems: exploit Vector Facility (SIMD) and + // Vector-Enhancements Facility 1 (float SIMD instructions), if present. #ifdef HAVE_Z14_SUPPORT + if ((hwcap & HWCAP_S390_VX) && (hwcap & HWCAP_S390_VXE)) return &gotoblas_Z14; -#else +#endif + + // z13: Vector Facility (SIMD for double) +#ifdef HAVE_Z13_SUPPORT + if (hwcap & HWCAP_S390_VX) return &gotoblas_Z13; #endif - // unknown system or compiler too old? use generic code for z architecture + // fallback in case of missing compiler support, systems before z13, or + // when the OS does not advertise support for the Vector Facility (e.g., + // missing support in the OS kernel) return &gotoblas_ZARCH_GENERIC; }