diff --git a/cpuid_zarch.h b/cpuid_zarch.h index 404d90e86..686f2eb17 100644 --- a/cpuid_zarch.h +++ b/cpuid_zarch.h @@ -56,6 +56,40 @@ static int detect(void) { unsigned long hwcap = get_hwcap(); + // Choose the architecture level for optimized kernels based on hardware + // capability bits (just like glibc chooses optimized implementations). + // + // The hardware capability bits that are used here indicate both + // hardware support for a particular ISA extension and the presence of + // software support to enable its use. For example, when HWCAP_S390_VX + // is set then both the CPU can execute SIMD instructions and the Linux + // kernel can manage applications using the vector registers and SIMD + // instructions. + // + // See glibc's sysdeps/s390/dl-procinfo.h for an overview (also in + // sysdeps/unix/sysv/linux/s390/bits/hwcap.h) of the defined hardware + // capability bits. They are derived from the information that the + // "store facility list (extended)" instructions provide. + // (https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/s390/dl-procinfo.h;hb=HEAD) + // + // currently used: + // HWCAP_S390_VX - vector facility for z/Architecture (introduced with + // IBM z13), enables level CPU_Z13 (SIMD) + // HWCAP_S390_VXE - vector enhancements facility 1 (introduced with IBM + // z14), together with VX enables level CPU_Z14 + // (single-precision SIMD instructions) + // + // When you add optimized kernels that make use of other ISA extensions + // (e.g., for exploiting the vector-enhancements facility 2 that was introduced + // with IBM z15), then add a new architecture level (e.g., CPU_Z15) and gate + // it on the hwcap that represents it here (e.g., HWCAP_S390_VXRS_EXT2 + // for the z15 vector enhancements). + // + // To learn the value of hwcaps on a given system, set the environment + // variable LD_SHOW_AUXV and let ld.so dump it (e.g., by running + // LD_SHOW_AUXV=1 /bin/true). 
+ // Also, the init function for dynamic arch support will print hwcaps + // when OPENBLAS_VERBOSE is set to 2 or higher. if ((hwcap & HWCAP_S390_VX) && (hwcap & HWCAP_S390_VXE)) return CPU_Z14; diff --git a/driver/others/dynamic_zarch.c b/driver/others/dynamic_zarch.c index ad748f14c..5b45aae2f 100644 --- a/driver/others/dynamic_zarch.c +++ b/driver/others/dynamic_zarch.c @@ -13,6 +13,7 @@ extern gotoblas_t gotoblas_Z14; #define NUM_CORETYPES 4 +extern int openblas_verbose(void); extern void openblas_warning(int verbose, const char* msg); char* gotoblas_corename(void) { @@ -120,6 +121,11 @@ void gotoblas_dynamic_init(void) { else { gotoblas = get_coretype(); + if (openblas_verbose() >= 2) { + snprintf(coremsg, sizeof(coremsg), "Choosing kernels based on getauxval(AT_HWCAP)=0x%lx\n", + getauxval(AT_HWCAP)); + openblas_warning(2, coremsg); + } } if (gotoblas == NULL) @@ -130,9 +136,12 @@ void gotoblas_dynamic_init(void) { } if (gotoblas && gotoblas->init) { - strncpy(coren, gotoblas_corename(), 20); - sprintf(coremsg, "Core: %s\n", coren); - openblas_warning(2, coremsg); + if (openblas_verbose() >= 2) { + strncpy(coren, gotoblas_corename(), 20); + // strncpy() leaves coren unterminated for names of 20+ bytes, so bound the read with %.20s. + snprintf(coremsg, sizeof(coremsg), "Core: %.20s\n", coren); + openblas_warning(2, coremsg); + } gotoblas->init(); } else {