cpuid_zarch/hwcaps: add documentation and dump hwcaps in init
Add pointers to the definition of the hardware capability flags in glibc and describe how they relate to the levels CPU_Z13 and CPU_Z14 for optimized kernels. To aid in identifying available hardware capabilities and in debugging potential build issues, dump their value in dynamic_arch_init() when OPENBLAS_VERBOSE is set to 2 or higher. Signed-off-by: Marius Hillenbrand <mhillen@linux.ibm.com>
This commit is contained in:
parent
aa231b5875
commit
77747bc536
|
@ -56,6 +56,40 @@ static int detect(void)
|
||||||
{
|
{
|
||||||
unsigned long hwcap = get_hwcap();
|
unsigned long hwcap = get_hwcap();
|
||||||
|
|
||||||
|
// Choose the architecture level for optimized kernels based on hardware
|
||||||
|
// capability bits (just like glibc chooses optimized implementations).
|
||||||
|
//
|
||||||
|
// The hardware capability bits that are used here indicate both
|
||||||
|
// hardware support for a particular ISA extension and the presence of
|
||||||
|
// software support to enable its use. For example, when HWCAP_S390_VX
|
||||||
|
// is set then both the CPU can execute SIMD instructions and the Linux
|
||||||
|
// kernel can manage applications using the vector registers and SIMD
|
||||||
|
// instructions.
|
||||||
|
//
|
||||||
|
// See glibc's sysdeps/s390/dl-procinfo.h for an overview (also in
|
||||||
|
// sysdeps/unix/sysv/linux/s390/bits/hwcap.h) of the defined hardware
|
||||||
|
// capability bits. They are derived from the information that the
|
||||||
|
// "store facility list (extended)" instructions provide.
|
||||||
|
// (https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/s390/dl-procinfo.h;hb=HEAD)
|
||||||
|
//
|
||||||
|
// currently used:
|
||||||
|
// HWCAP_S390_VX - vector facility for z/Architecture (introduced with
|
||||||
|
// IBM z13), enables level CPU_Z13 (SIMD)
|
||||||
|
// HWCAP_S390_VXE - vector-enhancements facility 1 (introduced with IBM
|
||||||
|
// z14), together with VX enables level CPU_Z14
|
||||||
|
// (single-precision SIMD instructions)
|
||||||
|
//
|
||||||
|
// When you add optimized kernels that make use of other ISA extensions
|
||||||
|
// (e.g., for exploiting the vector-enhancements facility 2 that was introduced
|
||||||
|
// with IBM z15), then add a new architecture level (e.g., CPU_Z15) and gate
|
||||||
|
// it on the hwcap that represents it here (e.g., HWCAP_S390_VXRS_EXT2
|
||||||
|
// for the z15 vector enhancements).
|
||||||
|
//
|
||||||
|
// To learn the value of hwcaps on a given system, set the environment
|
||||||
|
// variable LD_SHOW_AUXV and let ld.so dump it (e.g., by running
|
||||||
|
// LD_SHOW_AUXV=1 /bin/true).
|
||||||
|
// Also, the init function for dynamic arch support will print hwcaps
|
||||||
|
// when OPENBLAS_VERBOSE is set to 2 or higher.
|
||||||
if ((hwcap & HWCAP_S390_VX) && (hwcap & HWCAP_S390_VXE))
|
if ((hwcap & HWCAP_S390_VX) && (hwcap & HWCAP_S390_VXE))
|
||||||
return CPU_Z14;
|
return CPU_Z14;
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,7 @@ extern gotoblas_t gotoblas_Z14;
|
||||||
|
|
||||||
#define NUM_CORETYPES 4
|
#define NUM_CORETYPES 4
|
||||||
|
|
||||||
|
extern int openblas_verbose();
|
||||||
extern void openblas_warning(int verbose, const char* msg);
|
extern void openblas_warning(int verbose, const char* msg);
|
||||||
|
|
||||||
char* gotoblas_corename(void) {
|
char* gotoblas_corename(void) {
|
||||||
|
@ -120,6 +121,11 @@ void gotoblas_dynamic_init(void) {
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
gotoblas = get_coretype();
|
gotoblas = get_coretype();
|
||||||
|
if (openblas_verbose() >= 2) {
|
||||||
|
snprintf(coremsg, sizeof(coremsg), "Choosing kernels based on getauxval(AT_HWCAP)=0x%lx\n",
|
||||||
|
getauxval(AT_HWCAP));
|
||||||
|
openblas_warning(2, coremsg);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (gotoblas == NULL)
|
if (gotoblas == NULL)
|
||||||
|
@ -130,9 +136,11 @@ void gotoblas_dynamic_init(void) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (gotoblas && gotoblas->init) {
|
if (gotoblas && gotoblas->init) {
|
||||||
|
if (openblas_verbose() >= 2) {
|
||||||
strncpy(coren, gotoblas_corename(), 20);
|
strncpy(coren, gotoblas_corename(), 20);
|
||||||
sprintf(coremsg, "Core: %s\n", coren);
|
sprintf(coremsg, "Core: %s\n", coren);
|
||||||
openblas_warning(2, coremsg);
|
openblas_warning(2, coremsg);
|
||||||
|
}
|
||||||
gotoblas->init();
|
gotoblas->init();
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
Loading…
Reference in New Issue