From ca7199f249bb6a87f201a1cd564d42fef338f29a Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 19 Jul 2023 14:48:42 +0200 Subject: [PATCH 1/2] Treat newer Neoverse as N1 if SVE unavailable (may be disabled in container/cloud env) --- driver/others/dynamic_arm64.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/driver/others/dynamic_arm64.c b/driver/others/dynamic_arm64.c index 0f47b287c..b29e6e46c 100644 --- a/driver/others/dynamic_arm64.c +++ b/driver/others/dynamic_arm64.c @@ -147,6 +147,9 @@ extern void openblas_warning(int verbose, const char * msg); #ifndef HWCAP_CPUID #define HWCAP_CPUID (1 << 11) #endif +#ifndef HWCAP_SVE +#define HWCAP_SVE (1 << 22) +#endif #define get_cpu_ftr(id, var) ({ \ __asm__ __volatile__ ("mrs %0, "#id : "=r" (var)); \ @@ -281,9 +284,15 @@ static gotoblas_t *get_coretype(void) { return &gotoblas_NEOVERSEN1; #ifndef NO_SVE case 0xd49: - return &gotoblas_NEOVERSEN2; + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + return &gotoblas_NEOVERSEN1; + else + return &gotoblas_NEOVERSEN2; case 0xd40: - return &gotoblas_NEOVERSEV1; + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + return &gotoblas_NEOVERSEN1; + else + return &gotoblas_NEOVERSEV1; #endif case 0xd05: // Cortex A55 return &gotoblas_CORTEXA55; From 5c58994eb26bfd5715f4100a213a0e69b280ea16 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 19 Jul 2023 18:27:41 +0200 Subject: [PATCH 2/2] Add fallback warning --- driver/others/dynamic_arm64.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/driver/others/dynamic_arm64.c b/driver/others/dynamic_arm64.c index b29e6e46c..ef2597234 100644 --- a/driver/others/dynamic_arm64.c +++ b/driver/others/dynamic_arm64.c @@ -137,6 +137,8 @@ extern gotoblas_t gotoblas_CORTEXA55; #endif extern void openblas_warning(int verbose, const char * msg); +#define FALLBACK_VERBOSE 1 +#define NEOVERSEN1_FALLBACK "OpenBLAS : Your OS does not support SVE instructions. OpenBLAS is using Neoverse N1 kernels as a fallback, which may give poorer performance.\n" #define NUM_CORETYPES 13 @@ -284,14 +286,16 @@ static gotoblas_t *get_coretype(void) { return &gotoblas_NEOVERSEN1; #ifndef NO_SVE case 0xd49: - if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) { + openblas_warning(FALLBACK_VERBOSE, NEOVERSEN1_FALLBACK); return &gotoblas_NEOVERSEN1; - else + } else return &gotoblas_NEOVERSEN2; case 0xd40: - if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) { + openblas_warning(FALLBACK_VERBOSE, NEOVERSEN1_FALLBACK); return &gotoblas_NEOVERSEN1; - else + }else return &gotoblas_NEOVERSEV1; #endif case 0xd05: // Cortex A55