From ae1d1f74f7ff96b8345189bcba058b7acdc7d494 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 5 Jan 2019 16:55:33 +0100 Subject: [PATCH] Query AVX2 and AVX512 capability for runtime cpu selection --- driver/others/dynamic.c | 141 +++++++++++++++++++++++++++++----------- 1 file changed, 102 insertions(+), 39 deletions(-) diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c index 1f67dc521..7cc911d32 100644 --- a/driver/others/dynamic.c +++ b/driver/others/dynamic.c @@ -304,9 +304,47 @@ int support_avx(){ #endif } +int support_avx2(){ +#ifndef NO_AVX2 + int eax, ebx, ecx=0, edx; + int ret=0; + + if (!support_avx) + return 0; + cpuid(7, &eax, &ebx, &ecx, &edx); + if((ebx & (1<<7)) != 0) + ret=1; //OS supports AVX2 + return ret; +#else + return 0; +#endif +} + +int support_avx512(){ +#ifndef NO_AVX512 + int eax, ebx, ecx, edx; + int ret=0; + + if (!support_avx) + return 0; + cpuid(7, &eax, &ebx, &ecx, &edx); + if((ebx & (1<<7)) != 1){ + ret=0; //OS does not even support AVX2 + } + if((ebx & (1<<31)) != 0){ + ret=1; //OS supports AVX512VL + } + return ret; +#else + return 0; +#endif +} + extern void openblas_warning(int verbose, const char * msg); #define FALLBACK_VERBOSE 1 #define NEHALEM_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n" +#define SANDYBRIDGE_FALLBACK "OpenBLAS : Your OS does not support AVX2 instructions. OpenBLAS is using Sandybridge kernels as a fallback, which may give poorer performance.\n" +#define HASWELL_FALLBACK "OpenBLAS : Your OS does not support AVX512 instructions. OpenBLAS is using Haswell kernels as a fallback, which may give poorer performance.\n" #define BARCELONA_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n" static int get_vendor(void){ @@ -403,18 +441,24 @@ static gotoblas_t *get_coretype(void){ } //Intel Haswell if (model == 12 || model == 15) { - if(support_avx()) + if(support_avx2()) return &gotoblas_HASWELL; - else{ + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. } } //Intel Broadwell if (model == 13) { - if(support_avx()) + if(support_avx2()) return &gotoblas_HASWELL; - else{ + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. } @@ -424,27 +468,36 @@ static gotoblas_t *get_coretype(void){ case 4: //Intel Haswell if (model == 5 || model == 6) { - if(support_avx()) + if(support_avx2()) return &gotoblas_HASWELL; - else{ + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. } } //Intel Broadwell if (model == 7 || model == 15) { - if(support_avx()) + if(support_avx2()) return &gotoblas_HASWELL; - else{ + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. } } //Intel Skylake if (model == 14) { - if(support_avx()) + if(support_avx2()) return &gotoblas_HASWELL; - else{ + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. } @@ -457,40 +510,50 @@ static gotoblas_t *get_coretype(void){ case 5: //Intel Broadwell if (model == 6) { - if(support_avx()) + if(support_avx2()) return &gotoblas_HASWELL; - else{ + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. } } if (model == 5) { // Intel Skylake X -#ifndef NO_AVX512 - return &gotoblas_SKYLAKEX; -#else - if(support_avx()) + if (support_avx512()) + return &gotoblas_SKYLAKEX; + if(support_avx2()) return &gotoblas_HASWELL; - else { - openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); - return &gotoblas_NEHALEM; - } -#endif + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { + openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); + return &gotoblas_NEHALEM; + } } //Intel Skylake if (model == 14) { - if(support_avx()) + if(support_avx2()) return &gotoblas_HASWELL; - else{ + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. } } //Intel Phi Knights Landing if (model == 7) { - if(support_avx()) + if(support_avx2()) return &gotoblas_HASWELL; - else{ + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. } @@ -503,26 +566,26 @@ static gotoblas_t *get_coretype(void){ case 6: if (model == 6) { // Cannon Lake -#ifndef NO_AVX512 - return &gotoblas_SKYLAKEX; -#else - if(support_avx()) -#ifndef NO_AVX2 - return &gotoblas_HASWELL; -#else - return &gotoblas_SANDYBRIDGE; -#endif - else - return &gotoblas_NEHALEM; -#endif + if(support_avx2()) + return &gotoblas_HASWELL; + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { + openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); + return &gotoblas_NEHALEM; + } } return NULL; case 9: case 8: if (model == 14 ) { // Kaby Lake - if(support_avx()) + if(support_avx2()) return &gotoblas_HASWELL; - else{ + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. }