Query AVX2 and AVX512 capability for runtime cpu selection

This commit is contained in:
Martin Kroeker 2019-01-05 16:55:33 +01:00 committed by GitHub
parent 20d1aad13f
commit ae1d1f74f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 102 additions and 39 deletions

View File

@ -304,9 +304,47 @@ int support_avx(){
#endif #endif
} }
/*
 * Report whether AVX2 kernels may be selected at runtime.
 *
 * Returns 1 when support_avx() reports success and CPUID leaf 7 has the
 * AVX2 feature flag (EBX bit 5) set. Returns 0 otherwise, and always 0
 * when the build defines NO_AVX2.
 */
int support_avx2(){
#ifndef NO_AVX2
  /* ecx is zeroed before the call; presumably the cpuid helper may use it
     as the sub-leaf selector (helper not visible here -- confirm). */
  int eax, ebx, ecx=0, edx;

  /* support_avx must be *called*: the bare function name is a non-null
     address, so "!support_avx" could never be true. */
  if (!support_avx())
    return 0;
  cpuid(7, &eax, &ebx, &ecx, &edx);
  /* CPUID.(EAX=7,ECX=0):EBX bit 5 is AVX2; bit 7 is SMEP, an unrelated
     supervisor-mode feature that hypervisors may mask independently. */
  return (ebx & (1<<5)) != 0;
#else
  return 0;
#endif
}
/*
 * Report whether AVX512 kernels may be selected at runtime.
 *
 * Returns 1 only when support_avx() reports success and CPUID leaf 7
 * advertises both AVX2 (EBX bit 5) and AVX512VL (EBX bit 31).
 * Returns 0 otherwise, and always 0 when the build defines NO_AVX512.
 *
 * NOTE(review): this only inspects CPUID feature flags; whether the OS has
 * enabled the AVX-512 register state is not checked here -- confirm whether
 * support_avx() or a caller covers that.
 */
int support_avx512(){
#ifndef NO_AVX512
  /* ecx is zeroed before the call, matching support_avx2(); presumably the
     cpuid helper may use it as the sub-leaf selector -- confirm. */
  int eax, ebx, ecx=0, edx;

  /* Call support_avx(); testing the bare function name is always false. */
  if (!support_avx())
    return 0;
  cpuid(7, &eax, &ebx, &ecx, &edx);
  /* AVX2 is a prerequisite. The masked value is either 0 or the bit itself,
     so compare against 0 (comparing against 1 can never match). */
  if((ebx & (1<<5)) == 0)
    return 0;  // CPU does not even report AVX2
  /* Unsigned constant: 1<<31 overflows a 32-bit signed int. */
  if((ebx & (1u<<31)) != 0)
    return 1;  // CPU reports AVX512VL
  return 0;
#else
  return 0;
#endif
}
extern void openblas_warning(int verbose, const char * msg); extern void openblas_warning(int verbose, const char * msg);
#define FALLBACK_VERBOSE 1 #define FALLBACK_VERBOSE 1
#define NEHALEM_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n" #define NEHALEM_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n"
/* Warning text passed to openblas_warning() when AVX2 is unavailable but AVX
   is, and the Sandybridge kernel table is returned instead of Haswell. */
#define SANDYBRIDGE_FALLBACK "OpenBLAS : Your OS does not support AVX2 instructions. OpenBLAS is using Sandybridge kernels as a fallback, which may give poorer performance.\n"
/* Warning text for falling back to Haswell kernels when AVX512 is missing.
   NOTE(review): no use of this macro is visible in this view -- confirm that
   the AVX512 -> Haswell fallback path actually emits it. */
#define HASWELL_FALLBACK "OpenBLAS : Your OS does not support AVX512 instructions. OpenBLAS is using Haswell kernels as a fallback, which may give poorer performance.\n"
#define BARCELONA_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n" #define BARCELONA_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n"
static int get_vendor(void){ static int get_vendor(void){
@ -403,18 +441,24 @@ static gotoblas_t *get_coretype(void){
} }
//Intel Haswell //Intel Haswell
if (model == 12 || model == 15) { if (model == 12 || model == 15) {
if(support_avx()) if(support_avx2())
return &gotoblas_HASWELL; return &gotoblas_HASWELL;
else{ if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
} }
} }
//Intel Broadwell //Intel Broadwell
if (model == 13) { if (model == 13) {
if(support_avx()) if(support_avx2())
return &gotoblas_HASWELL; return &gotoblas_HASWELL;
else{ if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
} }
@ -424,27 +468,36 @@ static gotoblas_t *get_coretype(void){
case 4: case 4:
//Intel Haswell //Intel Haswell
if (model == 5 || model == 6) { if (model == 5 || model == 6) {
if(support_avx()) if(support_avx2())
return &gotoblas_HASWELL; return &gotoblas_HASWELL;
else{ if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
} }
} }
//Intel Broadwell //Intel Broadwell
if (model == 7 || model == 15) { if (model == 7 || model == 15) {
if(support_avx()) if(support_avx2())
return &gotoblas_HASWELL; return &gotoblas_HASWELL;
else{ if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
} }
} }
//Intel Skylake //Intel Skylake
if (model == 14) { if (model == 14) {
if(support_avx()) if(support_avx2())
return &gotoblas_HASWELL; return &gotoblas_HASWELL;
else{ if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
} }
@ -457,40 +510,50 @@ static gotoblas_t *get_coretype(void){
case 5: case 5:
//Intel Broadwell //Intel Broadwell
if (model == 6) { if (model == 6) {
if(support_avx()) if(support_avx2())
return &gotoblas_HASWELL; return &gotoblas_HASWELL;
else{ if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
} }
} }
if (model == 5) { if (model == 5) {
// Intel Skylake X // Intel Skylake X
#ifndef NO_AVX512 if (support_avx512())
return &gotoblas_SKYLAKEX; return &gotoblas_SKYLAKEX;
#else if(support_avx2())
if(support_avx())
return &gotoblas_HASWELL; return &gotoblas_HASWELL;
else { if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_NEHALEM; return &gotoblas_SANDYBRIDGE;
} } else {
#endif openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM;
}
} }
//Intel Skylake //Intel Skylake
if (model == 14) { if (model == 14) {
if(support_avx()) if(support_avx2())
return &gotoblas_HASWELL; return &gotoblas_HASWELL;
else{ if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
} }
} }
//Intel Phi Knights Landing //Intel Phi Knights Landing
if (model == 7) { if (model == 7) {
if(support_avx()) if(support_avx2())
return &gotoblas_HASWELL; return &gotoblas_HASWELL;
else{ if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
} }
@ -503,26 +566,26 @@ static gotoblas_t *get_coretype(void){
case 6: case 6:
if (model == 6) { if (model == 6) {
// Cannon Lake // Cannon Lake
#ifndef NO_AVX512 if(support_avx2())
return &gotoblas_SKYLAKEX; return &gotoblas_HASWELL;
#else if(support_avx()) {
if(support_avx()) openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
#ifndef NO_AVX2 return &gotoblas_SANDYBRIDGE;
return &gotoblas_HASWELL; } else {
#else openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_SANDYBRIDGE; return &gotoblas_NEHALEM;
#endif }
else
return &gotoblas_NEHALEM;
#endif
} }
return NULL; return NULL;
case 9: case 9:
case 8: case 8:
if (model == 14 ) { // Kaby Lake if (model == 14 ) { // Kaby Lake
if(support_avx()) if(support_avx2())
return &gotoblas_HASWELL; return &gotoblas_HASWELL;
else{ if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
} }