Query AVX2 and AVX512 capability for runtime cpu selection

This commit is contained in:
Martin Kroeker 2019-01-05 16:55:33 +01:00 committed by GitHub
parent 20d1aad13f
commit ae1d1f74f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 102 additions and 39 deletions

View File

@ -304,9 +304,47 @@ int support_avx(){
#endif
}
int support_avx2(){
#ifndef NO_AVX2
int eax, ebx, ecx=0, edx;
int ret=0;
if (!support_avx)
return 0;
cpuid(7, &eax, &ebx, &ecx, &edx);
if((ebx & (1<<7)) != 0)
ret=1; //OS supports AVX2
return ret;
#else
return 0;
#endif
}
int support_avx512(){
#ifndef NO_AVX512
int eax, ebx, ecx, edx;
int ret=0;
if (!support_avx)
return 0;
cpuid(7, &eax, &ebx, &ecx, &edx);
if((ebx & (1<<7)) != 1){
ret=0; //OS does not even support AVX2
}
if((ebx & (1<<31)) != 0){
ret=1; //OS supports AVX512VL
}
return ret;
#else
return 0;
#endif
}
extern void openblas_warning(int verbose, const char * msg);
#define FALLBACK_VERBOSE 1
#define NEHALEM_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n"
#define SANDYBRIDGE_FALLBACK "OpenBLAS : Your OS does not support AVX2 instructions. OpenBLAS is using Sandybridge kernels as a fallback, which may give poorer performance.\n"
#define HASWELL_FALLBACK "OpenBLAS : Your OS does not support AVX512 instructions. OpenBLAS is using Haswell kernels as a fallback, which may give poorer performance.\n"
#define BARCELONA_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n"
static int get_vendor(void){
@ -403,18 +441,24 @@ static gotoblas_t *get_coretype(void){
}
//Intel Haswell
if (model == 12 || model == 15) {
if(support_avx())
if(support_avx2())
return &gotoblas_HASWELL;
else{
if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
//Intel Broadwell
if (model == 13) {
if(support_avx())
if(support_avx2())
return &gotoblas_HASWELL;
else{
if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
@ -424,27 +468,36 @@ static gotoblas_t *get_coretype(void){
case 4:
//Intel Haswell
if (model == 5 || model == 6) {
if(support_avx())
if(support_avx2())
return &gotoblas_HASWELL;
else{
if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
//Intel Broadwell
if (model == 7 || model == 15) {
if(support_avx())
if(support_avx2())
return &gotoblas_HASWELL;
else{
if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
//Intel Skylake
if (model == 14) {
if(support_avx())
if(support_avx2())
return &gotoblas_HASWELL;
else{
if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
@ -457,40 +510,50 @@ static gotoblas_t *get_coretype(void){
case 5:
//Intel Broadwell
if (model == 6) {
if(support_avx())
if(support_avx2())
return &gotoblas_HASWELL;
else{
if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
if (model == 5) {
// Intel Skylake X
#ifndef NO_AVX512
if (support_avx512())
return &gotoblas_SKYLAKEX;
#else
if(support_avx())
if(support_avx2())
return &gotoblas_HASWELL;
else {
if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM;
}
#endif
}
//Intel Skylake
if (model == 14) {
if(support_avx())
if(support_avx2())
return &gotoblas_HASWELL;
else{
if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
//Intel Phi Knights Landing
if (model == 7) {
if(support_avx())
if(support_avx2())
return &gotoblas_HASWELL;
else{
if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
@ -503,26 +566,26 @@ static gotoblas_t *get_coretype(void){
case 6:
if (model == 6) {
// Cannon Lake
#ifndef NO_AVX512
return &gotoblas_SKYLAKEX;
#else
if(support_avx())
#ifndef NO_AVX2
if(support_avx2())
return &gotoblas_HASWELL;
#else
if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
#endif
else
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM;
#endif
}
}
return NULL;
case 9:
case 8:
if (model == 14 ) { // Kaby Lake
if(support_avx())
if(support_avx2())
return &gotoblas_HASWELL;
else{
if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}