Query AVX2 and AVX512 capability for runtime cpu selection
This commit is contained in:
parent
20d1aad13f
commit
ae1d1f74f7
|
@ -304,9 +304,47 @@ int support_avx(){
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Runtime query: may AVX2 kernels be selected on this machine?
 *
 * Returns 1 when support_avx() succeeds AND CPUID leaf 7 reports the AVX2
 * feature flag (EBX bit 5); returns 0 otherwise, or unconditionally when the
 * library is built with NO_AVX2.
 */
int support_avx2(){
#ifndef NO_AVX2
  /* ecx is zeroed before the query; presumably the cpuid helper uses it as
     the sub-leaf selector for leaf 7 -- confirm against the helper. */
  int eax, ebx, ecx=0, edx;

  /* AVX2 is unusable without working AVX. Note the call: testing the bare
     function name would compare its address, which is never null. */
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* CPUID.(EAX=7,ECX=0):EBX bit 5 is AVX2 (bit 7 is SMEP, not AVX2). */
  if((ebx & (1<<5)) != 0)
    return 1;  //CPU reports AVX2
  return 0;
#else
  return 0;
#endif
}
|
||||||
|
|
||||||
|
/*
 * Runtime query: may AVX512 kernels be selected on this machine?
 *
 * Returns 1 when support_avx() succeeds, CPUID leaf 7 reports AVX2
 * (EBX bit 5) and it also reports AVX512VL (EBX bit 31); returns 0
 * otherwise, or unconditionally when the library is built with NO_AVX512.
 */
int support_avx512(){
#ifndef NO_AVX512
  /* ecx is zeroed before the query, matching support_avx2(); presumably the
     cpuid helper uses it as the sub-leaf selector -- confirm against the
     helper. */
  int eax, ebx, ecx=0, edx;

  /* Call support_avx(); testing the bare function name would compare its
     address, which is never null, and the guard would never fire. */
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* CPUID.(EAX=7,ECX=0):EBX bit 5 is AVX2. Bail out early when it is absent
     instead of letting the later check overwrite the verdict. */
  if((ebx & (1<<5)) == 0)
    return 0;  //CPU does not even report AVX2

  /* Bit 31 is AVX512VL. Test it as unsigned: 1<<31 overflows a signed int. */
  if(((unsigned int)ebx & (1u<<31)) != 0)
    return 1;  //CPU reports AVX512VL
  return 0;
#else
  return 0;
#endif
}
|
||||||
|
|
||||||
extern void openblas_warning(int verbose, const char * msg);
|
extern void openblas_warning(int verbose, const char * msg);
|
||||||
#define FALLBACK_VERBOSE 1
|
#define FALLBACK_VERBOSE 1
|
||||||
#define NEHALEM_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n"
|
#define NEHALEM_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n"
|
||||||
|
#define SANDYBRIDGE_FALLBACK "OpenBLAS : Your OS does not support AVX2 instructions. OpenBLAS is using Sandybridge kernels as a fallback, which may give poorer performance.\n"
|
||||||
|
#define HASWELL_FALLBACK "OpenBLAS : Your OS does not support AVX512 instructions. OpenBLAS is using Haswell kernels as a fallback, which may give poorer performance.\n"
|
||||||
#define BARCELONA_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n"
|
#define BARCELONA_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n"
|
||||||
|
|
||||||
static int get_vendor(void){
|
static int get_vendor(void){
|
||||||
|
@ -403,18 +441,24 @@ static gotoblas_t *get_coretype(void){
|
||||||
}
|
}
|
||||||
//Intel Haswell
|
//Intel Haswell
|
||||||
if (model == 12 || model == 15) {
|
if (model == 12 || model == 15) {
|
||||||
if(support_avx())
|
if(support_avx2())
|
||||||
return &gotoblas_HASWELL;
|
return &gotoblas_HASWELL;
|
||||||
else{
|
if(support_avx()) {
|
||||||
|
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
|
||||||
|
return &gotoblas_SANDYBRIDGE;
|
||||||
|
} else {
|
||||||
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//Intel Broadwell
|
//Intel Broadwell
|
||||||
if (model == 13) {
|
if (model == 13) {
|
||||||
if(support_avx())
|
if(support_avx2())
|
||||||
return &gotoblas_HASWELL;
|
return &gotoblas_HASWELL;
|
||||||
else{
|
if(support_avx()) {
|
||||||
|
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
|
||||||
|
return &gotoblas_SANDYBRIDGE;
|
||||||
|
} else {
|
||||||
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||||
}
|
}
|
||||||
|
@ -424,27 +468,36 @@ static gotoblas_t *get_coretype(void){
|
||||||
case 4:
|
case 4:
|
||||||
//Intel Haswell
|
//Intel Haswell
|
||||||
if (model == 5 || model == 6) {
|
if (model == 5 || model == 6) {
|
||||||
if(support_avx())
|
if(support_avx2())
|
||||||
return &gotoblas_HASWELL;
|
return &gotoblas_HASWELL;
|
||||||
else{
|
if(support_avx()) {
|
||||||
|
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
|
||||||
|
return &gotoblas_SANDYBRIDGE;
|
||||||
|
} else {
|
||||||
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//Intel Broadwell
|
//Intel Broadwell
|
||||||
if (model == 7 || model == 15) {
|
if (model == 7 || model == 15) {
|
||||||
if(support_avx())
|
if(support_avx2())
|
||||||
return &gotoblas_HASWELL;
|
return &gotoblas_HASWELL;
|
||||||
else{
|
if(support_avx()) {
|
||||||
|
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
|
||||||
|
return &gotoblas_SANDYBRIDGE;
|
||||||
|
} else {
|
||||||
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//Intel Skylake
|
//Intel Skylake
|
||||||
if (model == 14) {
|
if (model == 14) {
|
||||||
if(support_avx())
|
if(support_avx2())
|
||||||
return &gotoblas_HASWELL;
|
return &gotoblas_HASWELL;
|
||||||
else{
|
if(support_avx()) {
|
||||||
|
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
|
||||||
|
return &gotoblas_SANDYBRIDGE;
|
||||||
|
} else {
|
||||||
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||||
}
|
}
|
||||||
|
@ -457,40 +510,50 @@ static gotoblas_t *get_coretype(void){
|
||||||
case 5:
|
case 5:
|
||||||
//Intel Broadwell
|
//Intel Broadwell
|
||||||
if (model == 6) {
|
if (model == 6) {
|
||||||
if(support_avx())
|
if(support_avx2())
|
||||||
return &gotoblas_HASWELL;
|
return &gotoblas_HASWELL;
|
||||||
else{
|
if(support_avx()) {
|
||||||
|
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
|
||||||
|
return &gotoblas_SANDYBRIDGE;
|
||||||
|
} else {
|
||||||
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (model == 5) {
|
if (model == 5) {
|
||||||
// Intel Skylake X
|
// Intel Skylake X
|
||||||
#ifndef NO_AVX512
|
if (support_avx512())
|
||||||
return &gotoblas_SKYLAKEX;
|
return &gotoblas_SKYLAKEX;
|
||||||
#else
|
if(support_avx2())
|
||||||
if(support_avx())
|
|
||||||
return &gotoblas_HASWELL;
|
return &gotoblas_HASWELL;
|
||||||
else {
|
if(support_avx()) {
|
||||||
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
|
||||||
return &gotoblas_NEHALEM;
|
return &gotoblas_SANDYBRIDGE;
|
||||||
}
|
} else {
|
||||||
#endif
|
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||||
|
return &gotoblas_NEHALEM;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
//Intel Skylake
|
//Intel Skylake
|
||||||
if (model == 14) {
|
if (model == 14) {
|
||||||
if(support_avx())
|
if(support_avx2())
|
||||||
return &gotoblas_HASWELL;
|
return &gotoblas_HASWELL;
|
||||||
else{
|
if(support_avx()) {
|
||||||
|
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
|
||||||
|
return &gotoblas_SANDYBRIDGE;
|
||||||
|
} else {
|
||||||
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//Intel Phi Knights Landing
|
//Intel Phi Knights Landing
|
||||||
if (model == 7) {
|
if (model == 7) {
|
||||||
if(support_avx())
|
if(support_avx2())
|
||||||
return &gotoblas_HASWELL;
|
return &gotoblas_HASWELL;
|
||||||
else{
|
if(support_avx()) {
|
||||||
|
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
|
||||||
|
return &gotoblas_SANDYBRIDGE;
|
||||||
|
} else {
|
||||||
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||||
}
|
}
|
||||||
|
@ -503,26 +566,26 @@ static gotoblas_t *get_coretype(void){
|
||||||
case 6:
|
case 6:
|
||||||
if (model == 6) {
|
if (model == 6) {
|
||||||
// Cannon Lake
|
// Cannon Lake
|
||||||
#ifndef NO_AVX512
|
if(support_avx2())
|
||||||
return &gotoblas_SKYLAKEX;
|
return &gotoblas_HASWELL;
|
||||||
#else
|
if(support_avx()) {
|
||||||
if(support_avx())
|
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
|
||||||
#ifndef NO_AVX2
|
return &gotoblas_SANDYBRIDGE;
|
||||||
return &gotoblas_HASWELL;
|
} else {
|
||||||
#else
|
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||||
return &gotoblas_SANDYBRIDGE;
|
return &gotoblas_NEHALEM;
|
||||||
#endif
|
}
|
||||||
else
|
|
||||||
return &gotoblas_NEHALEM;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
case 9:
|
case 9:
|
||||||
case 8:
|
case 8:
|
||||||
if (model == 14 ) { // Kaby Lake
|
if (model == 14 ) { // Kaby Lake
|
||||||
if(support_avx())
|
if(support_avx2())
|
||||||
return &gotoblas_HASWELL;
|
return &gotoblas_HASWELL;
|
||||||
else{
|
if(support_avx()) {
|
||||||
|
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
|
||||||
|
return &gotoblas_SANDYBRIDGE;
|
||||||
|
} else {
|
||||||
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue