Query AVX2 and AVX512VL capability in x86 cpu detection

This commit is contained in:
Martin Kroeker 2019-01-05 16:58:56 +01:00 committed by GitHub
parent ae1d1f74f7
commit 0afaae4b23
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 76 additions and 59 deletions

View File

@ -134,7 +134,7 @@ static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
"=b" (*ebx), "=b" (*ebx),
"=c" (*ecx), "=c" (*ecx),
"=d" (*edx) "=d" (*edx)
: "0" (op)); : "0" (op), "c"(0));
#endif #endif
} }

View File

@ -139,6 +139,7 @@
#define HAVE_FMA4 (1 << 19) #define HAVE_FMA4 (1 << 19)
#define HAVE_FMA3 (1 << 20) #define HAVE_FMA3 (1 << 20)
#define HAVE_AVX512VL (1 << 21) #define HAVE_AVX512VL (1 << 21)
#define HAVE_AVX2 (1 << 22)
#define CACHE_INFO_L1_I 1 #define CACHE_INFO_L1_I 1
#define CACHE_INFO_L1_D 2 #define CACHE_INFO_L1_D 2

View File

@ -97,10 +97,10 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
("mov %%ebx, %%edi;" ("mov %%ebx, %%edi;"
"cpuid;" "cpuid;"
"xchgl %%ebx, %%edi;" "xchgl %%ebx, %%edi;"
: "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc"); : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op), "c" (0) : "cc");
#else #else
__asm__ __volatile__ __asm__ __volatile__
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc"); ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) , "c" (0) : "cc");
#endif #endif
} }
@ -211,6 +211,42 @@ int support_avx(){
#endif #endif
} }
/*
 * Report whether the CPU advertises AVX2.
 *
 * Returns 1 when support_avx() succeeds and CPUID leaf 7 (subleaf 0)
 * sets the AVX2 feature flag in EBX; returns 0 otherwise, and always
 * returns 0 when the build defines NO_AVX2.
 */
int support_avx2(){
#ifndef NO_AVX2
  int eax, ebx, ecx=0, edx;

  // AVX2 is unusable without working AVX. Note the call: a bare
  // `support_avx` is the function's address, which is never null.
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  // CPUID.(EAX=7,ECX=0):EBX bit 5 is the AVX2 flag (bit 7 is SMEP).
  if ((ebx & (1 << 5)) != 0)
    return 1;  // CPU reports AVX2
  return 0;
#else
  return 0;
#endif
}
/*
 * Report whether the CPU advertises AVX512VL.
 *
 * Returns 1 when support_avx() succeeds and CPUID leaf 7 (subleaf 0)
 * sets both the AVX2 flag and the AVX512VL flag in EBX; returns 0
 * otherwise, and always returns 0 when the build defines NO_AVX512.
 *
 * NOTE(review): only CPUID feature bits are examined here; whether the
 * OS has enabled the AVX-512 register state is presumably left to
 * support_avx() or to the caller -- confirm.
 */
int support_avx512(){
#ifndef NO_AVX512
  int eax, ebx, ecx=0, edx;
  unsigned int leaf7_ebx;

  // A bare `support_avx` is the function's address and never null;
  // the prerequisite check has to actually call it.
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);
  // Work on an unsigned copy so that testing bit 31 is well defined.
  leaf7_ebx = (unsigned int)ebx;

  // Bit 5: AVX2. Without it, do not report AVX512VL either.
  if ((leaf7_ebx & (1u << 5)) == 0)
    return 0;

  // Bit 31: AVX512VL. (1 << 31 on a signed int would be undefined.)
  if ((leaf7_ebx & (1u << 31)) != 0)
    return 1;  // CPU reports AVX512VL
  return 0;
#else
  return 0;
#endif
}
int get_vendor(void){ int get_vendor(void){
int eax, ebx, ecx, edx; int eax, ebx, ecx, edx;
@ -294,6 +330,8 @@ int get_cputype(int gettype){
if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2; if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
#ifndef NO_AVX #ifndef NO_AVX
if (support_avx()) feature |= HAVE_AVX; if (support_avx()) feature |= HAVE_AVX;
if (support_avx2()) feature |= HAVE_AVX2;
if (support_avx512()) feature |= HAVE_AVX512VL;
if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3; if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3;
#endif #endif
@ -1228,22 +1266,18 @@ int get_cpuname(void){
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
case 12: case 12:
case 15: case 15:
if(support_avx()) if(support_avx2())
#ifndef NO_AVX2
return CPUTYPE_HASWELL; return CPUTYPE_HASWELL;
#else if(support_avx())
return CPUTYPE_SANDYBRIDGE; return CPUTYPE_SANDYBRIDGE;
#endif
else else
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
case 13: case 13:
//Broadwell //Broadwell
if(support_avx()) if(support_avx2())
#ifndef NO_AVX2
return CPUTYPE_HASWELL; return CPUTYPE_HASWELL;
#else if(support_avx())
return CPUTYPE_SANDYBRIDGE; return CPUTYPE_SANDYBRIDGE;
#endif
else else
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
} }
@ -1252,33 +1286,27 @@ int get_cpuname(void){
switch (model) { switch (model) {
case 5: case 5:
case 6: case 6:
if(support_avx()) if(support_avx2())
#ifndef NO_AVX2
return CPUTYPE_HASWELL; return CPUTYPE_HASWELL;
#else if(support_avx())
return CPUTYPE_SANDYBRIDGE; return CPUTYPE_SANDYBRIDGE;
#endif
else else
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
case 7: case 7:
case 15: case 15:
//Broadwell //Broadwell
if(support_avx()) if(support_avx2())
#ifndef NO_AVX2
return CPUTYPE_HASWELL; return CPUTYPE_HASWELL;
#else if(support_avx())
return CPUTYPE_SANDYBRIDGE; return CPUTYPE_SANDYBRIDGE;
#endif
else else
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
case 14: case 14:
//Skylake //Skylake
if(support_avx()) if(support_avx2())
#ifndef NO_AVX2
return CPUTYPE_HASWELL; return CPUTYPE_HASWELL;
#else if(support_avx())
return CPUTYPE_SANDYBRIDGE; return CPUTYPE_SANDYBRIDGE;
#endif
else else
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
case 12: case 12:
@ -1292,46 +1320,36 @@ int get_cpuname(void){
switch (model) { switch (model) {
case 6: case 6:
//Broadwell //Broadwell
if(support_avx()) if(support_avx2())
#ifndef NO_AVX2
return CPUTYPE_HASWELL; return CPUTYPE_HASWELL;
#else if(support_avx())
return CPUTYPE_SANDYBRIDGE; return CPUTYPE_SANDYBRIDGE;
#endif
else else
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
case 5: case 5:
// Skylake X // Skylake X
#ifndef NO_AVX512 if(support_avx512())
return CPUTYPE_SKYLAKEX; return CPUTYPE_SKYLAKEX;
#else if(support_avx2())
if(support_avx()) return CPUTYPE_HASWELL;
#ifndef NO_AVX2 if(support_avx())
return CPUTYPE_HASWELL; return CPUTYPE_SANDYBRIDGE;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else else
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
#endif
case 14: case 14:
// Skylake // Skylake
if(support_avx()) if(support_avx2())
#ifndef NO_AVX2
return CPUTYPE_HASWELL; return CPUTYPE_HASWELL;
#else if(support_avx())
return CPUTYPE_SANDYBRIDGE; return CPUTYPE_SANDYBRIDGE;
#endif
else else
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
case 7: case 7:
// Xeon Phi Knights Landing // Xeon Phi Knights Landing
if(support_avx()) if(support_avx2())
#ifndef NO_AVX2
return CPUTYPE_HASWELL; return CPUTYPE_HASWELL;
#else if(support_avx())
return CPUTYPE_SANDYBRIDGE; return CPUTYPE_SANDYBRIDGE;
#endif
else else
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
case 12: case 12:
@ -1342,30 +1360,24 @@ int get_cpuname(void){
case 6: case 6:
switch (model) { switch (model) {
case 6: // Cannon Lake case 6: // Cannon Lake
#ifndef NO_AVX512 if(support_avx512())
return CPUTYPE_SKYLAKEX; return CPUTYPE_SKYLAKEX;
#else if(support_avx2())
if(support_avx()) return CPUTYPE_HASWELL;
#ifndef NO_AVX2 if(support_avx())
return CPUTYPE_HASWELL; return CPUTYPE_SANDYBRIDGE;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else else
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
#endif
} }
break; break;
case 9: case 9:
case 8: case 8:
switch (model) { switch (model) {
case 14: // Kaby Lake case 14: // Kaby Lake
if(support_avx()) if(support_avx2())
#ifndef NO_AVX2
return CPUTYPE_HASWELL; return CPUTYPE_HASWELL;
#else if(support_avx())
return CPUTYPE_SANDYBRIDGE; return CPUTYPE_SANDYBRIDGE;
#endif
else else
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
} }
@ -2112,6 +2124,8 @@ void get_cpuconfig(void){
if (features & HAVE_SSE4A) printf("#define HAVE_SSE4A\n"); if (features & HAVE_SSE4A) printf("#define HAVE_SSE4A\n");
if (features & HAVE_SSE5 ) printf("#define HAVE_SSSE5\n"); if (features & HAVE_SSE5 ) printf("#define HAVE_SSSE5\n");
if (features & HAVE_AVX ) printf("#define HAVE_AVX\n"); if (features & HAVE_AVX ) printf("#define HAVE_AVX\n");
if (features & HAVE_AVX2 ) printf("#define HAVE_AVX2\n");
if (features & HAVE_AVX512VL ) printf("#define HAVE_AVX512VL\n");
if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n"); if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n");
if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n"); if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n");
if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n"); if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n");
@ -2180,6 +2194,8 @@ void get_sse(void){
if (features & HAVE_SSE4A) printf("HAVE_SSE4A=1\n"); if (features & HAVE_SSE4A) printf("HAVE_SSE4A=1\n");
if (features & HAVE_SSE5 ) printf("HAVE_SSSE5=1\n"); if (features & HAVE_SSE5 ) printf("HAVE_SSSE5=1\n");
if (features & HAVE_AVX ) printf("HAVE_AVX=1\n"); if (features & HAVE_AVX ) printf("HAVE_AVX=1\n");
if (features & HAVE_AVX2 ) printf("HAVE_AVX2=1\n");
if (features & HAVE_AVX512VL ) printf("HAVE_AVX512VL=1\n");
if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n"); if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n");
if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n"); if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n");
if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n"); if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n");