Detect OS-level AVX support (CPUID AVX/OSXSAVE bits plus XCR0) and fall back
to the Nehalem code paths on Sandy Bridge CPUs when the OS does not enable AVX.
Review notes: indentation and blank context lines were reconstructed from the
hunk line counts, so use `git apply --ignore-whitespace` if the context does
not match; the post-image hashes on the `index` lines predate the review edits
made to the added lines.

diff --git a/cpuid_x86.c b/cpuid_x86.c
index 79fd20e3f..ebbbe3fff 100644
--- a/cpuid_x86.c
+++ b/cpuid_x86.c
@@ -114,6 +114,31 @@ static inline int have_excpuid(void){
   return eax & 0xffff;
 }
 
+// Read extended control register XCR[op] with XGETBV: the low 32 bits are
+// stored in *eax and the high 32 bits in *edx. The instruction is emitted as
+// raw bytes (0F 01 D0) so assemblers lacking the "xgetbv" mnemonic still work.
+static inline void xgetbv(int op, int * eax, int * edx){
+  __asm__ __volatile__
+    (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
+}
+
+// Return 1 when AVX is usable, 0 otherwise. CPUID leaf 1 must report both
+// AVX (ECX bit 28) and OSXSAVE (ECX bit 27), and XCR0 must have the SSE and
+// AVX state bits (bits 1 and 2) enabled by the OS.
+int support_avx(void){
+  int eax, ebx, ecx, edx;
+  int ret=0;
+
+  cpuid(1, &eax, &ebx, &ecx, &edx);
+  if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0){
+    xgetbv(0, &eax, &edx);
+    if((eax & 6) == 6){
+      ret=1; //OS supports AVX
+    }
+  }
+  return ret;
+}
+
 int get_vendor(void){
   int eax, ebx, ecx, edx;
   char vendor[13];
@@ -195,7 +220,7 @@ int get_cputype(int gettype){
     if ((ecx & (1 << 19)) != 0) feature |= HAVE_SSE4_1;
     if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
 #ifndef NO_AVX
-    if ((ecx & (1 << 28)) != 0) feature |= HAVE_AVX;
+    if (support_avx()) feature |= HAVE_AVX;
 #endif
 
     if (have_excpuid() >= 0x01) {
@@ -991,13 +1016,19 @@ int get_cpuname(void){
           return CPUTYPE_NEHALEM;
         case 10:
           //Intel Core i5-2000 /i7-2000 (Sandy Bridge)
-          return CPUTYPE_SANDYBRIDGE;
+          if(support_avx())
+            return CPUTYPE_SANDYBRIDGE;
+          else
+            return CPUTYPE_NEHALEM; //OS doesn't support AVX
         case 12:
           //Xeon Processor 5600 (Westmere-EP)
           return CPUTYPE_NEHALEM;
         case 13:
           //Intel Core i7-3000 / Xeon E5 (Sandy Bridge)
-          return CPUTYPE_SANDYBRIDGE;
+          if(support_avx())
+            return CPUTYPE_SANDYBRIDGE;
+          else
+            return CPUTYPE_NEHALEM;
         case 15:
           //Xeon Processor E7 (Westmere-EX)
           return CPUTYPE_NEHALEM;
@@ -1006,7 +1037,10 @@ int get_cpuname(void){
       case 3:
         switch (model) {
         case 10:
-          return CPUTYPE_SANDYBRIDGE;
+          if(support_avx())
+            return CPUTYPE_SANDYBRIDGE;
+          else
+            return CPUTYPE_NEHALEM;
         }
         break;
       }
@@ -1350,13 +1384,19 @@ int get_coretype(void){
           return CORE_NEHALEM;
         case 10:
           //Intel Core i5-2000 /i7-2000 (Sandy Bridge)
-          return CORE_SANDYBRIDGE;
+          if(support_avx())
+            return CORE_SANDYBRIDGE;
+          else
+            return CORE_NEHALEM; //OS doesn't support AVX
         case 12:
           //Xeon Processor 5600 (Westmere-EP)
           return CORE_NEHALEM;
         case 13:
           //Intel Core i7-3000 / Xeon E5 (Sandy Bridge)
-          return CORE_SANDYBRIDGE;
+          if(support_avx())
+            return CORE_SANDYBRIDGE;
+          else
+            return CORE_NEHALEM; //OS doesn't support AVX
         case 15:
           //Xeon Processor E7 (Westmere-EX)
           return CORE_NEHALEM;
@@ -1365,7 +1405,10 @@ int get_coretype(void){
       case 3:
         switch (model) {
         case 10:
-          return CORE_SANDYBRIDGE;
+          if(support_avx())
+            return CORE_SANDYBRIDGE;
+          else
+            return CORE_NEHALEM; //OS doesn't support AVX
         }
         break;
       }
diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c
index 45783c517..468ab0dc8 100644
--- a/driver/others/dynamic.c
+++ b/driver/others/dynamic.c
@@ -76,6 +76,31 @@ extern gotoblas_t gotoblas_SANDYBRIDGE;
 
 #define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
 
+// Read extended control register XCR[op] with XGETBV: the low 32 bits are
+// stored in *eax and the high 32 bits in *edx. The instruction is emitted as
+// raw bytes (0F 01 D0) so assemblers lacking the "xgetbv" mnemonic still work.
+static inline void xgetbv(int op, int * eax, int * edx){
+  __asm__ __volatile__
+    (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
+}
+
+// Return 1 when AVX is usable, 0 otherwise. CPUID leaf 1 must report both
+// AVX (ECX bit 28) and OSXSAVE (ECX bit 27), and XCR0 must have the SSE and
+// AVX state bits (bits 1 and 2) enabled by the OS.
+int support_avx(void){
+  int eax, ebx, ecx, edx;
+  int ret=0;
+
+  cpuid(1, &eax, &ebx, &ecx, &edx);
+  if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0){
+    xgetbv(0, &eax, &edx);
+    if((eax & 6) == 6){
+      ret=1; //OS supports AVX
+    }
+  }
+  return ret;
+}
+
 static int get_vendor(void){
   int eax, ebx, ecx, edx;
   char vendor[13];
@@ -142,11 +167,25 @@ static gotoblas_t *get_coretype(void){
 
         //Intel Core i5-2000 /i7-2000 (Sandy Bridge)
         //Intel Core i7-3000 / Xeon E5
-        if (model == 10 || model == 13) return &gotoblas_SANDYBRIDGE;
+        if (model == 10 || model == 13) {
+          if(support_avx())
+            return &gotoblas_SANDYBRIDGE;
+          else{
+            fprintf(stderr, "OpenBLAS : Your OS doesn't support AVX. Use Nehalem kernels.\n");
+            return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
+          }
+        }
         return NULL;
       case 3:
         //Intel Sandy Bridge 22nm (Ivy Bridge?)
-        if (model == 10) return &gotoblas_SANDYBRIDGE;
+        if (model == 10) {
+          if(support_avx())
+            return &gotoblas_SANDYBRIDGE;
+          else{
+            fprintf(stderr, "OpenBLAS : Your OS doesn't support AVX. Use Nehalem kernels.\n");
+            return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
+          }
+        }
         return NULL;
       }
     case 0xf:
@@ -239,7 +278,7 @@ void gotoblas_dynamic_init(void) {
   if (gotoblas && gotoblas -> init) {
     gotoblas -> init();
   } else {
-    fprintf(stderr, "GotoBLAS : Architecture Initialization failed. No initialization function found.\n");
+    fprintf(stderr, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
     exit(1);
   }
 