diff --git a/Makefile.rule b/Makefile.rule index 57094377a..37b6c8acc 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -71,6 +71,10 @@ VERSION = 0.2.3 # If you want to disable CPU/Memory affinity on Linux. # NO_AFFINITY = 1 +# Don't use AVX kernels on Sandy Bridge. This keeps the build compatible with +# old compilers and operating systems, at the cost of lower performance. +# NO_AVX = 1 + # If you would like to know minute performance report of GotoBLAS. # FUNCTION_PROFILE = 1 diff --git a/Makefile.system b/Makefile.system index e5279d407..1db4d9d2f 100644 --- a/Makefile.system +++ b/Makefile.system @@ -57,6 +57,10 @@ GEMM_MULTITHREAD_THRESHOLD=50 endif GETARCH_FLAGS += -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD) +ifeq ($(NO_AVX), 1) +GETARCH_FLAGS += -DNO_AVX +endif + # This operation is expensive, so execution should be once. ifndef GOTOBLAS_MAKEFILE export GOTOBLAS_MAKEFILE = 1 @@ -247,11 +251,17 @@ endif ifeq ($(DYNAMIC_ARCH), 1) ifeq ($(ARCH), x86) DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ - CORE2 PENRYN DUNNINGTON NEHALEM SANDYBRIDGE ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO + CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO +ifneq ($(NO_AVX), 1) +DYNAMIC_CORE += SANDYBRIDGE +endif endif ifeq ($(ARCH), x86_64) -DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM SANDYBRIDGE OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO +DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO +ifneq ($(NO_AVX), 1) +DYNAMIC_CORE += SANDYBRIDGE +endif endif ifndef DYNAMIC_CORE @@ -562,6 +572,10 @@ ifeq ($(NO_LAPACKE), 1) CCOMMON_OPT += -DNO_LAPACKE endif +ifeq ($(NO_AVX), 1) +CCOMMON_OPT += -DNO_AVX +endif + ifdef SMP CCOMMON_OPT += -DSMP_SERVER diff --git a/cpuid_x86.c b/cpuid_x86.c index b304cdade..79fd20e3f 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -40,6 +40,11 @@ #include #include "cpuid.h" +#ifdef NO_AVX +#define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM +#define 
CORE_SANDYBRIDGE CORE_NEHALEM +#endif + #ifndef CPUIDEMU #if defined(__APPLE__) && defined(__i386__) @@ -189,7 +194,9 @@ int get_cputype(int gettype){ if ((ecx & (1 << 9)) != 0) feature |= HAVE_SSSE3; if ((ecx & (1 << 19)) != 0) feature |= HAVE_SSE4_1; if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2; +#ifndef NO_AVX if ((ecx & (1 << 28)) != 0) feature |= HAVE_AVX; +#endif if (have_excpuid() >= 0x01) { cpuid(0x80000001, &eax, &ebx, &ecx, &edx); diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c index aa4b867fd..45783c517 100644 --- a/driver/others/dynamic.c +++ b/driver/others/dynamic.c @@ -60,8 +60,14 @@ extern gotoblas_t gotoblas_NEHALEM; extern gotoblas_t gotoblas_OPTERON; extern gotoblas_t gotoblas_OPTERON_SSE3; extern gotoblas_t gotoblas_BARCELONA; -extern gotoblas_t gotoblas_SANDYBRIDGE; extern gotoblas_t gotoblas_BOBCAT; +#ifndef NO_AVX +extern gotoblas_t gotoblas_SANDYBRIDGE; +#else +// Without AVX, use the NEHALEM kernels for Sandy Bridge +#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM +#endif + #define VENDOR_INTEL 1 #define VENDOR_AMD 2