Refs #139. Added NO_AVX flag to use old Nehalem kernels on Sandy Bridge.
For example, make NO_AVX=1 or make DYNAMIC_ARCH=1 NO_AVX=1
This commit is contained in:
parent
9419a43a7f
commit
f76a384841
|
@ -71,6 +71,10 @@ VERSION = 0.2.3
|
|||
# If you want to disable CPU/Memory affinity on Linux.
|
||||
# NO_AFFINITY = 1
|
||||
|
||||
# Don't use AVX kernels on Sandy Bridge. This option is compatible with old compilers
|
||||
# and operating systems. However, performance is lower.
|
||||
# NO_AVX = 1
|
||||
|
||||
# If you would like a detailed performance report of GotoBLAS.
|
||||
# FUNCTION_PROFILE = 1
|
||||
|
||||
|
|
|
@ -57,6 +57,10 @@ GEMM_MULTITHREAD_THRESHOLD=50
|
|||
endif
|
||||
GETARCH_FLAGS += -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD)
|
||||
|
||||
ifeq ($(NO_AVX), 1)
|
||||
GETARCH_FLAGS += -DNO_AVX
|
||||
endif
|
||||
|
||||
# This operation is expensive, so it should be executed only once.
|
||||
ifndef GOTOBLAS_MAKEFILE
|
||||
export GOTOBLAS_MAKEFILE = 1
|
||||
|
@ -247,11 +251,17 @@ endif
|
|||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
ifeq ($(ARCH), x86)
|
||||
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
|
||||
CORE2 PENRYN DUNNINGTON NEHALEM SANDYBRIDGE ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
ifneq ($(NO_AVX), 1)
|
||||
DYNAMIC_CORE += SANDYBRIDGE
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86_64)
|
||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM SANDYBRIDGE OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
ifneq ($(NO_AVX), 1)
|
||||
DYNAMIC_CORE += SANDYBRIDGE
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef DYNAMIC_CORE
|
||||
|
@ -562,6 +572,10 @@ ifeq ($(NO_LAPACKE), 1)
|
|||
CCOMMON_OPT += -DNO_LAPACKE
|
||||
endif
|
||||
|
||||
ifeq ($(NO_AVX), 1)
|
||||
CCOMMON_OPT += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifdef SMP
|
||||
CCOMMON_OPT += -DSMP_SERVER
|
||||
|
||||
|
|
|
@ -40,6 +40,11 @@
|
|||
#include <string.h>
|
||||
#include "cpuid.h"
|
||||
|
||||
#ifdef NO_AVX
|
||||
#define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM
|
||||
#define CORE_SANDYBRIDGE CORE_NEHALEM
|
||||
#endif
|
||||
|
||||
#ifndef CPUIDEMU
|
||||
|
||||
#if defined(__APPLE__) && defined(__i386__)
|
||||
|
@ -189,7 +194,9 @@ int get_cputype(int gettype){
|
|||
if ((ecx & (1 << 9)) != 0) feature |= HAVE_SSSE3;
|
||||
if ((ecx & (1 << 19)) != 0) feature |= HAVE_SSE4_1;
|
||||
if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
|
||||
#ifndef NO_AVX
|
||||
if ((ecx & (1 << 28)) != 0) feature |= HAVE_AVX;
|
||||
#endif
|
||||
|
||||
if (have_excpuid() >= 0x01) {
|
||||
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
|
||||
|
|
|
@ -60,8 +60,14 @@ extern gotoblas_t gotoblas_NEHALEM;
|
|||
extern gotoblas_t gotoblas_OPTERON;
|
||||
extern gotoblas_t gotoblas_OPTERON_SSE3;
|
||||
extern gotoblas_t gotoblas_BARCELONA;
|
||||
extern gotoblas_t gotoblas_SANDYBRIDGE;
|
||||
extern gotoblas_t gotoblas_BOBCAT;
|
||||
#ifndef NO_AVX
|
||||
extern gotoblas_t gotoblas_SANDYBRIDGE;
|
||||
#else
|
||||
// Use NEHALEM kernels for Sandy Bridge
|
||||
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
|
||||
#endif
|
||||
|
||||
|
||||
#define VENDOR_INTEL 1
|
||||
#define VENDOR_AMD 2
|
||||
|
|
Loading…
Reference in New Issue