Refs #139. Added NO_AVX flag to use old Nehalem kernels on Sandy Bridge.

For example, make NO_AVX=1 or make DYNAMIC_ARCH=1 NO_AVX=1
This commit is contained in:
Zhang Xianyi 2012-09-17 23:24:04 +08:00
parent 9419a43a7f
commit f76a384841
4 changed files with 34 additions and 3 deletions

View File

@ -71,6 +71,10 @@ VERSION = 0.2.3
# If you want to disable CPU/Memory affinity on Linux.
# NO_AFFINITY = 1
# Don't use the AVX kernels on Sandy Bridge. This keeps the build compatible
# with old compilers and operating systems, but the performance is lower.
# NO_AVX = 1
# If you would like a detailed performance report of GotoBLAS.
# FUNCTION_PROFILE = 1

View File

@ -57,6 +57,10 @@ GEMM_MULTITHREAD_THRESHOLD=50
endif
GETARCH_FLAGS += -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD)
ifeq ($(NO_AVX), 1)
GETARCH_FLAGS += -DNO_AVX
endif
# This operation is expensive, so it should be executed only once.
ifndef GOTOBLAS_MAKEFILE
export GOTOBLAS_MAKEFILE = 1
@ -247,11 +251,17 @@ endif
ifeq ($(DYNAMIC_ARCH), 1)
ifeq ($(ARCH), x86)
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
CORE2 PENRYN DUNNINGTON NEHALEM SANDYBRIDGE ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE
endif
endif
ifeq ($(ARCH), x86_64)
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM SANDYBRIDGE OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE
endif
endif
ifndef DYNAMIC_CORE
@ -562,6 +572,10 @@ ifeq ($(NO_LAPACKE), 1)
CCOMMON_OPT += -DNO_LAPACKE
endif
ifeq ($(NO_AVX), 1)
CCOMMON_OPT += -DNO_AVX
endif
ifdef SMP
CCOMMON_OPT += -DSMP_SERVER

View File

@ -40,6 +40,11 @@
#include <string.h>
#include "cpuid.h"
#ifdef NO_AVX
#define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM
#define CORE_SANDYBRIDGE CORE_NEHALEM
#endif
#ifndef CPUIDEMU
#if defined(__APPLE__) && defined(__i386__)
@ -189,7 +194,9 @@ int get_cputype(int gettype){
if ((ecx & (1 << 9)) != 0) feature |= HAVE_SSSE3;
if ((ecx & (1 << 19)) != 0) feature |= HAVE_SSE4_1;
if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
#ifndef NO_AVX
if ((ecx & (1 << 28)) != 0) feature |= HAVE_AVX;
#endif
if (have_excpuid() >= 0x01) {
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);

View File

@ -60,8 +60,14 @@ extern gotoblas_t gotoblas_NEHALEM;
extern gotoblas_t gotoblas_OPTERON;
extern gotoblas_t gotoblas_OPTERON_SSE3;
extern gotoblas_t gotoblas_BARCELONA;
extern gotoblas_t gotoblas_SANDYBRIDGE;
extern gotoblas_t gotoblas_BOBCAT;
#ifndef NO_AVX
extern gotoblas_t gotoblas_SANDYBRIDGE;
#else
// Use the NEHALEM kernels for Sandy Bridge.
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
#endif
#define VENDOR_INTEL 1
#define VENDOR_AMD 2