Refs #139. Added NO_AVX flag to use old Nehalem kernels on Sandy Bridge.
For example: `make NO_AVX=1` or `make DYNAMIC_ARCH=1 NO_AVX=1`.
This commit is contained in:
parent
9419a43a7f
commit
f76a384841
|
@ -71,6 +71,10 @@ VERSION = 0.2.3
|
||||||
# If you want to disable CPU/Memory affinity on Linux.
|
# If you want to disable CPU/Memory affinity on Linux.
|
||||||
# NO_AFFINITY = 1
|
# NO_AFFINITY = 1
|
||||||
|
|
||||||
|
# Don't use the AVX kernels on Sandy Bridge. This is compatible with old compilers
|
||||||
|
# and operating systems. However, performance is lower.
|
||||||
|
# NO_AVX = 1
|
||||||
|
|
||||||
# If you would like to know minute performance report of GotoBLAS.
|
# If you would like to know minute performance report of GotoBLAS.
|
||||||
# FUNCTION_PROFILE = 1
|
# FUNCTION_PROFILE = 1
|
||||||
|
|
||||||
|
|
|
@ -57,6 +57,10 @@ GEMM_MULTITHREAD_THRESHOLD=50
|
||||||
endif
|
endif
|
||||||
GETARCH_FLAGS += -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD)
|
GETARCH_FLAGS += -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD)
|
||||||
|
|
||||||
|
ifeq ($(NO_AVX), 1)
|
||||||
|
GETARCH_FLAGS += -DNO_AVX
|
||||||
|
endif
|
||||||
|
|
||||||
# This operation is expensive, so execution should be once.
|
# This operation is expensive, so execution should be once.
|
||||||
ifndef GOTOBLAS_MAKEFILE
|
ifndef GOTOBLAS_MAKEFILE
|
||||||
export GOTOBLAS_MAKEFILE = 1
|
export GOTOBLAS_MAKEFILE = 1
|
||||||
|
@ -247,11 +251,17 @@ endif
|
||||||
ifeq ($(DYNAMIC_ARCH), 1)
|
ifeq ($(DYNAMIC_ARCH), 1)
|
||||||
ifeq ($(ARCH), x86)
|
ifeq ($(ARCH), x86)
|
||||||
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
|
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
|
||||||
CORE2 PENRYN DUNNINGTON NEHALEM SANDYBRIDGE ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||||
|
ifneq ($(NO_AVX), 1)
|
||||||
|
DYNAMIC_CORE += SANDYBRIDGE
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(ARCH), x86_64)
|
ifeq ($(ARCH), x86_64)
|
||||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM SANDYBRIDGE OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||||
|
ifneq ($(NO_AVX), 1)
|
||||||
|
DYNAMIC_CORE += SANDYBRIDGE
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef DYNAMIC_CORE
|
ifndef DYNAMIC_CORE
|
||||||
|
@ -562,6 +572,10 @@ ifeq ($(NO_LAPACKE), 1)
|
||||||
CCOMMON_OPT += -DNO_LAPACKE
|
CCOMMON_OPT += -DNO_LAPACKE
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(NO_AVX), 1)
|
||||||
|
CCOMMON_OPT += -DNO_AVX
|
||||||
|
endif
|
||||||
|
|
||||||
ifdef SMP
|
ifdef SMP
|
||||||
CCOMMON_OPT += -DSMP_SERVER
|
CCOMMON_OPT += -DSMP_SERVER
|
||||||
|
|
||||||
|
|
|
@ -40,6 +40,11 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "cpuid.h"
|
#include "cpuid.h"
|
||||||
|
|
||||||
|
#ifdef NO_AVX
|
||||||
|
#define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM
|
||||||
|
#define CORE_SANDYBRIDGE CORE_NEHALEM
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef CPUIDEMU
|
#ifndef CPUIDEMU
|
||||||
|
|
||||||
#if defined(__APPLE__) && defined(__i386__)
|
#if defined(__APPLE__) && defined(__i386__)
|
||||||
|
@ -189,7 +194,9 @@ int get_cputype(int gettype){
|
||||||
if ((ecx & (1 << 9)) != 0) feature |= HAVE_SSSE3;
|
if ((ecx & (1 << 9)) != 0) feature |= HAVE_SSSE3;
|
||||||
if ((ecx & (1 << 19)) != 0) feature |= HAVE_SSE4_1;
|
if ((ecx & (1 << 19)) != 0) feature |= HAVE_SSE4_1;
|
||||||
if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
|
if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
|
||||||
|
#ifndef NO_AVX
|
||||||
if ((ecx & (1 << 28)) != 0) feature |= HAVE_AVX;
|
if ((ecx & (1 << 28)) != 0) feature |= HAVE_AVX;
|
||||||
|
#endif
|
||||||
|
|
||||||
if (have_excpuid() >= 0x01) {
|
if (have_excpuid() >= 0x01) {
|
||||||
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
|
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
|
||||||
|
|
|
@ -60,8 +60,14 @@ extern gotoblas_t gotoblas_NEHALEM;
|
||||||
extern gotoblas_t gotoblas_OPTERON;
|
extern gotoblas_t gotoblas_OPTERON;
|
||||||
extern gotoblas_t gotoblas_OPTERON_SSE3;
|
extern gotoblas_t gotoblas_OPTERON_SSE3;
|
||||||
extern gotoblas_t gotoblas_BARCELONA;
|
extern gotoblas_t gotoblas_BARCELONA;
|
||||||
extern gotoblas_t gotoblas_SANDYBRIDGE;
|
|
||||||
extern gotoblas_t gotoblas_BOBCAT;
|
extern gotoblas_t gotoblas_BOBCAT;
|
||||||
|
#ifndef NO_AVX
|
||||||
|
extern gotoblas_t gotoblas_SANDYBRIDGE;
|
||||||
|
#else
|
||||||
|
// Use NEHALEM kernels for Sandy Bridge
|
||||||
|
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#define VENDOR_INTEL 1
|
#define VENDOR_INTEL 1
|
||||||
#define VENDOR_AMD 2
|
#define VENDOR_AMD 2
|
||||||
|
|
Loading…
Reference in New Issue