diff --git a/Makefile b/Makefile index 380ba1ce8..56b4426f8 100644 --- a/Makefile +++ b/Makefile @@ -153,6 +153,9 @@ ifeq ($(DYNAMIC_ARCH), 1) do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\ done @echo DYNAMIC_ARCH=1 >> Makefile.conf_last +ifeq ($(DYNAMIC_OLDER), 1) + @echo DYNAMIC_OLDER=1 >> Makefile.conf_last +endif endif ifdef USE_THREAD @echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last diff --git a/Makefile.install b/Makefile.install index 21c3c9e22..c51c8a021 100644 --- a/Makefile.install +++ b/Makefile.install @@ -98,7 +98,7 @@ endif @echo Generating openblas.pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)" @echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" @echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" - @echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" + @echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" @echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" @echo 'extralib='$(EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" @cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" diff --git a/Makefile.rule b/Makefile.rule index 1b4b8eb63..5c03d0195 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -17,6 +17,11 @@ VERSION = 0.3.1.dev # If you want to support multiple architecture in one binary # DYNAMIC_ARCH = 1 +# If you want the full list of x86_64 
architectures supported in DYNAMIC_ARCH +# mode (including individual optimized codes for PENRYN, DUNNINGTON, OPTERON, +# OPTERON_SSE3, ATOM and NANO rather than fallbacks to older architectures) +# DYNAMIC_OLDER = 1 + # C compiler including binary type(32bit / 64bit). Default is gcc. # Don't use Intel Compiler or PGI, it won't generate right codes as I expect. # CC = gcc diff --git a/Makefile.system b/Makefile.system index eaf3e9889..62ba0e466 100644 --- a/Makefile.system +++ b/Makefile.system @@ -472,7 +472,18 @@ DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ endif ifeq ($(ARCH), x86_64) -DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO +DYNAMIC_CORE = PRESCOTT CORE2 +ifeq ($(DYNAMIC_OLDER), 1) +DYNAMIC_CORE += PENRYN DUNNINGTON +endif +DYNAMIC_CORE += NEHALEM +ifeq ($(DYNAMIC_OLDER), 1) +DYNAMIC_CORE += OPTERON OPTERON_SSE3 +endif +DYNAMIC_CORE += BARCELONA +ifeq ($(DYNAMIC_OLDER), 1) +DYNAMIC_CORE += BOBCAT ATOM NANO +endif ifneq ($(NO_AVX), 1) DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR endif @@ -917,6 +928,10 @@ ifeq ($(DYNAMIC_ARCH), 1) CCOMMON_OPT += -DDYNAMIC_ARCH endif +ifeq ($(DYNAMIC_OLDER), 1) +CCOMMON_OPT += -DDYNAMIC_OLDER +endif + ifeq ($(NO_LAPACK), 1) CCOMMON_OPT += -DNO_LAPACK #Disable LAPACK C interface diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 527d2bec6..52fb64eaa 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -49,7 +49,18 @@ if (DYNAMIC_ARCH) endif () if (X86_64) - set(DYNAMIC_CORE PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO) + set(DYNAMIC_CORE PRESCOTT CORE2) + if (DYNAMIC_OLDER) + set (DYNAMIC_CORE ${DYNAMIC_CORE} PENRYN DUNNINGTON) + endif () + set (DYNAMIC_CORE ${DYNAMIC_CORE} NEHALEM) + if (DYNAMIC_OLDER) + set (DYNAMIC_CORE ${DYNAMIC_CORE} OPTERON OPTERON_SSE3) + endif () + set (DYNAMIC_CORE ${DYNAMIC_CORE} BARCELONA) + if (DYNAMIC_OLDER) + set (DYNAMIC_CORE ${DYNAMIC_CORE} 
BOBCAT ATOM NANO) + endif () if (NOT NO_AVX) set(DYNAMIC_CORE ${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR) endif () diff --git a/cmake/openblas.pc.in b/cmake/openblas.pc.in index 35973b09b..ca88a6d5f 100644 --- a/cmake/openblas.pc.in +++ b/cmake/openblas.pc.in @@ -1,7 +1,7 @@ libdir=@CMAKE_INSTALL_FULL_LIBDIR@ includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ -openblas_config=USE_64BITINT=@USE_64BITINT@ NO_CBLAS=@NO_CBLAS@ NO_LAPACK=@NO_LAPACK@ NO_LAPACKE=@NO_LAPACKE@ DYNAMIC_ARCH=@DYNAMIC_ARCH@ NO_AFFINITY=@NO_AFFINITY@ USE_OPENMP=@USE_OPENMP@ @CORE@ MAX_THREADS=@NUM_THREADS@ +openblas_config=USE_64BITINT=@USE_64BITINT@ NO_CBLAS=@NO_CBLAS@ NO_LAPACK=@NO_LAPACK@ NO_LAPACKE=@NO_LAPACKE@ DYNAMIC_ARCH=@DYNAMIC_ARCH@ DYNAMIC_OLDER=@DYNAMIC_OLDER@ NO_AFFINITY=@NO_AFFINITY@ USE_OPENMP=@USE_OPENMP@ @CORE@ MAX_THREADS=@NUM_THREADS@ Name: OpenBLAS Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version Version: @OPENBLAS_VERSION@ diff --git a/cmake/system.cmake b/cmake/system.cmake index c21fe7c14..48e8f75bc 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -163,6 +163,9 @@ endif () if (DYNAMIC_ARCH) set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH") + if (DYNAMIC_OLDER) + set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_OLDER") + endif () endif () if (NO_LAPACK) diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c index acb2d8b8c..4271c0a0d 100644 --- a/driver/others/dynamic.c +++ b/driver/others/dynamic.c @@ -56,16 +56,27 @@ EXTERN gotoblas_t gotoblas_BANIAS; EXTERN gotoblas_t gotoblas_ATHLON; extern gotoblas_t gotoblas_PRESCOTT; +extern gotoblas_t gotoblas_CORE2; +extern gotoblas_t gotoblas_NEHALEM; +extern gotoblas_t gotoblas_BARCELONA; +#ifdef DYNAMIC_OLDER extern gotoblas_t gotoblas_ATOM; extern gotoblas_t gotoblas_NANO; -extern gotoblas_t gotoblas_CORE2; extern gotoblas_t gotoblas_PENRYN; extern gotoblas_t gotoblas_DUNNINGTON; -extern gotoblas_t gotoblas_NEHALEM; extern gotoblas_t gotoblas_OPTERON; extern 
gotoblas_t gotoblas_OPTERON_SSE3; -extern gotoblas_t gotoblas_BARCELONA; extern gotoblas_t gotoblas_BOBCAT; +#else +#define gotoblas_ATOM gotoblas_NEHALEM +#define gotoblas_NANO gotoblas_NEHALEM +#define gotoblas_PENRYN gotoblas_CORE2 +#define gotoblas_DUNNINGTON gotoblas_CORE2 +#define gotoblas_OPTERON gotoblas_CORE2 +#define gotoblas_OPTERON_SSE3 gotoblas_CORE2 +#define gotoblas_BOBCAT gotoblas_CORE2 +#endif + #ifndef NO_AVX extern gotoblas_t gotoblas_SANDYBRIDGE; extern gotoblas_t gotoblas_BULLDOZER;