Merge branch 'hotfix-v0.2.8'
This commit is contained in:
commit
835293cc1a
|
@ -79,5 +79,9 @@ In chronological order:
|
||||||
* [2013-07-11] create openblas_get_parallel to retrieve information which parallelization
|
* [2013-07-11] create openblas_get_parallel to retrieve information which parallelization
|
||||||
model is used by OpenBLAS.
|
model is used by OpenBLAS.
|
||||||
|
|
||||||
|
* Sébastien Fabbro <bicatali@gentoo.org>
|
||||||
|
* [2013-07-24] Modify makefile to respect user's LDFLAGS
|
||||||
|
* [2013-07-24] Add stack markings for GNU as arch-independent for assembler files
|
||||||
|
|
||||||
* [Your name or handle] <[email or website]>
|
* [Your name or handle] <[email or website]>
|
||||||
* [Date] [Brief summary of your changes]
|
* [Date] [Brief summary of your changes]
|
||||||
|
|
|
@ -1,4 +1,16 @@
|
||||||
OpenBLAS ChangeLog
|
OpenBLAS ChangeLog
|
||||||
|
====================================================================
|
||||||
|
Version 0.2.8
|
||||||
|
01-Aug-2013
|
||||||
|
common:
|
||||||
|
* Support Open64 5.0. (#266)
|
||||||
|
* Add executable stack markings. (#262, Thanks to Sébastien Fabbro)
|
||||||
|
* Respect user's LDFLAGS (Thanks to Sébastien Fabbro)
|
||||||
|
|
||||||
|
x86/x86-64:
|
||||||
|
* Rollback bulldozer and piledriver kernels to barcelona kernels (#263)
|
||||||
|
We will fix the computational error bug in bulldozer and piledriver kernels.
|
||||||
|
|
||||||
====================================================================
|
====================================================================
|
||||||
Version 0.2.7
|
Version 0.2.7
|
||||||
20-Jul-2013
|
20-Jul-2013
|
||||||
|
|
|
@ -1,6 +1 @@
|
||||||
COPT = -Wall -O2 # -DGEMMTEST
|
COPT = -Wall -O2 # -DGEMMTEST
|
||||||
ifdef BINARY64
|
|
||||||
else
|
|
||||||
# LDFLAGS = -m elf32ppc
|
|
||||||
LDFLAGS = -m elf_i386
|
|
||||||
endif
|
|
||||||
|
|
|
@ -17,13 +17,7 @@ endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef BINARY64
|
ifdef BINARY64
|
||||||
ifeq ($(OSNAME), Linux)
|
|
||||||
LDFLAGS = -m elf64ppc
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(OSNAME), Darwin)
|
|
||||||
LDFLAGS = -arch ppc64
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(OSNAME), AIX)
|
ifeq ($(OSNAME), AIX)
|
||||||
CCOMMON_OPT += -mpowerpc64 -maix64
|
CCOMMON_OPT += -mpowerpc64 -maix64
|
||||||
|
@ -34,17 +28,12 @@ ifeq ($(COMPILER_F77), xlf)
|
||||||
FCOMMON_OPT += -q64
|
FCOMMON_OPT += -q64
|
||||||
endif
|
endif
|
||||||
ARFLAGS = -X 64
|
ARFLAGS = -X 64
|
||||||
LDFLAGS = -b64
|
|
||||||
ASFLAGS = -a64
|
ASFLAGS = -a64
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
ifeq ($(OSNAME), Linux)
|
|
||||||
LDFLAGS = -m elf32ppc
|
|
||||||
endif
|
|
||||||
ifeq ($(OSNAME), AIX)
|
ifeq ($(OSNAME), AIX)
|
||||||
CCOMMON_OPT += -Wa,-a32
|
CCOMMON_OPT += -Wa,-a32
|
||||||
ARFLAGS = -X 32
|
ARFLAGS = -X 32
|
||||||
LDFLAGS = -b32
|
|
||||||
ASFLAGS = -a32
|
ASFLAGS = -a32
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
#
|
#
|
||||||
|
|
||||||
# This library's version
|
# This library's version
|
||||||
VERSION = 0.2.7
|
VERSION = 0.2.8
|
||||||
|
|
||||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||||
|
|
|
@ -10,7 +10,6 @@ endif
|
||||||
ifeq ($(COMPILER_F77), f90)
|
ifeq ($(COMPILER_F77), f90)
|
||||||
FCOMMON_OPT += -xarch=v9
|
FCOMMON_OPT += -xarch=v9
|
||||||
endif
|
endif
|
||||||
LDFLAGS = -64
|
|
||||||
else
|
else
|
||||||
|
|
||||||
CCOMMON_OPT += -mcpu=v9
|
CCOMMON_OPT += -mcpu=v9
|
||||||
|
|
|
@ -324,14 +324,16 @@ ifeq ($(ARCH), x86)
|
||||||
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
|
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
|
||||||
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||||
ifneq ($(NO_AVX), 1)
|
ifneq ($(NO_AVX), 1)
|
||||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
|
DYNAMIC_CORE += SANDYBRIDGE
|
||||||
|
#BULLDOZER PILEDRIVER
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(ARCH), x86_64)
|
ifeq ($(ARCH), x86_64)
|
||||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||||
ifneq ($(NO_AVX), 1)
|
ifneq ($(NO_AVX), 1)
|
||||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
|
DYNAMIC_CORE += SANDYBRIDGE
|
||||||
|
#BULLDOZER PILEDRIVER
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -895,6 +897,7 @@ export CC
|
||||||
export FC
|
export FC
|
||||||
export BU
|
export BU
|
||||||
export FU
|
export FU
|
||||||
|
export NEED2UNDERSCORES
|
||||||
export USE_THREAD
|
export USE_THREAD
|
||||||
export NUM_THREADS
|
export NUM_THREADS
|
||||||
export NUM_CORES
|
export NUM_CORES
|
||||||
|
|
|
@ -1,8 +1,5 @@
|
||||||
# COMPILER_PREFIX = mingw32-
|
# COMPILER_PREFIX = mingw32-
|
||||||
|
|
||||||
ifeq ($(OSNAME), Linux)
|
|
||||||
LDFLAGS = -melf_i386
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(OSNAME), Interix)
|
ifeq ($(OSNAME), Interix)
|
||||||
ARFLAGS = -m x86
|
ARFLAGS = -m x86
|
||||||
|
|
|
@ -2,25 +2,12 @@
|
||||||
|
|
||||||
ifeq ($(OSNAME), SunOS)
|
ifeq ($(OSNAME), SunOS)
|
||||||
ifdef BINARY64
|
ifdef BINARY64
|
||||||
LDFLAGS = -64
|
|
||||||
ifeq ($(F_COMPILER), SUN)
|
ifeq ($(F_COMPILER), SUN)
|
||||||
FCOMMON_OPT += -m64
|
FCOMMON_OPT += -m64
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(OSNAME), FreeBSD)
|
|
||||||
LDFLAGS = -m elf_x86_64_fbsd
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(OSNAME), Linux)
|
|
||||||
LDFLAGS = -m elf_x86_64
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(OSNAME), Darwin)
|
|
||||||
LDFLAGS =
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(OSNAME), Interix)
|
ifeq ($(OSNAME), Interix)
|
||||||
ARFLAGS = -m x64
|
ARFLAGS = -m x64
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -150,9 +150,17 @@ REALNAME:
|
||||||
#define PROFCODE .prologue 0
|
#define PROFCODE .prologue 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__linux__) && defined(__ELF__)
|
||||||
|
#define GNUSTACK .section .note.GNU-stack,"",%progbits
|
||||||
|
#else
|
||||||
|
#define GNUSTACK
|
||||||
|
#endif
|
||||||
|
|
||||||
#define EPILOGUE \
|
#define EPILOGUE \
|
||||||
.end REALNAME; \
|
.end REALNAME; \
|
||||||
.ident VERSION
|
.ident VERSION; \
|
||||||
|
GNUSTACK
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
|
|
|
@ -379,8 +379,15 @@ REALNAME:
|
||||||
#define PROFCODE
|
#define PROFCODE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__linux__) && defined(__ELF__)
|
||||||
|
#define GNUSTACK .section .note.GNU-stack,"",%progbits
|
||||||
|
#else
|
||||||
|
#define GNUSTACK
|
||||||
|
#endif
|
||||||
|
|
||||||
#define EPILOGUE \
|
#define EPILOGUE \
|
||||||
.endp REALNAME
|
.endp REALNAME ; \
|
||||||
|
GNUSTACK
|
||||||
|
|
||||||
#define START_ADDRESS 0x20000fc800000000UL
|
#define START_ADDRESS 0x20000fc800000000UL
|
||||||
|
|
||||||
|
|
|
@ -235,10 +235,17 @@ REALNAME: ;\
|
||||||
.set noreorder ;\
|
.set noreorder ;\
|
||||||
.set nomacro
|
.set nomacro
|
||||||
|
|
||||||
|
#if defined(__linux__) && defined(__ELF__)
|
||||||
|
#define GNUSTACK .section .note.GNU-stack,"",%progbits
|
||||||
|
#else
|
||||||
|
#define GNUSTACK
|
||||||
|
#endif
|
||||||
|
|
||||||
#define EPILOGUE \
|
#define EPILOGUE \
|
||||||
.set macro ;\
|
.set macro ;\
|
||||||
.set reorder ;\
|
.set reorder ;\
|
||||||
.end REALNAME
|
.end REALNAME ;\
|
||||||
|
GNUSTACK
|
||||||
|
|
||||||
#define PROFCODE
|
#define PROFCODE
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -199,8 +199,17 @@ static __inline int blas_quickdivide(blasint x, blasint y){
|
||||||
.type REALNAME, #function; \
|
.type REALNAME, #function; \
|
||||||
.proc 07; \
|
.proc 07; \
|
||||||
REALNAME:;
|
REALNAME:;
|
||||||
|
|
||||||
|
#if defined(__linux__) && defined(__ELF__)
|
||||||
|
#define GNUSTACK .section .note.GNU-stack,"",%progbits
|
||||||
|
#else
|
||||||
|
#define GNUSTACK
|
||||||
|
#endif
|
||||||
|
|
||||||
#define EPILOGUE \
|
#define EPILOGUE \
|
||||||
.size REALNAME, .-REALNAME
|
.size REALNAME, .-REALNAME; \
|
||||||
|
GNUSTACK
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -301,7 +301,9 @@ REALNAME:
|
||||||
#define PROFCODE
|
#define PROFCODE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define EPILOGUE .size REALNAME, .-REALNAME
|
#define EPILOGUE \
|
||||||
|
.size REALNAME, .-REALNAME; \
|
||||||
|
.section .note.GNU-stack,"",%progbits
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -372,7 +372,10 @@ REALNAME:
|
||||||
#define PROFCODE
|
#define PROFCODE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define EPILOGUE .size REALNAME, .-REALNAME
|
#define EPILOGUE \
|
||||||
|
.size REALNAME, .-REALNAME; \
|
||||||
|
.section .note.GNU-stack,"",%progbits
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
8
cpuid.h
8
cpuid.h
|
@ -105,8 +105,8 @@
|
||||||
#define CORE_NANO 19
|
#define CORE_NANO 19
|
||||||
#define CORE_SANDYBRIDGE 20
|
#define CORE_SANDYBRIDGE 20
|
||||||
#define CORE_BOBCAT 21
|
#define CORE_BOBCAT 21
|
||||||
#define CORE_BULLDOZER 22
|
#define CORE_BULLDOZER CORE_BARCELONA
|
||||||
#define CORE_PILEDRIVER 23
|
#define CORE_PILEDRIVER CORE_BARCELONA
|
||||||
#define CORE_HASWELL CORE_SANDYBRIDGE
|
#define CORE_HASWELL CORE_SANDYBRIDGE
|
||||||
|
|
||||||
#define HAVE_SSE (1 << 0)
|
#define HAVE_SSE (1 << 0)
|
||||||
|
@ -198,8 +198,8 @@ typedef struct {
|
||||||
#define CPUTYPE_NANO 43
|
#define CPUTYPE_NANO 43
|
||||||
#define CPUTYPE_SANDYBRIDGE 44
|
#define CPUTYPE_SANDYBRIDGE 44
|
||||||
#define CPUTYPE_BOBCAT 45
|
#define CPUTYPE_BOBCAT 45
|
||||||
#define CPUTYPE_BULLDOZER 46
|
#define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA
|
||||||
#define CPUTYPE_PILEDRIVER 47
|
#define CPUTYPE_PILEDRIVER CPUTYPE_BARCELONA
|
||||||
// this define is because BLAS doesn't have haswell specific optimizations yet
|
// this define is because BLAS doesn't have haswell specific optimizations yet
|
||||||
#define CPUTYPE_HASWELL CPUTYPE_SANDYBRIDGE
|
#define CPUTYPE_HASWELL CPUTYPE_SANDYBRIDGE
|
||||||
|
|
||||||
|
|
|
@ -229,8 +229,8 @@ int get_cputype(int gettype){
|
||||||
if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
|
if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
|
||||||
#ifndef NO_AVX
|
#ifndef NO_AVX
|
||||||
if (support_avx()) feature |= HAVE_AVX;
|
if (support_avx()) feature |= HAVE_AVX;
|
||||||
|
if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3;
|
||||||
#endif
|
#endif
|
||||||
if ((ecx & (1 << 20)) != 0) feature |= HAVE_FMA3;
|
|
||||||
|
|
||||||
if (have_excpuid() >= 0x01) {
|
if (have_excpuid() >= 0x01) {
|
||||||
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
|
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
|
||||||
|
|
|
@ -77,7 +77,7 @@ endif
|
||||||
clean ::
|
clean ::
|
||||||
rm -f x*
|
rm -f x*
|
||||||
|
|
||||||
FLDFLAGS = $(FFLAGS:-fPIC=)
|
FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS)
|
||||||
CEXTRALIB =
|
CEXTRALIB =
|
||||||
|
|
||||||
# Single real
|
# Single real
|
||||||
|
|
|
@ -63,14 +63,16 @@ extern gotoblas_t gotoblas_BARCELONA;
|
||||||
extern gotoblas_t gotoblas_BOBCAT;
|
extern gotoblas_t gotoblas_BOBCAT;
|
||||||
#ifndef NO_AVX
|
#ifndef NO_AVX
|
||||||
extern gotoblas_t gotoblas_SANDYBRIDGE;
|
extern gotoblas_t gotoblas_SANDYBRIDGE;
|
||||||
extern gotoblas_t gotoblas_BULLDOZER;
|
//extern gotoblas_t gotoblas_BULLDOZER;
|
||||||
extern gotoblas_t gotoblas_PILEDRIVER;
|
//extern gotoblas_t gotoblas_PILEDRIVER;
|
||||||
#else
|
#else
|
||||||
//Use NEHALEM kernels for sandy bridge
|
//Use NEHALEM kernels for sandy bridge
|
||||||
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
|
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
|
||||||
|
#endif
|
||||||
|
|
||||||
#define gotoblas_BULLDOZER gotoblas_BARCELONA
|
#define gotoblas_BULLDOZER gotoblas_BARCELONA
|
||||||
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
|
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
|
||||||
#endif
|
|
||||||
//Use sandy bridge kernels for haswell.
|
//Use sandy bridge kernels for haswell.
|
||||||
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE
|
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE
|
||||||
|
|
||||||
|
|
|
@ -83,6 +83,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include <dirent.h>
|
#include <dirent.h>
|
||||||
#include <dlfcn.h>
|
#include <dlfcn.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
#define MAX_NODES 16
|
#define MAX_NODES 16
|
||||||
#define MAX_CPUS 256
|
#define MAX_CPUS 256
|
||||||
|
@ -315,7 +316,7 @@ static int numa_check(void) {
|
||||||
}
|
}
|
||||||
|
|
||||||
while ((dir = readdir(dp)) != NULL) {
|
while ((dir = readdir(dp)) != NULL) {
|
||||||
if (*(unsigned int *) dir -> d_name == 0x065646f6eU) {
|
if (strncmp(dir->d_name, "node", 4)==0) {
|
||||||
|
|
||||||
node = atoi(&dir -> d_name[4]);
|
node = atoi(&dir -> d_name[4]);
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,10 @@ ifndef NO_LAPACKE
|
||||||
NO_LAPACKE = 0
|
NO_LAPACKE = 0
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifndef NEED2UNDERSCORES
|
||||||
|
NEED2UNDERSCORES=0
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(OSNAME), WINNT)
|
ifeq ($(OSNAME), WINNT)
|
||||||
ifeq ($(F_COMPILER), GFORTRAN)
|
ifeq ($(F_COMPILER), GFORTRAN)
|
||||||
EXTRALIB += -lgfortran
|
EXTRALIB += -lgfortran
|
||||||
|
@ -89,18 +93,18 @@ else
|
||||||
endif
|
endif
|
||||||
|
|
||||||
libgoto2_shared.dll : ../$(LIBNAME) libgoto2_shared.def
|
libgoto2_shared.dll : ../$(LIBNAME) libgoto2_shared.def
|
||||||
$(CC) $(CFLAGS) libgoto2_shared.def -shared -o $(@F) \
|
$(CC) $(CFLAGS) $(LDFLAGS) libgoto2_shared.def -shared -o $(@F) \
|
||||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
|
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
|
||||||
-Wl,--out-implib,libgoto2_shared.lib $(FEXTRALIB)
|
-Wl,--out-implib,libgoto2_shared.lib $(FEXTRALIB)
|
||||||
|
|
||||||
libopenblas.def : gensymbol
|
libopenblas.def : gensymbol
|
||||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
|
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)
|
||||||
|
|
||||||
libgoto2_shared.def : gensymbol
|
libgoto2_shared.def : gensymbol
|
||||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
|
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)
|
||||||
|
|
||||||
libgoto_hpl.def : gensymbol
|
libgoto_hpl.def : gensymbol
|
||||||
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
|
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)
|
||||||
|
|
||||||
$(LIBDYNNAME) : ../$(LIBNAME) osx.def
|
$(LIBDYNNAME) : ../$(LIBNAME) osx.def
|
||||||
$(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
|
$(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
|
||||||
|
@ -116,14 +120,14 @@ ifeq ($(OSNAME), Linux)
|
||||||
so : ../$(LIBSONAME)
|
so : ../$(LIBSONAME)
|
||||||
|
|
||||||
../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c
|
../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c
|
||||||
$(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \
|
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
|
||||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
|
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
|
||||||
-Wl,--retain-symbols-file=linux.def -Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB)
|
-Wl,--retain-symbols-file=linux.def -Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB)
|
||||||
ifneq ($(C_COMPILER), LSB)
|
ifneq ($(C_COMPILER), LSB)
|
||||||
$(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||||
else
|
else
|
||||||
#Use FC on LSB
|
#Use FC on LSB
|
||||||
$(FC) $(FFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
$(FC) $(FFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||||
endif
|
endif
|
||||||
rm -f linktest
|
rm -f linktest
|
||||||
|
|
||||||
|
@ -135,10 +139,10 @@ ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD NetBSD))
|
||||||
so : ../$(LIBSONAME)
|
so : ../$(LIBSONAME)
|
||||||
|
|
||||||
../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c
|
../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c
|
||||||
$(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \
|
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
|
||||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
|
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
|
||||||
-Wl,--retain-symbols-file=linux.def $(FEXTRALIB) $(EXTRALIB)
|
-Wl,--retain-symbols-file=linux.def $(FEXTRALIB) $(EXTRALIB)
|
||||||
$(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||||
rm -f linktest
|
rm -f linktest
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
@ -148,15 +152,15 @@ ifeq ($(OSNAME), OSF1)
|
||||||
so : ../$(LIBSONAME)
|
so : ../$(LIBSONAME)
|
||||||
|
|
||||||
../$(LIBSONAME) :
|
../$(LIBSONAME) :
|
||||||
$(CC) -shared -o ../$(LIBSONAME) ../$(LIBNAME)
|
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) ../$(LIBNAME)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(OSNAME), SunOS)
|
ifeq ($(OSNAME), SunOS)
|
||||||
|
|
||||||
so : ../$(LIBSONAME)
|
so : ../$(LIBSONAME)
|
||||||
$(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \
|
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
|
||||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(EXTRALIB)
|
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(EXTRALIB)
|
||||||
$(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||||
rm -f linktest
|
rm -f linktest
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
@ -187,23 +191,23 @@ static : ../$(LIBNAME)
|
||||||
rm -f goto.$(SUFFIX)
|
rm -f goto.$(SUFFIX)
|
||||||
|
|
||||||
linux.def : gensymbol ../Makefile.system ../getarch.c
|
linux.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
|
perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)
|
||||||
|
|
||||||
osx.def : gensymbol ../Makefile.system ../getarch.c
|
osx.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
|
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)
|
||||||
|
|
||||||
aix.def : gensymbol ../Makefile.system ../getarch.c
|
aix.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
|
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)
|
||||||
|
|
||||||
symbol.S : gensymbol
|
symbol.S : gensymbol
|
||||||
perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > symbol.S
|
perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > symbol.S
|
||||||
|
|
||||||
test : linktest.c
|
test : linktest.c
|
||||||
$(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
||||||
rm -f linktest
|
rm -f linktest
|
||||||
|
|
||||||
linktest.c : gensymbol ../Makefile.system ../getarch.c
|
linktest.c : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > linktest.c
|
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > linktest.c
|
||||||
|
|
||||||
clean ::
|
clean ::
|
||||||
@rm -f *.def *.dylib __.SYMDEF*
|
@rm -f *.def *.dylib __.SYMDEF*
|
||||||
|
|
|
@ -114,8 +114,8 @@
|
||||||
|
|
||||||
# ALLAUX -- Auxiliary routines called from all precisions
|
# ALLAUX -- Auxiliary routines called from all precisions
|
||||||
# already provided by @blasobjs: xerbla, lsame
|
# already provided by @blasobjs: xerbla, lsame
|
||||||
ilaenv, ieeeck, lsamen, xerbla_array, iparmq,
|
ilaenv, ieeeck, lsamen, iparmq,
|
||||||
ilaprec, ilatrans, ilauplo, iladiag, chla_transtype,
|
ilaprec, ilatrans, ilauplo, iladiag,
|
||||||
ilaver, slamch, slamc3,
|
ilaver, slamch, slamc3,
|
||||||
|
|
||||||
# SCLAUX -- Auxiliary routines called from both REAL and COMPLEX.
|
# SCLAUX -- Auxiliary routines called from both REAL and COMPLEX.
|
||||||
|
@ -2672,12 +2672,25 @@
|
||||||
#LAPACKE_zlagsy_work,
|
#LAPACKE_zlagsy_work,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
#These functions may need 2 underscores.
|
||||||
|
@lapack_embeded_underscore_objs=(xerbla_array, chla_transtype,);
|
||||||
|
|
||||||
if ($ARGV[5] == 1) {
|
if ($ARGV[5] == 1) {
|
||||||
#NO_LAPACK=1
|
#NO_LAPACK=1
|
||||||
@underscore_objs = (@blasobjs, @misc_underscore_objs);
|
@underscore_objs = (@blasobjs, @misc_underscore_objs);
|
||||||
} elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1" ||
|
} elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1" ||
|
||||||
-d "../lapack-3.4.2" || -d "../lapack-netlib") {
|
-d "../lapack-3.4.2" || -d "../lapack-netlib") {
|
||||||
@underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs);
|
|
||||||
|
if ($ARGV[7] == 0){
|
||||||
|
# NEED2UNDERSCORES=0
|
||||||
|
# Don't need 2 underscores
|
||||||
|
@underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs, @lapack_embeded_underscore_objs);
|
||||||
|
}else{
|
||||||
|
# Need 2 underscores
|
||||||
|
@underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs);
|
||||||
|
@need_2underscore_objs = (@lapack_embeded_underscore_objs);
|
||||||
|
};
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
@underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs);
|
@underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs);
|
||||||
}
|
}
|
||||||
|
@ -2729,6 +2742,10 @@ if ($ARGV[0] eq "linux"){
|
||||||
print $objs, $bu, "\n";
|
print $objs, $bu, "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
foreach $objs (@need_2underscore_objs) {
|
||||||
|
print $objs, $bu, $bu, "\n";
|
||||||
|
}
|
||||||
|
|
||||||
# if ($ARGV[4] == 0) {
|
# if ($ARGV[4] == 0) {
|
||||||
foreach $objs (@no_underscore_objs) {
|
foreach $objs (@no_underscore_objs) {
|
||||||
print $objs, "\n";
|
print $objs, "\n";
|
||||||
|
@ -2750,6 +2767,10 @@ if ($ARGV[0] eq "osx"){
|
||||||
print "_", $objs, $bu, "\n";
|
print "_", $objs, $bu, "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
foreach $objs (@need_2underscore_objs) {
|
||||||
|
print "_", $objs, $bu, $bu, "\n";
|
||||||
|
}
|
||||||
|
|
||||||
# if ($ARGV[4] == 0) {
|
# if ($ARGV[4] == 0) {
|
||||||
foreach $objs (@no_underscore_objs) {
|
foreach $objs (@no_underscore_objs) {
|
||||||
print "_", $objs, "\n";
|
print "_", $objs, "\n";
|
||||||
|
@ -2767,6 +2788,10 @@ if ($ARGV[0] eq "aix"){
|
||||||
print $objs, $bu, "\n";
|
print $objs, $bu, "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
foreach $objs (@need_2underscore_objs) {
|
||||||
|
print $objs, $bu, $bu, "\n";
|
||||||
|
}
|
||||||
|
|
||||||
# if ($ARGV[4] == 0) {
|
# if ($ARGV[4] == 0) {
|
||||||
foreach $objs (@no_underscore_objs) {
|
foreach $objs (@no_underscore_objs) {
|
||||||
print $objs, "\n";
|
print $objs, "\n";
|
||||||
|
@ -2792,6 +2817,17 @@ if ($ARGV[0] eq "win2k"){
|
||||||
$count ++;
|
$count ++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
foreach $objs (@need_2underscore_objs) {
|
||||||
|
$uppercase = $objs;
|
||||||
|
$uppercase =~ tr/[a-z]/[A-Z]/;
|
||||||
|
print "\t$objs=$objs","__ \@", $count, "\n";
|
||||||
|
$count ++;
|
||||||
|
print "\t",$objs, "__=$objs","__ \@", $count, "\n";
|
||||||
|
$count ++;
|
||||||
|
print "\t$uppercase=$objs", "__ \@", $count, "\n";
|
||||||
|
$count ++;
|
||||||
|
}
|
||||||
|
|
||||||
#for misc_common_objs
|
#for misc_common_objs
|
||||||
foreach $objs (@misc_common_objs) {
|
foreach $objs (@misc_common_objs) {
|
||||||
|
|
||||||
|
@ -2852,6 +2888,18 @@ if ($ARGV[0] eq "microsoft"){
|
||||||
print "\t$uppercase\_ = $objs","_\n";
|
print "\t$uppercase\_ = $objs","_\n";
|
||||||
$count ++;
|
$count ++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
foreach $objs (@need_2underscore_objs) {
|
||||||
|
$uppercase = $objs;
|
||||||
|
$uppercase =~ tr/[a-z]/[A-Z]/;
|
||||||
|
print "\t$objs=$objs","__ \@", $count, "\n";
|
||||||
|
$count ++;
|
||||||
|
print "\t",$objs, "__=$objs","__ \@", $count, "\n";
|
||||||
|
$count ++;
|
||||||
|
print "\t$uppercase=$objs", "__ \@", $count, "\n";
|
||||||
|
$count ++;
|
||||||
|
}
|
||||||
|
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2868,6 +2916,16 @@ if ($ARGV[0] eq "win2kasm"){
|
||||||
print "_", $uppercase, "_:\n";
|
print "_", $uppercase, "_:\n";
|
||||||
print "\tjmp\t_", $objs, "_\n";
|
print "\tjmp\t_", $objs, "_\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
foreach $objs (@need_2underscore_objs) {
|
||||||
|
$uppercase = $objs;
|
||||||
|
$uppercase =~ tr/[a-z]/[A-Z]/;
|
||||||
|
print "\t.align 16\n";
|
||||||
|
print "\t.globl _", $uppercase, "__\n";
|
||||||
|
print "_", $uppercase, "__:\n";
|
||||||
|
print "\tjmp\t_", $objs, "__\n";
|
||||||
|
}
|
||||||
|
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2880,6 +2938,11 @@ if ($ARGV[0] eq "linktest"){
|
||||||
foreach $objs (@underscore_objs) {
|
foreach $objs (@underscore_objs) {
|
||||||
print $objs, $bu, "();\n" if $objs ne "xerbla";
|
print $objs, $bu, "();\n" if $objs ne "xerbla";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
foreach $objs (@need_2underscore_objs) {
|
||||||
|
print $objs, $bu, $bu, "();\n";
|
||||||
|
}
|
||||||
|
|
||||||
# if ($ARGV[4] == 0) {
|
# if ($ARGV[4] == 0) {
|
||||||
foreach $objs (@no_underscore_objs) {
|
foreach $objs (@no_underscore_objs) {
|
||||||
print $objs, "();\n";
|
print $objs, "();\n";
|
||||||
|
|
21
f_check
21
f_check
|
@ -114,6 +114,12 @@ if ($compiler eq "") {
|
||||||
$vendor = IBM;
|
$vendor = IBM;
|
||||||
$openmp = "-openmp";
|
$openmp = "-openmp";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# For a name with an embedded underscore, e.g. zho_ge, the compiler may append 2 underscores.
|
||||||
|
$data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s`;
|
||||||
|
if ($data =~ /zho_ge__/) {
|
||||||
|
$need2bu = 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($vendor eq "") {
|
if ($vendor eq "") {
|
||||||
|
@ -245,6 +251,8 @@ if ($link ne "") {
|
||||||
|
|
||||||
$link =~ s/\-rpath\s+/\-rpath\@/g;
|
$link =~ s/\-rpath\s+/\-rpath\@/g;
|
||||||
|
|
||||||
|
$link =~ s/\-rpath-link\s+/\-rpath-link\@/g;
|
||||||
|
|
||||||
@flags = split(/[\s\,\n]/, $link);
|
@flags = split(/[\s\,\n]/, $link);
|
||||||
# remove leading and trailing quotes from each flag.
|
# remove leading and trailing quotes from each flag.
|
||||||
@flags = map {s/^['"]|['"]$//g; $_} @flags;
|
@flags = map {s/^['"]|['"]$//g; $_} @flags;
|
||||||
|
@ -265,7 +273,15 @@ if ($link ne "") {
|
||||||
$linker_L .= "-Wl,". $flags . " ";
|
$linker_L .= "-Wl,". $flags . " ";
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($flags =~ /^\-rpath/) {
|
if ($flags =~ /^\-rpath\@/) {
|
||||||
|
$flags =~ s/\@/\,/g;
|
||||||
|
if ($vendor eq "PGI") {
|
||||||
|
$flags =~ s/lib$/libso/;
|
||||||
|
}
|
||||||
|
$linker_L .= "-Wl,". $flags . " " ;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($flags =~ /^\-rpath-link\@/) {
|
||||||
$flags =~ s/\@/\,/g;
|
$flags =~ s/\@/\,/g;
|
||||||
if ($vendor eq "PGI") {
|
if ($vendor eq "PGI") {
|
||||||
$flags =~ s/lib$/libso/;
|
$flags =~ s/lib$/libso/;
|
||||||
|
@ -309,6 +325,9 @@ print MAKEFILE "NOFORTRAN=1\n" if $nofortran == 1;
|
||||||
|
|
||||||
print CONFFILE "#define BUNDERSCORE\t$bu\n" if $bu ne "";
|
print CONFFILE "#define BUNDERSCORE\t$bu\n" if $bu ne "";
|
||||||
print CONFFILE "#define NEEDBUNDERSCORE\t1\n" if $bu ne "";
|
print CONFFILE "#define NEEDBUNDERSCORE\t1\n" if $bu ne "";
|
||||||
|
print CONFFILE "#define NEED2UNDERSCORES\t1\n" if $need2bu ne "";
|
||||||
|
|
||||||
|
print MAKEFILE "NEED2UNDERSCORES=1\n" if $need2bu ne "";
|
||||||
|
|
||||||
if (($linker_l ne "") || ($linker_a ne "")) {
|
if (($linker_l ne "") || ($linker_a ne "")) {
|
||||||
print MAKEFILE "FEXTRALIB=$linker_L $linker_l $linker_a\n";
|
print MAKEFILE "FEXTRALIB=$linker_L $linker_l $linker_a\n";
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
double complex function zho_ge()
|
||||||
|
|
||||||
|
zho_ge = (0.0d0,0.0d0)
|
||||||
|
|
||||||
|
return
|
||||||
|
end
|
|
@ -354,7 +354,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define CORENAME "OPTERON"
|
#define CORENAME "OPTERON"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL)
|
#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) || defined (FORCE_PILEDRIVER) || defined (FORCE_BULLDOZER)
|
||||||
#define FORCE
|
#define FORCE
|
||||||
#define FORCE_INTEL
|
#define FORCE_INTEL
|
||||||
#define ARCHITECTURE "X86"
|
#define ARCHITECTURE "X86"
|
||||||
|
@ -384,7 +384,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define CORENAME "BOBCAT"
|
#define CORENAME "BOBCAT"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined (FORCE_BULLDOZER)
|
#if 0
|
||||||
#define FORCE
|
#define FORCE
|
||||||
#define FORCE_INTEL
|
#define FORCE_INTEL
|
||||||
#define ARCHITECTURE "X86"
|
#define ARCHITECTURE "X86"
|
||||||
|
@ -400,7 +400,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define CORENAME "BULLDOZER"
|
#define CORENAME "BULLDOZER"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined (FORCE_PILEDRIVER)
|
#if 0
|
||||||
#define FORCE
|
#define FORCE
|
||||||
#define FORCE_INTEL
|
#define FORCE_INTEL
|
||||||
#define ARCHITECTURE "X86"
|
#define ARCHITECTURE "X86"
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
|
|
||||||
if ( (argc <= 1) || (argc >= 2) && (*argv[1] == '0')) {
|
if ( (argc <= 1) || ((argc >= 2) && (*argv[1] == '0'))) {
|
||||||
printf("SGEMM_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M);
|
printf("SGEMM_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M);
|
||||||
printf("SGEMM_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N);
|
printf("SGEMM_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N);
|
||||||
printf("DGEMM_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M);
|
printf("DGEMM_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M);
|
||||||
|
|
|
@ -60,7 +60,7 @@ static blasint (*trtri_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern void dtrtri_lapack_(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info);
|
extern void BLASFUNC(dtrtrilapack)(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info);
|
||||||
|
|
||||||
int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
|
int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
|
||||||
|
|
||||||
|
@ -137,7 +137,10 @@ int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *In
|
||||||
// double trtri_U single thread error
|
// double trtri_U single thread error
|
||||||
// call dtrtri from lapack as a workaround.
|
// call dtrtri from lapack as a workaround.
|
||||||
if(uplo==0){
|
if(uplo==0){
|
||||||
dtrtri_lapack_(UPLO, DIAG, N, a, ldA, Info);
|
BLASFUNC(dtrtrilapack)(UPLO, DIAG, N, a, ldA, Info);
|
||||||
|
#ifndef PPC440
|
||||||
|
blas_memory_free(buffer);
|
||||||
|
#endif
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -107,7 +107,7 @@
|
||||||
*> \ingroup doubleOTHERcomputational
|
*> \ingroup doubleOTHERcomputational
|
||||||
*
|
*
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
SUBROUTINE DTRTRI_LAPACK( UPLO, DIAG, N, A, LDA, INFO )
|
SUBROUTINE DTRTRILAPACK( UPLO, DIAG, N, A, LDA, INFO )
|
||||||
*
|
*
|
||||||
* -- LAPACK computational routine (version 3.4.0) --
|
* -- LAPACK computational routine (version 3.4.0) --
|
||||||
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
||||||
|
|
|
@ -88,7 +88,7 @@ else
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
FLDFLAGS = $(FFLAGS:-fPIC=)
|
FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS)
|
||||||
CEXTRALIB =
|
CEXTRALIB =
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue