Compare commits

...

28 Commits

Author SHA1 Message Date
Zhang Xianyi
3a26470fb7 Merge branch 'develop' 2012-10-09 20:08:28 +08:00
Zhang Xianyi
6c5899dff5 Don't use xgetbv instruction when NO_AVX=1 2012-10-09 14:52:35 +08:00
Zhang Xianyi
2df2878dfc Merge branch 'develop' 2012-10-08 13:38:03 +08:00
Zhang Xianyi
0b719945c5 Updated the doc for 0.2.4 version. 2012-10-08 13:37:44 +08:00
Zhang Xianyi
b1a54a0107 Fixed #141. make f77blas.h compatible with compilers which lack C99 complex number.
Apply the patch from Tony @tonyhill. Thank you.
2012-10-08 12:48:20 +08:00
Zhang Xianyi
08c177ca36 Refs #145. Update LAPACK to 3.4.2 version. 2012-09-29 23:14:39 +08:00
Zhang Xianyi
2573311308 refs #140. Fixed zdot incompatibility ABI issue with GCC 4.7 on Win 32.
GCC 4.7 uses MSVC ABI on Win 32. This means the caller pops the hidden pointer for returning
aggregate structures larger than 8 bytes.
2012-09-24 20:34:33 +08:00
Zhang Xianyi
1d72b8bf1b Fixed generating shared library bug on MIPS. 2012-09-21 11:49:07 +00:00
Zhang Xianyi
758e34efbb Fixed the detection bug on Loongson 3A server. 2012-09-21 10:14:07 +00:00
Zhang Xianyi
735ca38b8f Refs #139. Check OS supporting AVX on runtime. 2012-09-18 15:46:20 +08:00
Zhang Xianyi
f76a384841 Refs #139. Added NO_AVX flag to use old Nehalem kernels on Sandy Bridge.
For example, make NO_AVX=1 or make DYNAMIC_ARCH=1 NO_AVX=1
2012-09-17 23:25:46 +08:00
Zhang Xianyi
9419a43a7f Fixed #142. Added the gesvd and potrs function families to common_interface.h. 2012-09-14 15:15:08 +08:00
Zhang Xianyi
b695680a33 Fixed #143. Don't generate cblas.h with NO_CBLAS. 2012-09-14 14:06:14 +08:00
Jameson Nash
d0e731e8b8 provide support for passing CFLAGS, FFLAGS, PFLAGS, FPFLAGS to make on the command line 2012-08-21 00:31:12 -04:00
Zhang Xianyi
48f075cfd5 Merge branch 'develop' 2012-08-20 16:52:35 +08:00
Zhang Xianyi
3e87648de3 Updated the doc for 0.2.3 version. 2012-08-20 16:51:47 +08:00
Zhang Xianyi
fe4ab95cd5 Refs #136. Fixed a bug about controlling the number of threads on Windows. 2012-08-19 23:50:54 +08:00
Xianyi Zhang
801383effe Fixed a hang bug when shutdown blas threads server on Windows. Added the feature about dynamic changing the number of threads on Windows. 2012-08-14 18:34:32 +08:00
Zhang Xianyi
54cd65e47f Use sandy bridge kernel when DYNAMIC_ARCH=1. 2012-08-13 15:25:08 +08:00
Zhang Xianyi
a55821a2ec Refs #132. Kill the threads when unload the library. 2012-08-11 21:33:15 +08:00
Zhang Xianyi
068861a927 Refs #133. Users can set COMMON_OPT flag to control CFLAGS and FFLAGS. 2012-08-10 14:36:26 +08:00
Zhang Xianyi
d007cca61d Refs #134. Fixed the building bug on IBM Power. 2012-08-10 11:54:21 +08:00
Zhang Xianyi
a92895939e Added the tip for Windows. 2012-08-09 20:37:55 +08:00
Zhang Xianyi
7bd1834d59 Refs #130 Fixed laswp building bug with DYNAMIC_ARCH=1. 2012-08-09 20:36:29 +08:00
Zhang Xianyi
1b056c5328 Refs #130 Prevent reading ipiv array beyond the bound in ?laswp. Use laswp instead of laswp_oncopy in getrf. 2012-08-09 20:06:51 +08:00
Zaheer Chothia
e8306f623a Refs #127. Generate DLL without a version suffix on Windows. 2012-07-30 19:46:30 +02:00
Xianyi Zhang
3108a1853d Added the doc for the conflict with R parallel. 2012-07-13 14:19:30 +08:00
Xianyi Zhang
25f1a573fd Fixed the build bug when DYNAMIC_ARCH=0. 2012-07-07 12:12:24 +08:00
40 changed files with 4078 additions and 804 deletions

View File

@@ -1,4 +1,33 @@
OpenBLAS ChangeLog
====================================================================
Version 0.2.4
8-Oct-2012
common:
* Upgraded LAPACK to 3.4.2 version. (#145)
* Provided support for passing CFLAGS, FFLAGS, PFLAGS,
FPFLAGS to make. (#137)
* f77blas.h: added compatibility for compilers without C99 complex
number support. (#141)
x86/x86-64:
* Added NO_AVX flag. Check at runtime whether the OS supports AVX. (#139)
* Fixed zdot incompatibility ABI issue with GCC 4.7 on
Windows 32-bit. (#140)
MIPS64:
* Fixed a bug in generating the shared library.
* Fixed the detection bug on the Loongson 3A server.
====================================================================
Version 0.2.3
20-Aug-2012
common:
* Fixed LAPACK unstable bug about ?laswp. (#130)
* Fixed the shared library bug about unloading the library on
Linux (#132).
* Fixed the compilation failure on BlueGene/P (TARGET=PPC440FP2)
Please use gcc and IBM xlf. (#134)
x86/x86-64:
* Supported goto_set_num_threads and openblas_set_num_threads
APIs in Windows. They can set the number of threads on runtime.
====================================================================
Version 0.2.2
6-July-2012

View File

@@ -3,7 +3,7 @@ include ./Makefile.system
BLASDIRS = interface driver/level2 driver/level3 driver/others
ifndef DYNAMIC_ARCH
ifneq ($(DYNAMIC_ARCH), 1)
BLASDIRS += kernel
endif
@@ -99,11 +99,9 @@ ifeq ($(OSNAME), Darwin)
endif
ifeq ($(OSNAME), WINNT)
$(MAKE) -C exports dll
-ln -fs $(LIBDLLNAME) $(LIBPREFIX).dll
endif
ifeq ($(OSNAME), CYGWIN_NT)
$(MAKE) -C exports dll
-ln -fs $(LIBDLLNAME) $(LIBPREFIX).dll
endif
tests :
@@ -147,7 +145,7 @@ ifeq ($(EXPRECISION), 1)
echo "#define EXPRECISION">> config_last.h
endif
##
ifdef DYNAMIC_ARCH
ifeq ($(DYNAMIC_ARCH), 1)
$(MAKE) -C kernel commonlibs || exit 1
for d in $(DYNAMIC_CORE) ; \
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
@@ -165,7 +163,7 @@ prof_blas :
$(MAKE) -C $$d prof || exit 1 ; \
fi; \
done
ifdef DYNAMIC_ARCH
ifeq ($(DYNAMIC_ARCH), 1)
$(MAKE) -C kernel commonprof || exit 1
endif
@@ -184,7 +182,7 @@ hpl :
$(MAKE) -C $$d $(@F) || exit 1 ; \
fi; \
done
ifdef DYNAMIC_ARCH
ifeq ($(DYNAMIC_ARCH), 1)
$(MAKE) -C kernel commonlibs || exit 1
for d in $(DYNAMIC_CORE) ; \
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
@@ -203,7 +201,7 @@ ifeq ($(NO_LAPACK), 1)
netlib :
else
netlib : lapack-3.4.1 patch.for_lapack-3.4.1 $(NETLIB_LAPACK_DIR)/make.inc
netlib : lapack-3.4.2 patch.for_lapack-3.4.2 $(NETLIB_LAPACK_DIR)/make.inc
ifndef NOFORTRAN
-@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
endif
@@ -212,7 +210,7 @@ ifndef NO_LAPACKE
endif
endif
prof_lapack : lapack-3.4.1 $(NETLIB_LAPACK_DIR)/make.inc
prof_lapack : lapack-3.4.2 $(NETLIB_LAPACK_DIR)/make.inc
-@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapack_prof
$(NETLIB_LAPACK_DIR)/make.inc :
@@ -233,28 +231,28 @@ ifndef NOFORTRAN
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
# -@echo "CEXTRALIB = $(CEXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
endif
lapack-3.4.1 : lapack-3.4.1.tgz
lapack-3.4.2 : lapack-3.4.2.tgz
ifndef NOFORTRAN
ifndef NO_LAPACK
@if test `$(MD5SUM) lapack-3.4.1.tgz | $(AWK) '{print $$1}'` = 44c3869c38c8335c2b9c2a8bb276eb55; then \
@if test `$(MD5SUM) lapack-3.4.2.tgz | $(AWK) '{print $$1}'` = 61bf1a8a4469d4bdb7604f5897179478; then \
echo $(TAR) zxf $< ;\
$(TAR) zxf $< && (cd $(NETLIB_LAPACK_DIR); $(PATCH) -p1 < ../patch.for_lapack-3.4.1) ;\
$(TAR) zxf $< && (cd $(NETLIB_LAPACK_DIR); $(PATCH) -p1 < ../patch.for_lapack-3.4.2) ;\
rm -f $(NETLIB_LAPACK_DIR)/lapacke/make.inc ;\
else \
rm -rf $(NETLIB_LAPACK_DIR) ;\
echo " Cannot download lapack-3.4.1.tgz or the MD5 check sum is wrong (Please use orignal)."; \
echo " Cannot download lapack-3.4.2.tgz or the MD5 check sum is wrong (Please use orignal)."; \
exit 1; \
fi
endif
endif
LAPACK_URL=http://www.netlib.org/lapack/lapack-3.4.1.tgz
LAPACK_URL=http://www.netlib.org/lapack/lapack-3.4.2.tgz
lapack-3.4.1.tgz :
lapack-3.4.2.tgz :
ifndef NOFORTRAN
#http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or
ifeq ($(OSNAME), $(filter $(OSNAME),Darwin NetBSD))
@@ -278,7 +276,7 @@ ifndef NOFORTRAN
-wget http://www.netlib.org/lapack/timing/timing.tgz
endif
lapack-timing : lapack-3.4.1 large.tgz timing.tgz
lapack-timing : lapack-3.4.2 large.tgz timing.tgz
ifndef NOFORTRAN
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )

View File

@@ -32,8 +32,10 @@ install : lib.grd
@cat common_interface.h >> $(OPENBLAS_INCLUDE_DIR)/f77blas.h
@echo \#endif >> $(OPENBLAS_INCLUDE_DIR)/f77blas.h
ifndef NO_CBLAS
@echo Generating cblas.h in $(OPENBLAS_INCLUDE_DIR)
@sed 's/common/openblas_config/g' cblas.h > $(OPENBLAS_INCLUDE_DIR)/cblas.h
endif
ifndef NO_LAPACKE
@echo Copying LAPACKE header files to $(OPENBLAS_LIBRARY_DIR)

View File

@@ -3,7 +3,7 @@
#
# This library's version
VERSION = 0.2.2
VERSION = 0.2.4
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@@ -71,6 +71,10 @@ VERSION = 0.2.2
# If you want to disable CPU/Memory affinity on Linux.
# NO_AFFINITY = 1
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
# and OS. However, the performance is low.
# NO_AVX = 1
# If you would like to know minute performance report of GotoBLAS.
# FUNCTION_PROFILE = 1
@@ -108,19 +112,16 @@ VERSION = 0.2.2
# The installation directory.
# PREFIX = /opt/OpenBLAS
# Common Optimization Flag; -O2 is enough.
# DEBUG = 1
ifeq ($(DEBUG), 1)
COMMON_OPT += -g
# -DDEBUG
else
COMMON_OPT += -O2
endif
# Common Optimization Flag;
# The default -O2 is enough.
# COMMON_OPT = -O2
# Profiling flags
COMMON_PROF = -pg
# Build Debug version
# DEBUG = 1
#
# End of user configuration
#

View File

@@ -10,7 +10,7 @@ TOPDIR = .
endif
ifndef NETLIB_LAPACK_DIR
NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-3.4.1
NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-3.4.2
endif
# Default C compiler
@@ -57,6 +57,14 @@ GEMM_MULTITHREAD_THRESHOLD=50
endif
GETARCH_FLAGS += -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD)
ifeq ($(NO_AVX), 1)
GETARCH_FLAGS += -DNO_AVX
endif
ifeq ($(DEBUG), 1)
GETARCH_FLAGS += -g
endif
# This operation is expensive, so execution should be once.
ifndef GOTOBLAS_MAKEFILE
export GOTOBLAS_MAKEFILE = 1
@@ -141,6 +149,26 @@ EXTRALIB += -defaultlib:advapi32
SUFFIX = obj
PSUFFIX = pobj
LIBSUFFIX = lib
ifeq ($(C_COMPILER), GCC)
# Test for MS_ABI support: GCC 4.7 and later are compatible with the MSVC ABI,
# which the sources are told about through -DMS_ABI.
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
ifeq ($(GCCVERSIONGT4), 1)
# GCC major version > 4
# It is compatible with MSVC ABI.
CCOMMON_OPT += -DMS_ABI
else
# Only reached when the major version is <= 4, so -DMS_ABI is never appended
# twice (the minor-version test below can also succeed for newer majors).
ifeq ($(GCCVERSIONGTEQ4), 1)
ifeq ($(GCCMINORVERSIONGTEQ7), 1)
# GCC version 4.x with x >= 7
# It is compatible with MSVC ABI.
CCOMMON_OPT += -DMS_ABI
endif
endif
endif
endif
endif
ifeq ($(OSNAME), Interix)
@@ -244,14 +272,20 @@ endif
endif
ifdef DYNAMIC_ARCH
ifeq ($(DYNAMIC_ARCH), 1)
ifeq ($(ARCH), x86)
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
CORE2 PENRYN DUNNINGTON NEHALEM SANDYBRIDGE ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE
endif
endif
ifeq ($(ARCH), x86_64)
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM SANDYBRIDGE OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE
endif
endif
ifndef DYNAMIC_CORE
@@ -562,6 +596,10 @@ ifeq ($(NO_LAPACKE), 1)
CCOMMON_OPT += -DNO_LAPACKE
endif
ifeq ($(NO_AVX), 1)
CCOMMON_OPT += -DNO_AVX
endif
ifdef SMP
CCOMMON_OPT += -DSMP_SERVER
@@ -687,11 +725,21 @@ AWK = awk
REVISION = -r$(VERSION)
MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION)))
CFLAGS = $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
PFLAGS = $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
ifeq ($(DEBUG), 1)
COMMON_OPT += -g
endif
FFLAGS = $(COMMON_OPT) $(FCOMMON_OPT)
FPFLAGS = $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
ifndef COMMON_OPT
COMMON_OPT = -O2
endif
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
#MAKEOVERRIDES =
ifndef SUFFIX
SUFFIX = o
@@ -705,7 +753,7 @@ ifndef LIBSUFFIX
LIBSUFFIX = a
endif
ifndef DYNAMIC_ARCH
ifneq ($(DYNAMIC_ARCH), 1)
ifndef SMP
LIBNAME = $(LIBPREFIX)_$(LIBCORE)$(REVISION).$(LIBSUFFIX)
LIBNAME_P = $(LIBPREFIX)_$(LIBCORE)$(REVISION)_p.$(LIBSUFFIX)
@@ -724,8 +772,8 @@ endif
endif
LIBDLLNAME = $(LIBPREFIX).dll
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.so)
LIBDLLNAME = $(LIBNAME:.$(LIBSUFFIX)=.dll)
LIBDYNNAME = $(LIBNAME:.$(LIBSUFFIX)=.dylib)
LIBDEFNAME = $(LIBNAME:.$(LIBSUFFIX)=.def)
LIBEXPNAME = $(LIBNAME:.$(LIBSUFFIX)=.exp)

View File

@@ -22,19 +22,19 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
endif
$(SBLASOBJS) $(SBLASOBJS_P) : CFLAGS += -UDOUBLE -UCOMPLEX
$(DBLASOBJS) $(DBLASOBJS_P) : CFLAGS += -DDOUBLE -UCOMPLEX
$(QBLASOBJS) $(QBLASOBJS_P) : CFLAGS += -DXDOUBLE -UCOMPLEX
$(CBLASOBJS) $(CBLASOBJS_P) : CFLAGS += -UDOUBLE -DCOMPLEX
$(ZBLASOBJS) $(ZBLASOBJS_P) : CFLAGS += -DDOUBLE -DCOMPLEX
$(XBLASOBJS) $(XBLASOBJS_P) : CFLAGS += -DXDOUBLE -DCOMPLEX
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
$(SBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
$(DBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
$(QBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
$(CBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
$(ZBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
$(XBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
libs :: $(BLASOBJS) $(COMMONOBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^

View File

@@ -79,7 +79,7 @@ If you compile this lib with USE_OPENMP=1, you should set OMP_NUM_THREADS enviro
### Set the number of threads on runtime.
We provided the below functions to controll the number of threads on runtime. So far, we didn't support changing the number of threads on Windows. On Windows, these functions are dummy.
We provide the functions below to control the number of threads at runtime.
void goto_set_num_threads(int num_threads);
@@ -97,9 +97,11 @@ OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas
Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version.
## Troubleshooting
* Please read [Faq](https://github.com/xianyi/OpenBLAS/wiki/Faq) at first.
* Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD.
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture. The Clang 3.0 will generate the wrong AVX binary code.
* The number of CPUs/cores should be less than or equal to 256.
* On Linux, OpenBLAS sets the processor affinity by default. This may cause [the conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html). You can build the library with NO_AFFINITY=1.
* On Loongson 3A, `make test` may fail because of a pthread_create error (error code EAGAIN). However, the same test case runs fine when you launch it directly from the shell.
## Specification of Git Branches

View File

@@ -389,10 +389,12 @@ typedef int blasint;
#define OPENBLAS_COMPLEX_C99
typedef float _Complex openblas_complex_float;
typedef double _Complex openblas_complex_double;
typedef xdouble _Complex openblas_complex_xdouble;
#else
#define OPENBLAS_COMPLEX_STRUCT
typedef struct { float real, imag; } openblas_complex_float;
typedef struct { double real, imag; } openblas_complex_double;
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
#endif
#endif // ASSEMBLER

View File

@@ -45,7 +45,7 @@ extern "C" {
int BLASFUNC(xerbla)(char *, blasint *info, blasint);
void BLASFUNC(openblas_set_num_threads)(int *);
void openblas_set_num_threads_(int *);
FLOATRET BLASFUNC(sdot) (blasint *, float *, blasint *, float *, blasint *);
FLOATRET BLASFUNC(sdsdot)(blasint *, float *, float *, blasint *, float *, blasint *);
@@ -76,19 +76,19 @@ myxcomplex_t BLASFUNC(xdotu) (blasint *, xdouble *, blasint *, xdouble *,
myxcomplex_t BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
#elif defined RETURN_BY_STACK
void BLASFUNC(cdotu) (float _Complex *, blasint *, float * , blasint *, float *, blasint *);
void BLASFUNC(cdotc) (float _Complex *, blasint *, float *, blasint *, float *, blasint *);
void BLASFUNC(zdotu) (double _Complex *, blasint *, double *, blasint *, double *, blasint *);
void BLASFUNC(zdotc) (double _Complex *, blasint *, double *, blasint *, double *, blasint *);
void BLASFUNC(xdotu) (xdouble _Complex *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
void BLASFUNC(xdotc) (xdouble _Complex *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
void BLASFUNC(cdotu) (openblas_complex_float *, blasint *, float * , blasint *, float *, blasint *);
void BLASFUNC(cdotc) (openblas_complex_float *, blasint *, float *, blasint *, float *, blasint *);
void BLASFUNC(zdotu) (openblas_complex_double *, blasint *, double *, blasint *, double *, blasint *);
void BLASFUNC(zdotc) (openblas_complex_double *, blasint *, double *, blasint *, double *, blasint *);
void BLASFUNC(xdotu) (openblas_complex_xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
void BLASFUNC(xdotc) (openblas_complex_xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
#else
float _Complex BLASFUNC(cdotu) (blasint *, float *, blasint *, float *, blasint *);
float _Complex BLASFUNC(cdotc) (blasint *, float *, blasint *, float *, blasint *);
double _Complex BLASFUNC(zdotu) (blasint *, double *, blasint *, double *, blasint *);
double _Complex BLASFUNC(zdotc) (blasint *, double *, blasint *, double *, blasint *);
xdouble _Complex BLASFUNC(xdotu) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
xdouble _Complex BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
openblas_complex_float BLASFUNC(cdotu) (blasint *, float *, blasint *, float *, blasint *);
openblas_complex_float BLASFUNC(cdotc) (blasint *, float *, blasint *, float *, blasint *);
openblas_complex_double BLASFUNC(zdotu) (blasint *, double *, blasint *, double *, blasint *);
openblas_complex_double BLASFUNC(zdotc) (blasint *, double *, blasint *, double *, blasint *);
openblas_complex_xdouble BLASFUNC(xdotu) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
openblas_complex_xdouble BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
#endif
void BLASFUNC(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
@@ -642,6 +642,8 @@ int BLASFUNC(zgemc)(char *, char *, blasint *, blasint *, blasint *, double *,
int BLASFUNC(xgemc)(char *, char *, blasint *, blasint *, blasint *, xdouble *,
xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *);
/* Lapack routines */
int BLASFUNC(sgetf2)(blasint *, blasint *, float *, blasint *, blasint *, blasint *);
int BLASFUNC(dgetf2)(blasint *, blasint *, double *, blasint *, blasint *, blasint *);
int BLASFUNC(qgetf2)(blasint *, blasint *, xdouble *, blasint *, blasint *, blasint *);
@@ -677,6 +679,13 @@ int BLASFUNC(cgesv)(blasint *, blasint *, float *, blasint *, blasint *, float
int BLASFUNC(zgesv)(blasint *, blasint *, double *, blasint *, blasint *, double*, blasint *, blasint *);
int BLASFUNC(xgesv)(blasint *, blasint *, xdouble *, blasint *, blasint *, xdouble*, blasint *, blasint *);
int BLASFUNC(sgesvd)(char *, char *, blasint *, blasint *, float *, blasint *, float *, float *, blasint *, float *, blasint *, float *, blasint *, blasint *);
int BLASFUNC(dgesvd)(char *, char *, blasint *, blasint *, double *, blasint *, double *, double *, blasint *, double *, blasint *, double *, blasint *, blasint *);
int BLASFUNC(qgesvd)(char *, char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *, blasint *);
int BLASFUNC(cgesvd)(char *, char *, blasint *, blasint *, float *, blasint *, float *, float *, blasint *, float *, blasint *, float *, blasint *, blasint *);
int BLASFUNC(zgesvd)(char *, char *, blasint *, blasint *, double *, blasint *, double *, double *, blasint *, double *, blasint *, double *, blasint *, blasint *);
int BLASFUNC(xgesvd)(char *, char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *, blasint *);
int BLASFUNC(spotf2)(char *, blasint *, float *, blasint *, blasint *);
int BLASFUNC(dpotf2)(char *, blasint *, double *, blasint *, blasint *);
int BLASFUNC(qpotf2)(char *, blasint *, xdouble *, blasint *, blasint *);
@@ -691,6 +700,13 @@ int BLASFUNC(cpotrf)(char *, blasint *, float *, blasint *, blasint *);
int BLASFUNC(zpotrf)(char *, blasint *, double *, blasint *, blasint *);
int BLASFUNC(xpotrf)(char *, blasint *, xdouble *, blasint *, blasint *);
int BLASFUNC(spotrs)(char *, blasint *, blasint *, float *, blasint *, float *, blasint *, blasint *);
int BLASFUNC(dpotrs)(char *, blasint *, blasint *, double *, blasint *, double *, blasint *, blasint *);
int BLASFUNC(qpotrs)(char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, blasint *, blasint *);
int BLASFUNC(cpotrs)(char *, blasint *, blasint *, float *, blasint *, float *, blasint *, blasint *);
int BLASFUNC(zpotrs)(char *, blasint *, blasint *, double *, blasint *, double *, blasint *, blasint *);
int BLASFUNC(xpotrs)(char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, blasint *, blasint *);
int BLASFUNC(slauu2)(char *, blasint *, float *, blasint *, blasint *);
int BLASFUNC(dlauu2)(char *, blasint *, double *, blasint *, blasint *);
int BLASFUNC(qlauu2)(char *, blasint *, xdouble *, blasint *, blasint *);

View File

@@ -1,5 +1,5 @@
/*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -101,12 +101,14 @@ int detect(void){
fclose(infile);
if(p != NULL){
if (strstr(p, "Loongson-3A")){
return CPU_LOONGSON3A;
}else if(strstr(p, "Loongson-3B")){
return CPU_LOONGSON3B;
}else if (strstr(p, "Loongson-3")){
infile = fopen("/proc/cpuinfo", "r");
p = (char *)NULL;
while (fgets(buffer, sizeof(buffer), infile)){
if (!strncmp("system type", buffer, 11)){
p = strchr(buffer, ':') + 2;
@@ -119,6 +121,24 @@ int detect(void){
}else{
return CPU_SICORTEX;
}
}
//Check model name for Loongson3
infile = fopen("/proc/cpuinfo", "r");
p = (char *)NULL;
while (fgets(buffer, sizeof(buffer), infile)){
if (!strncmp("model name", buffer, 10)){
p = strchr(buffer, ':') + 2;
break;
}
}
fclose(infile);
if(p != NULL){
if (strstr(p, "Loongson-3A")){
return CPU_LOONGSON3A;
}else if(strstr(p, "Loongson-3B")){
return CPU_LOONGSON3B;
}
}
#endif
return CPU_UNKNOWN;
}

View File

@@ -40,6 +40,11 @@
#include <string.h>
#include "cpuid.h"
#ifdef NO_AVX
#define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM
#define CORE_SANDYBRIDGE CORE_NEHALEM
#endif
#ifndef CPUIDEMU
#if defined(__APPLE__) && defined(__i386__)
@@ -109,6 +114,32 @@ static inline int have_excpuid(void){
return eax & 0xffff;
}
#ifndef NO_AVX
/*
 * Execute the x86 XGETBV instruction.
 *
 * op  : value loaded into ECX before the instruction runs (selects which
 *       extended control register is read; 0 is what support_avx() passes).
 * eax : receives the EAX output of the instruction.
 * edx : receives the EDX output of the instruction.
 *
 * The asm is marked volatile and declares the condition codes as clobbered.
 */
static inline void xgetbv(int op, int * eax, int * edx){
__asm__ __volatile__
("xgetbv": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
}
#endif
/*
 * Report whether AVX can be used.
 *
 * Returns 1 only when CPUID leaf 1 sets both ECX bit 28 and ECX bit 27 and
 * XGETBV(0) then reports bits 1 and 2 set in EAX (the OS supports AVX).
 * Returns 0 otherwise, and always 0 when built with NO_AVX, in which case
 * the xgetbv instruction is never issued.
 */
int support_avx(){
#ifdef NO_AVX
  return 0;
#else
  const int cpuid_need = (1 << 28) | (1 << 27);
  const int xcr0_need  = 6;
  int eax, ebx, ecx, edx;

  cpuid(1, &eax, &ebx, &ecx, &edx);
  if ((ecx & cpuid_need) != cpuid_need) {
    /* Do not execute xgetbv unless both CPUID bits are present. */
    return 0;
  }

  xgetbv(0, &eax, &edx);
  return ((eax & xcr0_need) == xcr0_need) ? 1 : 0;
#endif
}
int get_vendor(void){
int eax, ebx, ecx, edx;
char vendor[13];
@@ -189,7 +220,9 @@ int get_cputype(int gettype){
if ((ecx & (1 << 9)) != 0) feature |= HAVE_SSSE3;
if ((ecx & (1 << 19)) != 0) feature |= HAVE_SSE4_1;
if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
if ((ecx & (1 << 28)) != 0) feature |= HAVE_AVX;
#ifndef NO_AVX
if (support_avx()) feature |= HAVE_AVX;
#endif
if (have_excpuid() >= 0x01) {
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
@@ -984,13 +1017,19 @@ int get_cpuname(void){
return CPUTYPE_NEHALEM;
case 10:
//Intel Core i5-2000 /i7-2000 (Sandy Bridge)
return CPUTYPE_SANDYBRIDGE;
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
else
return CPUTYPE_NEHALEM; //OS doesn't support AVX
case 12:
//Xeon Processor 5600 (Westmere-EP)
return CPUTYPE_NEHALEM;
case 13:
//Intel Core i7-3000 / Xeon E5 (Sandy Bridge)
return CPUTYPE_SANDYBRIDGE;
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
else
return CPUTYPE_NEHALEM;
case 15:
//Xeon Processor E7 (Westmere-EX)
return CPUTYPE_NEHALEM;
@@ -999,7 +1038,10 @@ int get_cpuname(void){
case 3:
switch (model) {
case 10:
return CPUTYPE_SANDYBRIDGE;
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
else
return CPUTYPE_NEHALEM;
}
break;
}
@@ -1343,13 +1385,19 @@ int get_coretype(void){
return CORE_NEHALEM;
case 10:
//Intel Core i5-2000 /i7-2000 (Sandy Bridge)
return CORE_SANDYBRIDGE;
if(support_avx())
return CORE_SANDYBRIDGE;
else
return CORE_NEHALEM; //OS doesn't support AVX
case 12:
//Xeon Processor 5600 (Westmere-EP)
return CORE_NEHALEM;
case 13:
//Intel Core i7-3000 / Xeon E5 (Sandy Bridge)
return CORE_SANDYBRIDGE;
if(support_avx())
return CORE_SANDYBRIDGE;
else
return CORE_NEHALEM; //OS doesn't support AVX
case 15:
//Xeon Processor E7 (Westmere-EX)
return CORE_NEHALEM;
@@ -1358,7 +1406,10 @@ int get_coretype(void){
case 3:
switch (model) {
case 10:
return CORE_SANDYBRIDGE;
if(support_avx())
return CORE_SANDYBRIDGE;
else
return CORE_NEHALEM; //OS doesn't support AVX
}
break;
}

View File

@@ -5,7 +5,7 @@
TOPDIR = ..
include $(TOPDIR)/Makefile.system
CFLAGS += -DADD$(BU) -DCBLAS
override CFLAGS += -DADD$(BU) -DCBLAS
LIB = $(TOPDIR)/$(LIBNAME)

View File

@@ -14,7 +14,7 @@ endif
# COMMONOBJS += info.$(SUFFIX)
ifdef DYNAMIC_ARCH
ifeq ($(DYNAMIC_ARCH), 1)
COMMONOBJS += dynamic.$(SUFFIX)
else
COMMONOBJS += parameter.$(SUFFIX)
@@ -70,7 +70,7 @@ ifndef BLAS_SERVER
BLAS_SERVER = blas_server.c
endif
ifdef DYNAMIC_ARCH
ifeq ($(DYNAMIC_ARCH), 1)
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
else
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
@@ -215,7 +215,7 @@ info.$(SUFFIX) : info.c info.h ../../common.h ../../param.h
$(CC) $(CFLAGS) -c $< -o $(@F)
hpl : CFLAGS += -DHPL
hpl_p : CFLAGS += -DHPL
hpl : override CFLAGS += -DHPL
hpl_p : override CFLAGS += -DHPL
include $(TOPDIR)/Makefile.tail

View File

@@ -435,7 +435,7 @@ static int blas_thread_server(void *arg){
blas_memory_free(buffer);
pthread_exit(NULL);
//pthread_exit(NULL);
return 0;
}

View File

@@ -63,13 +63,7 @@ static blas_pool_t pool;
static HANDLE blas_threads [MAX_CPU_NUMBER];
static DWORD blas_threads_id[MAX_CPU_NUMBER];
void goto_set_num_threads(int num)
{
}
void openblas_set_num_threads(int num)
{
}
static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
@@ -187,7 +181,7 @@ static DWORD WINAPI blas_thread_server(void *arg){
do {
action = WaitForMultipleObjects(2, handles, FALSE, INFINITE);
} while ((action != WAIT_OBJECT_0) && (action == WAIT_OBJECT_0 + 1));
} while ((action != WAIT_OBJECT_0) && (action != WAIT_OBJECT_0 + 1));
if (action == WAIT_OBJECT_0 + 1) break;
@@ -271,7 +265,9 @@ static DWORD WINAPI blas_thread_server(void *arg){
} else {
legacy_exec(routine, queue -> mode, queue -> args, sb);
}
}
}else{
continue; //if queue == NULL
}
#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Finished!\n", cpu);
@@ -433,7 +429,7 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
/* Shutdown procedure, but user don't have to call this routine. The */
/* kernel automatically kill threads. */
int blas_thread_shutdown_(void){
int BLASFUNC(blas_thread_shutdown)(void){
int i;
@@ -445,7 +441,7 @@ int blas_thread_shutdown_(void){
SetEvent(pool.killed);
for(i = 0; i < blas_cpu_number - 1; i++){
for(i = 0; i < blas_num_threads - 1; i++){
WaitForSingleObject(blas_threads[i], INFINITE);
}
@@ -456,3 +452,47 @@ int blas_thread_shutdown_(void){
return 0;
}
/*
 * Set the number of threads OpenBLAS uses on Windows, creating additional
 * worker threads when the request exceeds the number created so far.
 *
 * num_threads : requested thread count. Values below 1 fall back to the
 *               current blas_cpu_number; values above MAX_CPU_NUMBER are
 *               clamped to MAX_CPU_NUMBER.
 *
 * The pool only grows: when num_threads is not larger than blas_num_threads
 * no thread is created or destroyed and only blas_cpu_number is updated.
 */
void goto_set_num_threads(int num_threads)
{
  long i;

  if (num_threads < 1) num_threads = blas_cpu_number;
  if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;

  if (num_threads > blas_num_threads) {
    LOCK_COMMAND(&server_lock);

    /* First use: set up the shared pool state before any worker starts. */
    if (!blas_server_avail){
      InitializeCriticalSection(&pool.lock);
      pool.filled = CreateEvent(NULL, FALSE, FALSE, NULL);
      pool.killed = CreateEvent(NULL, TRUE, FALSE, NULL);
      pool.shutdown = 0;
      pool.queue = NULL;
      blas_server_avail = 1;
    }

    /*
     * A pool sized for N threads holds N - 1 workers in slots 0 .. N - 2.
     * When no worker exists yet (blas_num_threads == 0) start at slot 0:
     * "blas_num_threads - 1" would be -1 and write in front of the
     * blas_threads / blas_threads_id arrays.
     */
    for(i = (blas_num_threads > 0) ? blas_num_threads - 1 : 0; i < num_threads - 1; i++){
      blas_threads[i] = CreateThread(NULL, 0,
                                     blas_thread_server, (void *)i,
                                     0, &blas_threads_id[i]);
    }

    blas_num_threads = num_threads;
    UNLOCK_COMMAND(&server_lock);
  }

  blas_cpu_number = num_threads;
}
/*
 * OpenBLAS-named entry point for setting the number of threads.
 * Forwards num unchanged to goto_set_num_threads().
 */
void openblas_set_num_threads(int num)
{
goto_set_num_threads(num);
}

View File

@@ -60,6 +60,14 @@ extern gotoblas_t gotoblas_NEHALEM;
extern gotoblas_t gotoblas_OPTERON;
extern gotoblas_t gotoblas_OPTERON_SSE3;
extern gotoblas_t gotoblas_BARCELONA;
extern gotoblas_t gotoblas_BOBCAT;
#ifndef NO_AVX
extern gotoblas_t gotoblas_SANDYBRIDGE;
#else
//Use NEHALEM kernels for sandy bridge
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
#endif
#define VENDOR_INTEL 1
#define VENDOR_AMD 2
@@ -68,6 +76,31 @@ extern gotoblas_t gotoblas_BARCELONA;
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
#ifndef NO_AVX
/*
 * Execute the x86 XGETBV instruction.
 *
 * op  : value loaded into ECX before the instruction runs.
 * eax : receives the EAX output of the instruction.
 * edx : receives the EDX output of the instruction.
 *
 * The asm is marked volatile and declares the condition codes as clobbered.
 */
static inline void xgetbv(int op, int * eax, int * edx){
__asm__ __volatile__
("xgetbv": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
}
#endif
/*
 * Runtime check used when choosing between the Sandy Bridge and Nehalem
 * kernels: returns 1 if AVX is usable, 0 if not.
 *
 * With NO_AVX defined the answer is always 0 and xgetbv is never executed.
 * Otherwise CPUID leaf 1 must report ECX bits 28 and 27, and XGETBV(0) must
 * report bits 1 and 2 in EAX (the OS supports AVX).
 */
int support_avx(){
#ifdef NO_AVX
  return 0;
#else
  int eax, ebx, ecx, edx;
  int has_avx_bit, has_osxsave_bit;

  cpuid(1, &eax, &ebx, &ecx, &edx);
  has_avx_bit     = (ecx & (1 << 28)) != 0;
  has_osxsave_bit = (ecx & (1 << 27)) != 0;

  /* xgetbv may only be issued once both CPUID bits are confirmed. */
  if (!has_avx_bit || !has_osxsave_bit)
    return 0;

  xgetbv(0, &eax, &edx);
  if ((eax & 6) != 6)
    return 0;

  return 1; /* OS supports AVX */
#endif
}
static int get_vendor(void){
int eax, ebx, ecx, edx;
char vendor[13];
@@ -122,15 +155,38 @@ static gotoblas_t *get_coretype(void){
if (model == 12) return &gotoblas_ATOM;
return NULL;
case 2:
//Intel Core (Clarkdale) / Core (Arrandale)
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
// Xeon (Clarkdale), 32nm
if (model == 5) return &gotoblas_NEHALEM;
case 2:
//Intel Core (Clarkdale) / Core (Arrandale)
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
// Xeon (Clarkdale), 32nm
if (model == 5) return &gotoblas_NEHALEM;
//Intel Xeon Processor 5600 (Westmere-EP)
if (model == 12) return &gotoblas_NEHALEM;
return NULL;
//Intel Xeon Processor 5600 (Westmere-EP)
//Xeon Processor E7 (Westmere-EX)
if (model == 12 || model == 15) return &gotoblas_NEHALEM;
//Intel Core i5-2000 /i7-2000 (Sandy Bridge)
//Intel Core i7-3000 / Xeon E5
if (model == 10 || model == 13) {
if(support_avx())
return &gotoblas_SANDYBRIDGE;
else{
fprintf(stderr, "OpenBLAS : Your OS doesn't support AVX. Use Nehalem kernels.\n");
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
return NULL;
case 3:
//Intel Sandy Bridge 22nm (Ivy Bridge?)
if (model == 10) {
if(support_avx())
return &gotoblas_SANDYBRIDGE;
else{
fprintf(stderr, "OpenBLAS : Your OS doesn't support AVX. Use Nehalem kernels.\n");
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
return NULL;
}
case 0xf:
if (model <= 0x2) return &gotoblas_NORTHWOOD;
@@ -144,7 +200,9 @@ static gotoblas_t *get_coretype(void){
if ((exfamily == 0) || (exfamily == 2)) {
if (ecx & (1 << 0)) return &gotoblas_OPTERON_SSE3;
else return &gotoblas_OPTERON;
} else {
} else if (exfamily == 5) {
return &gotoblas_BOBCAT;
} else {
return &gotoblas_BARCELONA;
}
}
@@ -178,6 +236,8 @@ static char *corename[] = {
"Opteron(SSE3)",
"Barcelona",
"Nano",
"Sandybridge",
"Bobcat",
};
char *gotoblas_corename(void) {
@@ -197,7 +257,9 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_OPTERON) return corename[13];
if (gotoblas == &gotoblas_BARCELONA) return corename[14];
if (gotoblas == &gotoblas_NANO) return corename[15];
if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16];
if (gotoblas == &gotoblas_BOBCAT) return corename[17];
return corename[0];
}
@@ -216,7 +278,7 @@ void gotoblas_dynamic_init(void) {
if (gotoblas && gotoblas -> init) {
gotoblas -> init();
} else {
fprintf(stderr, "GotoBLAS : Architecture Initialization failed. No initialization function found.\n");
fprintf(stderr, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
exit(1);
}

View File

@@ -1,5 +1,5 @@
/*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -206,7 +206,15 @@ int get_num_procs(void) {
#endif
/*
OpenBLAS uses the number of CPU cores for multithreading.
It can be set by openblas_set_num_threads(int num_threads);
*/
int blas_cpu_number = 0;
/*
The number of threads in the thread pool.
This value is equal to or larger than blas_cpu_number. This means some threads are sleeping.
*/
int blas_num_threads = 0;
int goto_get_num_procs (void) {
@@ -1289,6 +1297,7 @@ void DESTRUCTOR gotoblas_quit(void) {
moncontrol (1);
#endif
blas_shutdown();
}
#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))

View File

@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
extern void openblas_set_num_threads(int num_threads) ;
void NAME(int* num_threads){
void openblas_set_num_threads_(int* num_threads){
openblas_set_num_threads(*num_threads);
}
@@ -46,7 +46,7 @@ void NAME(int* num_threads){
void openblas_set_num_threads(int num_threads) {
}
void NAME(int* num_threads){
void openblas_set_num_threads_(int* num_threads){
}
#endif

View File

@@ -66,6 +66,11 @@ dll : ../$(LIBDLLNAME)
dll2 : libgoto2_shared.dll
# On Windows, we only generate a DLL without a version suffix. This is because
# applications which link against the dynamic library reference a fixed DLL name
# in their import table. By instead using a stable name it is possible to
# upgrade between library versions, without needing to re-link an application.
# For more details see: https://github.com/xianyi/OpenBLAS/issues/127.
../$(LIBDLLNAME) : ../$(LIBNAME) libopenblas.def dllinit.$(SUFFIX)
$(RANLIB) ../$(LIBNAME)
ifeq ($(BINARY32), 1)

View File

@@ -211,6 +211,10 @@ if (!$?) {
if ($?) {
$link = `$compiler $openmp -q32 -v ftest2.f 2>&1 && rm -f a.out a.exe`;
}
#For gfortran MIPS
if ($?) {
$link = `$compiler $openmp -mabi=n32 -v ftest2.f 2>&1 && rm -f a.out a.exe`;
}
$binary = "" if ($?);
}
@@ -219,6 +223,10 @@ if (!$?) {
if ($?) {
$link = `$compiler $openmp -q64 -v ftest2.f 2>&1 && rm -f a.out a.exe`;
}
#For gfortran MIPS
if ($?) {
$link = `$compiler $openmp -mabi=64 -v ftest2.f 2>&1 && rm -f a.out a.exe`;
}
$binary = "" if ($?);
}

View File

@@ -318,7 +318,7 @@ CZBLAS3OBJS = \
ifndef NO_CBLAS
CFLAGS += -I.
override CFLAGS += -I.
SBLAS1OBJS += $(CSBLAS1OBJS)
SBLAS2OBJS += $(CSBLAS2OBJS)
@@ -400,7 +400,7 @@ all :: libs
ifdef FUNCTION_PROFILE
$(BLASOBJS) $(BLASOBJS_P) : functable.h
$(BLASOBJS) $(BLASOBJS_P) : CFLAGS += -DPROFILE_FUNC_NAME=interface_$(*F)
$(BLASOBJS) $(BLASOBJS_P) : override CFLAGS += -DPROFILE_FUNC_NAME=interface_$(*F)
functable.h : Makefile
./create $(FUNCALLFILES) > functable.h
@@ -420,7 +420,7 @@ level3 : $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
$(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : CFLAGS += -DCBLAS
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : override CFLAGS += -DCBLAS
srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c
$(CC) $(CFLAGS) -c $< -o $(@F)

View File

@@ -6,7 +6,7 @@ TOPDIR = ..
include $(TOPDIR)/Makefile.system
ifdef TARGET_CORE
CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
BUILD_KERNEL = 1
KDIR =
TSUFFIX = _$(TARGET_CORE)
@@ -48,7 +48,7 @@ HPLOBJS = \
COMMONOBJS += lsame.$(SUFFIX) scabs1.$(SUFFIX) dcabs1.$(SUFFIX)
ifdef DYNAMIC_ARCH
ifeq ($(DYNAMIC_ARCH), 1)
SBLASOBJS += setparam$(TSUFFIX).$(SUFFIX)
CCOMMON_OPT += -DTS=$(TSUFFIX)
endif

View File

@@ -1541,6 +1541,16 @@
popl %ebx
popl %esi
popl %edi
/*remove the hidden return value address from the stack.*/
#if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INTERIX)
#ifdef MS_ABI
/* For MingW GCC >= 4.7. It is compatible with MSVC ABI. http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36834 */
ret
#else
/* remove the hidden return value address from the stack. For MingW GCC < 4.7 */
ret $0x4
#endif
#else
/*remove the hidden return value address from the stack on Linux.*/
ret $0x4
#endif
EPILOGUE

View File

@@ -118,7 +118,7 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra
min_jj = js + min_j - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
if (GEMM_UNROLL_N <= 8) {
if (0 && GEMM_UNROLL_N <= 8) {
LASWP_NCOPY(min_jj, off + 1, off + k,
c + (- off + jjs * lda) * COMPSIZE, lda,
@@ -245,7 +245,8 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
min_jj = MIN(n_to, xxx + div_n) - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
if (GEMM_UNROLL_N <= 8) {
if (0 && GEMM_UNROLL_N <= 8) {
printf("helllo\n");
LASWP_NCOPY(min_jj, off + 1, off + k,
b + (- off + jjs * lda) * COMPSIZE, lda,

View File

@@ -77,10 +77,21 @@ static void inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_jj = js + min_j - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
#if 0
LASWP_NCOPY(min_jj, off + 1, off + k,
c + (- off + jjs * lda) * COMPSIZE, lda,
ipiv, sb + k * (jjs - js) * COMPSIZE);
#else
LASWP_PLUS(min_jj, off + 1, off + k, ZERO,
#ifdef COMPLEX
ZERO,
#endif
c + (- off + jjs * lda) * COMPSIZE, lda, NULL, 0, ipiv, 1);
GEMM_ONCOPY (k, min_jj, c + jjs * lda * COMPSIZE, lda, sb + (jjs - js) * k * COMPSIZE);
#endif
for (is = 0; is < k; is += GEMM_P) {
min_i = k - is;
if (min_i > GEMM_P) min_i = GEMM_P;

View File

@@ -113,7 +113,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
min_jj = js + jmin - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
#if 0
#if 1
LASWP_PLUS(min_jj, j + offset + 1, j + jb + offset, ZERO,
#ifdef COMPLEX
ZERO,

View File

@@ -48,7 +48,7 @@
int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG lda,
FLOAT *dummy2, BLASLONG dumy3, blasint *ipiv, BLASLONG incx){
BLASLONG i, j, ip1, ip2;
BLASLONG i, j, ip1, ip2, rows;
blasint *piv;
FLOAT *a1;
FLOAT *b1, *b2;
@@ -58,13 +58,34 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
k1 --;
#ifndef MINUS
ipiv += k1
;
ipiv += k1;
#else
ipiv -= (k2 - 1) * incx;
#endif
if (n <= 0) return 0;
rows = k2-k1;
if (rows <=0) return 0;
if (rows == 1) {
//Only have 1 row
ip1 = *ipiv;
a1 = a + k1 + 1;
b1 = a + ip1;
if(a1 == b1) return 0;
for(j=0; j<n; j++){
A1 = *a1;
B1 = *b1;
*a1 = B1;
*b1 = A1;
a1 += lda;
b1 += lda;
}
return 0;
}
j = n;
if (j > 0) {
@@ -85,10 +106,11 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
b1 = a + ip1;
b2 = a + ip2;
i = ((k2 - k1) >> 1);
if (i > 0) {
do {
i = (rows >> 1);
i--;
//Main Loop
while (i > 0) {
#ifdef OPTERON
#ifndef MINUS
asm volatile("prefetchw 2 * 128(%0)\n" : : "r"(a1));
@@ -172,12 +194,69 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
a1 -= 2;
#endif
i --;
} while (i > 0);
}
//Loop Ending
A1 = *a1;
A2 = *a2;
B1 = *b1;
B2 = *b2;
if (b1 == a1) {
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
}
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
*a1 = A2;
*a2 = A1;
} else {
*a1 = A2;
*a2 = B2;
*b2 = A1;
}
}
} else {
if (b2 == a1) {
*a1 = A2;
*a2 = B1;
*b1 = A1;
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
*b1 = A2;
} else {
*a1 = B1;
*a2 = B2;
*b1 = A1;
*b2 = A2;
}
}
#ifndef MINUS
a1 += 2;
#else
a1 -= 2;
#endif
i = ((k2 - k1) & 1);
//Remain
i = (rows & 1);
if (i > 0) {
ip1 = *piv;
b1 = a + ip1;
A1 = *a1;
B1 = *b1;
*a1 = B1;

View File

@@ -50,7 +50,7 @@
int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG lda,
FLOAT *dummy2, BLASLONG dumy3, blasint *ipiv, BLASLONG incx){
BLASLONG i, j, ip1, ip2;
BLASLONG i, j, ip1, ip2, rows;
blasint *piv;
FLOAT *a1, *a3;
FLOAT *b1, *b2, *b3, *b4;
@@ -60,8 +60,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
k1 --;
#ifndef MINUS
ipiv += k1
;
ipiv += k1;
#else
ipiv -= (k2 - 1) * incx;
#endif
@@ -69,6 +68,28 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
if (n <= 0) return 0;
j = (n >> 1);
rows = k2-k1;
if (rows <=0) return 0;
if (rows == 1) {
//Only have 1 row
ip1 = *ipiv;
a1 = a + k1 + 1;
b1 = a + ip1;
if(a1 == b1) return 0;
for(j=0; j<n; j++){
A1 = *a1;
B1 = *b1;
*a1 = B1;
*b1 = A1;
a1 += lda;
b1 += lda;
}
return 0;
}
if (j > 0) {
do {
piv = ipiv;
@@ -92,10 +113,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
b3 = b1 + 1 * lda;
b4 = b2 + 1 * lda;
i = ((k2 - k1) >> 1);
i = ((rows) >> 1);
if (i > 0) {
do {
// Loop pipeline
i--;
//Main Loop
while (i > 0) {
#ifdef CORE2
#ifndef MINUS
asm volatile("prefetcht0 1 * 64(%0)\n" : : "r"(b1));
@@ -202,12 +226,99 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
a3 -= 2;
#endif
i --;
} while (i > 0);
}
i = ((k2 - k1) & 1);
//Loop Ending
B1 = *b1;
B2 = *b2;
B3 = *b3;
B4 = *b4;
A1 = *a1;
A2 = *a2;
A3 = *a3;
A4 = *a4;
if (b1 == a1) {
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
*a3 = A4;
*a4 = A3;
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
*a4 = B4;
*b4 = A4;
}
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
*a1 = A2;
*a2 = A1;
*a3 = A4;
*a4 = A3;
} else {
*a1 = A2;
*a2 = B2;
*b2 = A1;
*a3 = A4;
*a4 = B4;
*b4 = A3;
}
}
} else {
if (b2 == a1) {
*a1 = A2;
*a2 = B1;
*b1 = A1;
*a3 = A4;
*a4 = B3;
*b3 = A3;
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
*a3 = B3;
*b3 = A3;
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
*b1 = A2;
*a3 = B3;
*a4 = A3;
*b3 = A4;
} else {
*a1 = B1;
*a2 = B2;
*b1 = A1;
*b2 = A2;
*a3 = B3;
*a4 = B4;
*b3 = A3;
*b4 = A4;
}
}
#ifndef MINUS
a1 += 2;
a3 += 2;
#else
a1 -= 2;
a3 -= 2;
#endif
//Remain
i = ((rows) & 1);
if (i > 0) {
ip1 = *piv;
b1 = a + ip1;
b3 = b1 + 1 * lda;
A1 = *a1;
B1 = *b1;
A3 = *a3;
@@ -240,78 +351,135 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
b1 = a + ip1;
b2 = a + ip2;
i = ((k2 - k1) >> 1);
if (i > 0) {
do {
A1 = *a1;
A2 = *a2;
B1 = *b1;
B2 = *b2;
i = ((rows) >> 1);
i --;
while (i > 0) {
A1 = *a1;
A2 = *a2;
B1 = *b1;
B2 = *b2;
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
if (b1 == a1) {
if (b1 == a1) {
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
}
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
*a1 = A2;
*a2 = A1;
} else {
*a1 = A2;
*a2 = B2;
*b2 = A1;
}
}
} else {
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
*a2 = B1;
*b1 = A1;
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
}
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
*a1 = A2;
*a2 = A1;
} else {
*a1 = A2;
*a2 = B2;
*b2 = A1;
}
}
} else {
if (b2 == a1) {
*a1 = A2;
*a2 = B1;
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
} else
if (b2 == a2) {
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
*b1 = A2;
} else {
*a1 = B1;
*a2 = B2;
*b1 = A1;
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
*b1 = A2;
} else {
*a1 = B1;
*a2 = B2;
*b1 = A1;
*b2 = A2;
}
}
*b2 = A2;
}
}
b1 = a + ip1;
b2 = a + ip2;
b1 = a + ip1;
b2 = a + ip2;
#ifndef MINUS
a1 += 2;
a1 += 2;
#else
a1 -= 2;
a1 -= 2;
#endif
i --;
} while (i > 0);
i --;
}
i = ((k2 - k1) & 1);
//Loop Ending (n=1)
A1 = *a1;
A2 = *a2;
B1 = *b1;
B2 = *b2;
if (b1 == a1) {
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
}
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
*a1 = A2;
*a2 = A1;
} else {
*a1 = A2;
*a2 = B2;
*b2 = A1;
}
}
} else {
if (b2 == a1) {
*a1 = A2;
*a2 = B1;
*b1 = A1;
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
*b1 = A2;
} else {
*a1 = B1;
*a2 = B2;
*b1 = A1;
*b2 = A2;
}
}
#ifndef MINUS
a1 += 2;
#else
a1 -= 2;
#endif
//Remain
i = (rows & 1);
if (i > 0) {
ip1 = *piv;
b1 = a + ip1;
A1 = *a1;
B1 = *b1;
*a1 = B1;

View File

@@ -54,7 +54,7 @@
int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG lda,
FLOAT *dummy2, BLASLONG dumy3, blasint *ipiv, BLASLONG incx){
BLASLONG i, j, ip1, ip2;
BLASLONG i, j, ip1, ip2, rows;
blasint *piv;
FLOAT *a1, *a3, *a5, *a7;
FLOAT *b1, *b2, *b3, *b4;
@@ -66,14 +66,35 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
k1 --;
#ifndef MINUS
ipiv += k1
;
ipiv += k1;
#else
ipiv -= (k2 - 1) * incx;
#endif
if (n <= 0) return 0;
rows = k2-k1;
if (rows <=0) return 0;
if (rows == 1) {
//Only have 1 row
ip1 = *ipiv;
a1 = a + k1 + 1;
b1 = a + ip1;
if(a1 == b1) return 0;
for(j=0; j<n; j++){
A1 = *a1;
B1 = *b1;
*a1 = B1;
*b1 = A1;
a1 += lda;
b1 += lda;
}
return 0;
}
j = (n >> 2);
if (j > 0) {
do {
@@ -106,8 +127,9 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
i = ((k2 - k1) >> 1);
if (i > 0) {
do {
i--; //Loop pipeline
//Main Loop
while (i > 0) {
A1 = *a1;
A2 = *a2;
A3 = *a3;
@@ -259,12 +281,156 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
a7 -= 2;
#endif
i --;
} while (i > 0);
}
i = ((k2 - k1) & 1);
//Loop Ending
A1 = *a1;
A2 = *a2;
A3 = *a3;
A4 = *a4;
A5 = *a5;
A6 = *a6;
A7 = *a7;
A8 = *a8;
B1 = *b1;
B2 = *b2;
B3 = *b3;
B4 = *b4;
B5 = *b5;
B6 = *b6;
B7 = *b7;
B8 = *b8;
if (b1 == a1) {
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
*a3 = A4;
*a4 = A3;
*a5 = A6;
*a6 = A5;
*a7 = A8;
*a8 = A7;
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
*a4 = B4;
*b4 = A4;
*a6 = B6;
*b6 = A6;
*a8 = B8;
*b8 = A8;
}
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
*a1 = A2;
*a2 = A1;
*a3 = A4;
*a4 = A3;
*a5 = A6;
*a6 = A5;
*a7 = A8;
*a8 = A7;
} else {
*a1 = A2;
*a2 = B2;
*b2 = A1;
*a3 = A4;
*a4 = B4;
*b4 = A3;
*a5 = A6;
*a6 = B6;
*b6 = A5;
*a7 = A8;
*a8 = B8;
*b8 = A7;
}
}
} else {
if (b2 == a1) {
*a1 = A2;
*a2 = B1;
*b1 = A1;
*a3 = A4;
*a4 = B3;
*b3 = A3;
*a5 = A6;
*a6 = B5;
*b5 = A5;
*a7 = A8;
*a8 = B7;
*b7 = A7;
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
*a3 = B3;
*b3 = A3;
*a5 = B5;
*b5 = A5;
*a7 = B7;
*b7 = A7;
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
*b1 = A2;
*a3 = B3;
*a4 = A3;
*b3 = A4;
*a5 = B5;
*a6 = A5;
*b5 = A6;
*a7 = B7;
*a8 = A7;
*b7 = A8;
} else {
*a1 = B1;
*a2 = B2;
*b1 = A1;
*b2 = A2;
*a3 = B3;
*a4 = B4;
*b3 = A3;
*b4 = A4;
*a5 = B5;
*a6 = B6;
*b5 = A5;
*b6 = A6;
*a7 = B7;
*a8 = B8;
*b7 = A7;
*b8 = A8;
}
}
#ifndef MINUS
a1 += 2;
a3 += 2;
a5 += 2;
a7 += 2;
#else
a1 -= 2;
a3 -= 2;
a5 -= 2;
a7 -= 2;
#endif
//Remain
i = ((rows) & 1);
if (i > 0) {
ip1 = *piv;
b1 = a + ip1;
b3 = b1 + 1 * lda;
b5 = b1 + 2 * lda;
b7 = b1 + 3 * lda;
A1 = *a1;
B1 = *b1;
A3 = *a3;
@@ -312,10 +478,10 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
b3 = b1 + 1 * lda;
b4 = b2 + 1 * lda;
i = ((k2 - k1) >> 1);
if (i > 0) {
do {
i = ((rows) >> 1);
i--;
while (i > 0) {
A1 = *a1;
A2 = *a2;
A3 = *a3;
@@ -409,12 +575,97 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
a3 -= 2;
#endif
i --;
} while (i > 0);
}
i = ((k2 - k1) & 1);
//Loop Ending
B1 = *b1;
B2 = *b2;
B3 = *b3;
B4 = *b4;
A1 = *a1;
A2 = *a2;
A3 = *a3;
A4 = *a4;
if (b1 == a1) {
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
*a3 = A4;
*a4 = A3;
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
*a4 = B4;
*b4 = A4;
}
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
*a1 = A2;
*a2 = A1;
*a3 = A4;
*a4 = A3;
} else {
*a1 = A2;
*a2 = B2;
*b2 = A1;
*a3 = A4;
*a4 = B4;
*b4 = A3;
}
}
} else {
if (b2 == a1) {
*a1 = A2;
*a2 = B1;
*b1 = A1;
*a3 = A4;
*a4 = B3;
*b3 = A3;
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
*a3 = B3;
*b3 = A3;
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
*b1 = A2;
*a3 = B3;
*a4 = A3;
*b3 = A4;
} else {
*a1 = B1;
*a2 = B2;
*b1 = A1;
*b2 = A2;
*a3 = B3;
*a4 = B4;
*b3 = A3;
*b4 = A4;
}
}
#ifndef MINUS
a1 += 2;
a3 += 2;
#else
a1 -= 2;
a3 -= 2;
#endif
i = ((rows) & 1);
if (i > 0) {
ip1 = *piv;
b1 = a + ip1;
b3 = b1 + 1 * lda;
A1 = *a1;
B1 = *b1;
A3 = *a3;
@@ -445,78 +696,135 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
b1 = a + ip1;
b2 = a + ip2;
i = ((k2 - k1) >> 1);
if (i > 0) {
do {
A1 = *a1;
A2 = *a2;
B1 = *b1;
B2 = *b2;
i = ((rows) >> 1);
i --;
while (i > 0) {
A1 = *a1;
A2 = *a2;
B1 = *b1;
B2 = *b2;
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
if (b1 == a1) {
if (b1 == a1) {
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
}
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
*a1 = A2;
*a2 = A1;
} else {
*a1 = A2;
*a2 = B2;
*b2 = A1;
}
}
} else {
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
*a2 = B1;
*b1 = A1;
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
}
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
*a1 = A2;
*a2 = A1;
} else {
*a1 = A2;
*a2 = B2;
*b2 = A1;
}
}
} else {
if (b2 == a1) {
*a1 = A2;
*a2 = B1;
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
} else
if (b2 == a2) {
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
*b1 = A2;
} else {
*a1 = B1;
*a2 = B2;
*b1 = A1;
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
*b1 = A2;
} else {
*a1 = B1;
*a2 = B2;
*b1 = A1;
*b2 = A2;
}
}
*b2 = A2;
}
}
b1 = a + ip1;
b2 = a + ip2;
b1 = a + ip1;
b2 = a + ip2;
#ifndef MINUS
a1 += 2;
a1 += 2;
#else
a1 -= 2;
a1 -= 2;
#endif
i --;
} while (i > 0);
i --;
}
i = ((k2 - k1) & 1);
//Loop Ending (n=1)
A1 = *a1;
A2 = *a2;
B1 = *b1;
B2 = *b2;
if (b1 == a1) {
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
}
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
*a1 = A2;
*a2 = A1;
} else {
*a1 = A2;
*a2 = B2;
*b2 = A1;
}
}
} else {
if (b2 == a1) {
*a1 = A2;
*a2 = B1;
*b1 = A1;
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
*b1 = A2;
} else {
*a1 = B1;
*a2 = B2;
*b1 = A1;
*b2 = A2;
}
}
#ifndef MINUS
a1 += 2;
#else
a1 -= 2;
#endif
//Remain
i = (rows & 1);
if (i > 0) {
ip1 = *piv;
b1 = a + ip1;
A1 = *a1;
B1 = *b1;
*a1 = B1;

File diff suppressed because it is too large Load Diff

View File

@@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
FLOAT *a, BLASLONG lda,
FLOAT *dummy2, BLASLONG dumy3, blasint *ipiv, BLASLONG incx){
BLASLONG i, j, ip1, ip2;
BLASLONG i, j, ip1, ip2, rows;
blasint *piv;
FLOAT *a1;
FLOAT *b1, *b2;
@@ -66,6 +66,38 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
#endif
if (n <= 0) return 0;
rows = k2-k1;
if (rows <=0) return 0;
if (rows == 1) {
//Only have 1 row
ip1 = *ipiv * 2;
#ifndef MINUS
a1 = a + (k1 + 1) * 2;
#else
a1 = a + k2 * 2;
#endif
b1 = a + ip1;
if(a1 == b1) return 0;
for(j=0; j<n; j++){
A1 = *(a1 + 0);
A2 = *(a1 + 1);
B1 = *(b1 + 0);
B2 = *(b1 + 1);
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
a1 += lda;
b1 += lda;
}
return 0;
}
j = n;
if (j > 0) {
@@ -87,9 +119,10 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
b2 = a + ip2;
i = ((k2 - k1) >> 1);
if (i > 0) {
do {
i --;
//Loop pipeline
//Main Loop
while (i > 0) {
#ifdef OPTERON
#ifndef MINUS
asm volatile("prefetchw 2 * 128(%0)\n" : : "r"(a1));
@@ -198,12 +231,98 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
a1 -= 4;
#endif
i --;
} while (i > 0);
}
i = ((k2 - k1) & 1);
//Loop Ending
A1 = *(a1 + 0);
A2 = *(a1 + 1);
A3 = *(a2 + 0);
A4 = *(a2 + 1);
B1 = *(b1 + 0);
B2 = *(b1 + 1);
B3 = *(b2 + 0);
B4 = *(b2 + 1);
if (b1 == a1) {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
}
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
} else {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A1;
*(b2 + 1) = A2;
}
}
} else {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = B1;
*(a2 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
*(b1 + 0) = A3;
*(b1 + 1) = A4;
} else {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
}
}
#ifndef MINUS
a1 += 4;
#else
a1 -= 4;
#endif
//Remain
i = (rows & 1);
if (i > 0) {
ip1 = *piv * 2;
b1 = a + ip1;
A1 = *(a1 + 0);
A2 = *(a1 + 1);
B1 = *(b1 + 0);

View File

@@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
FLOAT *a, BLASLONG lda,
FLOAT *dummy2, BLASLONG dumy3, blasint *ipiv, BLASLONG incx){
BLASLONG i, j, ip1, ip2;
BLASLONG i, j, ip1, ip2, rows;
blasint *piv;
FLOAT *a1;
FLOAT *b1, *b2;
@@ -68,6 +68,38 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
if (n <= 0) return 0;
rows = k2-k1;
if (rows <=0) return 0;
if (rows == 1) {
//Only have 1 row
ip1 = *ipiv * 2;
#ifndef MINUS
a1 = a + (k1 + 1) * 2;
#else
a1 = a + k2 * 2;
#endif
b1 = a + ip1;
if(a1 == b1) return 0;
for(j=0; j<n; j++){
A1 = *(a1 + 0);
A2 = *(a1 + 1);
B1 = *(b1 + 0);
B2 = *(b1 + 1);
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
a1 += lda;
b1 += lda;
}
return 0;
}
j = (n >> 1);
if (j > 0) {
@@ -88,10 +120,12 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
b1 = a + ip1;
b2 = a + ip2;
i = ((k2 - k1) >> 1);
if (i > 0) {
do {
i = (rows >> 1);
i--;
//Loop pipeline
//Main Loop
while (i > 0) {
#ifdef CORE2
#ifndef MINUS
asm volatile("prefetcht0 1 * 64(%0)\n" : : "r"(b1));
@@ -246,12 +280,149 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
a1 -= 4;
#endif
i --;
} while (i > 0);
}
i = ((k2 - k1) & 1);
//Loop Ending
A1 = *(a1 + 0);
A2 = *(a1 + 1);
A3 = *(a2 + 0);
A4 = *(a2 + 1);
A5 = *(a1 + 0 + lda);
A6 = *(a1 + 1 + lda);
A7 = *(a2 + 0 + lda);
A8 = *(a2 + 1 + lda);
B1 = *(b1 + 0);
B2 = *(b1 + 1);
B3 = *(b2 + 0);
B4 = *(b2 + 1);
B5 = *(b1 + 0 + lda);
B6 = *(b1 + 1 + lda);
B7 = *(b2 + 0 + lda);
B8 = *(b2 + 1 + lda);
if (b1 == a1) {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
*(a1 + 0 + lda) = A7;
*(a1 + 1 + lda) = A8;
*(a2 + 0 + lda) = A5;
*(a2 + 1 + lda) = A6;
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
*(a2 + 0 + lda) = B7;
*(a2 + 1 + lda) = B8;
*(b2 + 0 + lda) = A7;
*(b2 + 1 + lda) = A8;
}
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
*(a1 + 0 + lda) = A7;
*(a1 + 1 + lda) = A8;
*(a2 + 0 + lda) = A5;
*(a2 + 1 + lda) = A6;
} else {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A1;
*(b2 + 1) = A2;
*(a1 + 0 + lda) = A7;
*(a1 + 1 + lda) = A8;
*(a2 + 0 + lda) = B7;
*(a2 + 1 + lda) = B8;
*(b2 + 0 + lda) = A5;
*(b2 + 1 + lda) = A6;
}
}
} else {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = B1;
*(a2 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
*(a1 + 0 + lda) = A7;
*(a1 + 1 + lda) = A8;
*(a2 + 0 + lda) = B5;
*(a2 + 1 + lda) = B6;
*(b1 + 0 + lda) = A5;
*(b1 + 1 + lda) = A6;
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
*(a1 + 0 + lda) = B5;
*(a1 + 1 + lda) = B6;
*(b1 + 0 + lda) = A5;
*(b1 + 1 + lda) = A6;
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
*(b1 + 0) = A3;
*(b1 + 1) = A4;
*(a1 + 0 + lda) = B5;
*(a1 + 1 + lda) = B6;
*(a2 + 0 + lda) = A5;
*(a2 + 1 + lda) = A6;
*(b1 + 0 + lda) = A7;
*(b1 + 1 + lda) = A8;
} else {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
*(a1 + 0 + lda) = B5;
*(a1 + 1 + lda) = B6;
*(a2 + 0 + lda) = B7;
*(a2 + 1 + lda) = B8;
*(b1 + 0 + lda) = A5;
*(b1 + 1 + lda) = A6;
*(b2 + 0 + lda) = A7;
*(b2 + 1 + lda) = A8;
}
}
#ifndef MINUS
a1 += 4;
#else
a1 -= 4;
#endif
//Remain
i = (rows & 1);
if (i > 0) {
ip1 = *piv * 2;
b1 = a + ip1;
A1 = *(a1 + 0);
A2 = *(a1 + 1);
A3 = *(a1 + 0 + lda);
@@ -293,10 +464,12 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
b1 = a + ip1;
b2 = a + ip2;
i = ((k2 - k1) >> 1);
if (i > 0) {
do {
i = (rows >> 1);
i--;
//Loop pipeline
//Main Loop
while (i > 0) {
A1 = *(a1 + 0);
A2 = *(a1 + 1);
A3 = *(a2 + 0);
@@ -384,12 +557,94 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
a1 -= 4;
#endif
i --;
} while (i > 0);
}
//Loop Ending
A1 = *(a1 + 0);
A2 = *(a1 + 1);
A3 = *(a2 + 0);
A4 = *(a2 + 1);
B1 = *(b1 + 0);
B2 = *(b1 + 1);
B3 = *(b2 + 0);
B4 = *(b2 + 1);
if (b1 == a1) {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
}
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
} else {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A1;
*(b2 + 1) = A2;
}
}
} else {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = B1;
*(a2 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
*(b1 + 0) = A3;
*(b1 + 1) = A4;
} else {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
}
}
#ifndef MINUS
a1 += 4;
#else
a1 -= 4;
#endif
i = ((k2 - k1) & 1);
//Remain
i = (rows & 1);
if (i > 0) {
ip1 = *piv * 2;
b1 = a + ip1;
A1 = *(a1 + 0);
A2 = *(a1 + 1);
B1 = *(b1 + 0);

View File

@@ -55,7 +55,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
FLOAT *a, BLASLONG lda,
FLOAT *dummy2, BLASLONG dumy3, blasint *ipiv, BLASLONG incx){
BLASLONG i, j, ip1, ip2;
BLASLONG i, j, ip1, ip2, rows;
blasint *piv;
FLOAT *a1, *a3, *a5, *a7;
FLOAT *b1, *b2, *b3, *b4;
@@ -76,6 +76,38 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
#endif
if (n <= 0) return 0;
rows = k2-k1;
if (rows <=0) return 0;
if (rows == 1) {
//Only have 1 row
ip1 = *ipiv * 2;
#ifndef MINUS
a1 = a + (k1 + 1) * 2;
#else
a1 = a + k2 * 2;
#endif
b1 = a + ip1;
if(a1 == b1) return 0;
for(j=0; j<n; j++){
A1 = *(a1 + 0);
A2 = *(a1 + 1);
B1 = *(b1 + 0);
B2 = *(b1 + 1);
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
a1 += lda;
b1 += lda;
}
return 0;
}
j = (n >> 2);
if (j > 0) {
@@ -107,10 +139,12 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
b7 = b1 + 3 * lda;
b8 = b2 + 3 * lda;
i = ((k2 - k1) >> 1);
if (i > 0) {
do {
i = (rows >> 1);
i--;
//Loop pipeline
//Main Loop
while (i > 0) {
A1 = *(a1 + 0);
A2 = *(a1 + 1);
A3 = *(a2 + 0);
@@ -366,12 +400,260 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
a7 -= 4;
#endif
i --;
} while (i > 0);
}
i = ((k2 - k1) & 1);
//Loop Ending
A1 = *(a1 + 0);
A2 = *(a1 + 1);
A3 = *(a2 + 0);
A4 = *(a2 + 1);
A5 = *(a3 + 0);
A6 = *(a3 + 1);
A7 = *(a4 + 0);
A8 = *(a4 + 1);
A9 = *(a5 + 0);
A10 = *(a5 + 1);
A11 = *(a6 + 0);
A12 = *(a6 + 1);
A13 = *(a7 + 0);
A14 = *(a7 + 1);
A15 = *(a8 + 0);
A16 = *(a8 + 1);
B1 = *(b1 + 0);
B2 = *(b1 + 1);
B3 = *(b2 + 0);
B4 = *(b2 + 1);
B5 = *(b3 + 0);
B6 = *(b3 + 1);
B7 = *(b4 + 0);
B8 = *(b4 + 1);
B9 = *(b5 + 0);
B10 = *(b5 + 1);
B11 = *(b6 + 0);
B12 = *(b6 + 1);
B13 = *(b7 + 0);
B14 = *(b7 + 1);
B15 = *(b8 + 0);
B16 = *(b8 + 1);
if (b1 == a1) {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
*(a3 + 0) = A7;
*(a3 + 1) = A8;
*(a4 + 0) = A5;
*(a4 + 1) = A6;
*(a5 + 0) = A11;
*(a5 + 1) = A12;
*(a6 + 0) = A9;
*(a6 + 1) = A10;
*(a7 + 0) = A15;
*(a7 + 1) = A16;
*(a8 + 0) = A13;
*(a8 + 1) = A14;
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
*(a4 + 0) = B7;
*(a4 + 1) = B8;
*(b4 + 0) = A7;
*(b4 + 1) = A8;
*(a6 + 0) = B11;
*(a6 + 1) = B12;
*(b6 + 0) = A11;
*(b6 + 1) = A12;
*(a8 + 0) = B15;
*(a8 + 1) = B16;
*(b8 + 0) = A15;
*(b8 + 1) = A16;
}
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
*(a3 + 0) = A7;
*(a3 + 1) = A8;
*(a4 + 0) = A5;
*(a4 + 1) = A6;
*(a5 + 0) = A11;
*(a5 + 1) = A12;
*(a6 + 0) = A9;
*(a6 + 1) = A10;
*(a7 + 0) = A15;
*(a7 + 1) = A16;
*(a8 + 0) = A13;
*(a8 + 1) = A14;
} else {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A1;
*(b2 + 1) = A2;
*(a3 + 0) = A7;
*(a3 + 1) = A8;
*(a4 + 0) = B7;
*(a4 + 1) = B8;
*(b4 + 0) = A5;
*(b4 + 1) = A6;
*(a5 + 0) = A11;
*(a5 + 1) = A12;
*(a6 + 0) = B11;
*(a6 + 1) = B12;
*(b6 + 0) = A9;
*(b6 + 1) = A10;
*(a7 + 0) = A15;
*(a7 + 1) = A16;
*(a8 + 0) = B15;
*(a8 + 1) = B16;
*(b8 + 0) = A13;
*(b8 + 1) = A14;
}
}
} else {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = B1;
*(a2 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
*(a3 + 0) = A7;
*(a3 + 1) = A8;
*(a4 + 0) = B5;
*(a4 + 1) = B6;
*(b3 + 0) = A5;
*(b3 + 1) = A6;
*(a5 + 0) = A11;
*(a5 + 1) = A12;
*(a6 + 0) = B9;
*(a6 + 1) = B10;
*(b5 + 0) = A9;
*(b5 + 1) = A10;
*(a7 + 0) = A15;
*(a7 + 1) = A16;
*(a8 + 0) = B13;
*(a8 + 1) = B14;
*(b7 + 0) = A13;
*(b7 + 1) = A14;
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
*(a3 + 0) = B5;
*(a3 + 1) = B6;
*(b3 + 0) = A5;
*(b3 + 1) = A6;
*(a5 + 0) = B9;
*(a5 + 1) = B10;
*(b5 + 0) = A9;
*(b5 + 1) = A10;
*(a7 + 0) = B13;
*(a7 + 1) = B14;
*(b7 + 0) = A13;
*(b7 + 1) = A14;
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
*(b1 + 0) = A3;
*(b1 + 1) = A4;
*(a3 + 0) = B5;
*(a3 + 1) = B6;
*(a4 + 0) = A5;
*(a4 + 1) = A6;
*(b3 + 0) = A7;
*(b3 + 1) = A8;
*(a5 + 0) = B9;
*(a5 + 1) = B10;
*(a6 + 0) = A9;
*(a6 + 1) = A10;
*(b5 + 0) = A11;
*(b5 + 1) = A12;
*(a7 + 0) = B13;
*(a7 + 1) = B14;
*(a8 + 0) = A13;
*(a8 + 1) = A14;
*(b7 + 0) = A15;
*(b7 + 1) = A16;
} else {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
*(a3 + 0) = B5;
*(a3 + 1) = B6;
*(a4 + 0) = B7;
*(a4 + 1) = B8;
*(b3 + 0) = A5;
*(b3 + 1) = A6;
*(b4 + 0) = A7;
*(b4 + 1) = A8;
*(a5 + 0) = B9;
*(a5 + 1) = B10;
*(a6 + 0) = B11;
*(a6 + 1) = B12;
*(b5 + 0) = A9;
*(b5 + 1) = A10;
*(b6 + 0) = A11;
*(b6 + 1) = A12;
*(a7 + 0) = B13;
*(a7 + 1) = B14;
*(a8 + 0) = B15;
*(a8 + 1) = B16;
*(b7 + 0) = A13;
*(b7 + 1) = A14;
*(b8 + 0) = A15;
*(b8 + 1) = A16;
}
}
#ifndef MINUS
a1 += 4;
a3 += 4;
a5 += 4;
a7 += 4;
#else
a1 -= 4;
a3 -= 4;
a5 -= 4;
a7 -= 4;
#endif
//Remain
i = (rows & 1);
if (i > 0) {
ip1 = *piv * 2;
b1 = a + ip1;
b3 = b1 + 1 * lda;
b5 = b1 + 2 * lda;
b7 = b1 + 3 * lda;
A1 = *(a1 + 0);
A2 = *(a1 + 1);
A3 = *(a3 + 0);
@@ -435,37 +717,205 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
b3 = b1 + lda;
b4 = b2 + lda;
i = ((k2 - k1) >> 1);
if (i > 0) {
do {
A1 = *(a1 + 0);
A2 = *(a1 + 1);
A3 = *(a2 + 0);
A4 = *(a2 + 1);
A5 = *(a3 + 0);
A6 = *(a3 + 1);
A7 = *(a4 + 0);
A8 = *(a4 + 1);
i = (rows >> 1);
i--;
B1 = *(b1 + 0);
B2 = *(b1 + 1);
B3 = *(b2 + 0);
B4 = *(b2 + 1);
//Loop pipeline
//Main Loop
while (i > 0) {
A1 = *(a1 + 0);
A2 = *(a1 + 1);
A3 = *(a2 + 0);
A4 = *(a2 + 1);
B5 = *(b3 + 0);
B6 = *(b3 + 1);
B7 = *(b4 + 0);
B8 = *(b4 + 1);
A5 = *(a3 + 0);
A6 = *(a3 + 1);
A7 = *(a4 + 0);
A8 = *(a4 + 1);
B1 = *(b1 + 0);
B2 = *(b1 + 1);
B3 = *(b2 + 0);
B4 = *(b2 + 1);
ip1 = *piv * 2;
piv += incx;
ip2 = *piv * 2;
piv += incx;
B5 = *(b3 + 0);
B6 = *(b3 + 1);
B7 = *(b4 + 0);
B8 = *(b4 + 1);
if (b1 == a1) {
ip1 = *piv * 2;
piv += incx;
ip2 = *piv * 2;
piv += incx;
if (b1 == a1) {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
*(a3 + 0) = A7;
*(a3 + 1) = A8;
*(a4 + 0) = A5;
*(a4 + 1) = A6;
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
*(a4 + 0) = B7;
*(a4 + 1) = B8;
*(b4 + 0) = A7;
*(b4 + 1) = A8;
}
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
*(a3 + 0) = A7;
*(a3 + 1) = A8;
*(a4 + 0) = A5;
*(a4 + 1) = A6;
} else {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A1;
*(b2 + 1) = A2;
*(a3 + 0) = A7;
*(a3 + 1) = A8;
*(a4 + 0) = B7;
*(a4 + 1) = B8;
*(b4 + 0) = A5;
*(b4 + 1) = A6;
}
}
} else {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = B1;
*(a2 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
*(a3 + 0) = A7;
*(a3 + 1) = A8;
*(a4 + 0) = B5;
*(a4 + 1) = B6;
*(b3 + 0) = A5;
*(b3 + 1) = A6;
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
*(a3 + 0) = B5;
*(a3 + 1) = B6;
*(b3 + 0) = A5;
*(b3 + 1) = A6;
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
*(b1 + 0) = A3;
*(b1 + 1) = A4;
*(a3 + 0) = B5;
*(a3 + 1) = B6;
*(a4 + 0) = A5;
*(a4 + 1) = A6;
*(b3 + 0) = A7;
*(b3 + 1) = A8;
} else {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
*(a3 + 0) = B5;
*(a3 + 1) = B6;
*(a4 + 0) = B7;
*(a4 + 1) = B8;
*(b3 + 0) = A5;
*(b3 + 1) = A6;
*(b4 + 0) = A7;
*(b4 + 1) = A8;
}
}
b1 = a + ip1;
b2 = a + ip2;
b3 = b1 + lda;
b4 = b2 + lda;
#ifndef MINUS
a1 += 4;
a3 += 4;
#else
a1 -= 4;
a3 -= 4;
#endif
i --;
}
//Loop Ending
A1 = *(a1 + 0);
A2 = *(a1 + 1);
A3 = *(a2 + 0);
A4 = *(a2 + 1);
A5 = *(a3 + 0);
A6 = *(a3 + 1);
A7 = *(a4 + 0);
A8 = *(a4 + 1);
B1 = *(b1 + 0);
B2 = *(b1 + 1);
B3 = *(b2 + 0);
B4 = *(b2 + 1);
B5 = *(b3 + 0);
B6 = *(b3 + 1);
B7 = *(b4 + 0);
B8 = *(b4 + 1);
if (b1 == a1) {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
*(a3 + 0) = A7;
*(a3 + 1) = A8;
*(a4 + 0) = A5;
*(a4 + 1) = A6;
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
*(a4 + 0) = B7;
*(a4 + 1) = B8;
*(b4 + 0) = A7;
*(b4 + 1) = A8;
}
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
@@ -474,122 +924,96 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a3 + 1) = A8;
*(a4 + 0) = A5;
*(a4 + 1) = A6;
} else {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A1;
*(b2 + 1) = A2;
*(a3 + 0) = A7;
*(a3 + 1) = A8;
*(a4 + 0) = B7;
*(a4 + 1) = B8;
*(b4 + 0) = A5;
*(b4 + 1) = A6;
}
}
} else {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = B1;
*(a2 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
*(a3 + 0) = A7;
*(a3 + 1) = A8;
*(a4 + 0) = B5;
*(a4 + 1) = B6;
*(b3 + 0) = A5;
*(b3 + 1) = A6;
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
*(a3 + 0) = B5;
*(a3 + 1) = B6;
*(b3 + 0) = A5;
*(b3 + 1) = A6;
} else
if (b2 != a2) {
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
*(b1 + 0) = A3;
*(b1 + 1) = A4;
*(a3 + 0) = B5;
*(a3 + 1) = B6;
*(a4 + 0) = A5;
*(a4 + 1) = A6;
*(b3 + 0) = A7;
*(b3 + 1) = A8;
} else {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
*(a3 + 0) = B5;
*(a3 + 1) = B6;
*(a4 + 0) = B7;
*(a4 + 1) = B8;
*(b3 + 0) = A5;
*(b3 + 1) = A6;
*(b4 + 0) = A7;
*(b4 + 1) = A8;
}
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
*(a3 + 0) = A7;
*(a3 + 1) = A8;
*(a4 + 0) = A5;
*(a4 + 1) = A6;
} else {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A1;
*(b2 + 1) = A2;
*(a3 + 0) = A7;
*(a3 + 1) = A8;
*(a4 + 0) = B7;
*(a4 + 1) = B8;
*(b4 + 0) = A5;
*(b4 + 1) = A6;
}
}
} else {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = B1;
*(a2 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
*(a3 + 0) = A7;
*(a3 + 1) = A8;
*(a4 + 0) = B5;
*(a4 + 1) = B6;
*(b3 + 0) = A5;
*(b3 + 1) = A6;
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
*(a3 + 0) = B5;
*(a3 + 1) = B6;
*(b3 + 0) = A5;
*(b3 + 1) = A6;
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
*(b1 + 0) = A3;
*(b1 + 1) = A4;
*(a3 + 0) = B5;
*(a3 + 1) = B6;
*(a4 + 0) = A5;
*(a4 + 1) = A6;
*(b3 + 0) = A7;
*(b3 + 1) = A8;
} else {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
*(a3 + 0) = B5;
*(a3 + 1) = B6;
*(a4 + 0) = B7;
*(a4 + 1) = B8;
*(b3 + 0) = A5;
*(b3 + 1) = A6;
*(b4 + 0) = A7;
*(b4 + 1) = A8;
}
}
b1 = a + ip1;
b2 = a + ip2;
b3 = b1 + lda;
b4 = b2 + lda;
}
#ifndef MINUS
a1 += 4;
a3 += 4;
a1 += 4;
a3 += 4;
#else
a1 -= 4;
a3 -= 4;
a1 -= 4;
a3 -= 4;
#endif
i --;
} while (i > 0);
}
i = ((k2 - k1) & 1);
//Remain
i = (rows & 1);
if (i > 0) {
ip1 = *piv * 2;
b1 = a + ip1;
b3 = b1 + lda;
A1 = *(a1 + 0);
A2 = *(a1 + 1);
A3 = *(a3 + 0);
@@ -629,10 +1053,12 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
b1 = a + ip1;
b2 = a + ip2;
i = ((k2 - k1) >> 1);
if (i > 0) {
do {
i = (rows >> 1);
i--;
//Loop pipeline
//Main Loop
while (i > 0) {
A1 = *(a1 + 0);
A2 = *(a1 + 1);
A3 = *(a2 + 0);
@@ -720,12 +1146,94 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
a1 -= 4;
#endif
i --;
} while (i > 0);
}
//Loop Ending
A1 = *(a1 + 0);
A2 = *(a1 + 1);
A3 = *(a2 + 0);
A4 = *(a2 + 1);
B1 = *(b1 + 0);
B2 = *(b1 + 1);
B3 = *(b2 + 0);
B4 = *(b2 + 1);
if (b1 == a1) {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
}
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
} else {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A1;
*(b2 + 1) = A2;
}
}
} else {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = B1;
*(a2 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
*(b1 + 0) = A3;
*(b1 + 1) = A4;
} else {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
}
}
#ifndef MINUS
a1 += 4;
#else
a1 -= 4;
#endif
i = ((k2 - k1) & 1);
//Remain
i = (rows & 1);
if (i > 0) {
ip1 = *piv * 2;
b1 = a + ip1;
A1 = *(a1 + 0);
A2 = *(a1 + 1);
B1 = *(b1 + 0);

View File

@@ -16,12 +16,17 @@ LASWP = ../generic/laswp_k_1.c
ZLASWP = ../generic/zlaswp_k_1.c
endif
ifeq ($(DYNAMIC_ARCH), 1)
LASWP = ../generic/laswp_k_4.c
ZLASWP = ../generic/zlaswp_k_4.c
endif
ifndef LASWP
LASWP = ../generic/laswp_k_1.c
LASWP = ../generic/laswp_k.c
endif
ifndef ZLASWP
ZLASWP = ../generic/zlaswp_k_1.c
ZLASWP = ../generic/zlaswp_k.c
endif
include ../generic/Makefile

View File

@@ -21,12 +21,17 @@ LASWP = ../generic/laswp_k_1.c
ZLASWP = ../generic/zlaswp_k_1.c
endif
ifeq ($(DYNAMIC_ARCH), 1)
LASWP = ../generic/laswp_k_4.c
ZLASWP = ../generic/zlaswp_k_4.c
endif
ifndef LASWP
LASWP = ../generic/laswp_k_1.c
LASWP = ../generic/laswp_k.c
endif
ifndef ZLASWP
ZLASWP = ../generic/zlaswp_k_1.c
ZLASWP = ../generic/zlaswp_k.c
endif
include ../generic/Makefile

View File

@@ -53,20 +53,28 @@ typedef int blasint;
#include <complex.h>
typedef float _Complex openblas_complex_float;
typedef double _Complex openblas_complex_double;
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_complex_float_real(z) (creal(z))
#define openblas_complex_float_imag(z) (cimag(z))
#define openblas_complex_double_real(z) (creal(z))
#define openblas_complex_double_imag(z) (cimag(z))
typedef xdouble _Complex openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_complex_float_real(z) (creal(z))
#define openblas_complex_float_imag(z) (cimag(z))
#define openblas_complex_double_real(z) (creal(z))
#define openblas_complex_double_imag(z) (cimag(z))
#define openblas_complex_xdouble_real(z) (creal(z))
#define openblas_complex_xdouble_imag(z) (cimag(z))
#else
#define OPENBLAS_COMPLEX_STRUCT
typedef struct { float real, imag; } openblas_complex_float;
typedef struct { double real, imag; } openblas_complex_double;
#define openblas_make_complex_float(real, imag) {(real), (imag)}
#define openblas_make_complex_double(real, imag) {(real), (imag)}
#define openblas_complex_float_real(z) ((z).real)
#define openblas_complex_float_imag(z) ((z).imag)
#define openblas_complex_double_real(z) ((z).real)
#define openblas_complex_double_imag(z) ((z).imag)
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) {(real), (imag)}
#define openblas_make_complex_double(real, imag) {(real), (imag)}
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
#define openblas_complex_float_real(z) ((z).real)
#define openblas_complex_float_imag(z) ((z).imag)
#define openblas_complex_double_real(z) ((z).real)
#define openblas_complex_double_imag(z) ((z).imag)
#define openblas_complex_xdouble_real(z) ((z).real)
#define openblas_complex_xdouble_imag(z) ((z).imag)
#endif

View File

@@ -191,7 +191,7 @@ diff -ruN lapack-3.4.1.old/SRC/Makefile lapack-3.4.1/SRC/Makefile
+ slaqtr.$(SUFFIX) slar1v.$(SUFFIX) slar2v.$(SUFFIX) ilaslr.$(SUFFIX) ilaslc.$(SUFFIX) \
+ slarf.$(SUFFIX) slarfb.$(SUFFIX) slarfg.$(SUFFIX) slarfgp.$(SUFFIX) slarft.$(SUFFIX) slarfx.$(SUFFIX) slargv.$(SUFFIX) \
+ slarrv.$(SUFFIX) slartv.$(SUFFIX) \
+ slarz.$(SUFFIX) slarzb.$(SUFFIX) slarzt.$(SUFFIX) slaswp.$(SUFFIX) slasy2.$(SUFFIX) slasyf.$(SUFFIX) \
+ slarz.$(SUFFIX) slarzb.$(SUFFIX) slarzt.$(SUFFIX) slasy2.$(SUFFIX) slasyf.$(SUFFIX) \
+ slatbs.$(SUFFIX) slatdf.$(SUFFIX) slatps.$(SUFFIX) slatrd.$(SUFFIX) slatrs.$(SUFFIX) slatrz.$(SUFFIX) slatzm.$(SUFFIX) \
+ sopgtr.$(SUFFIX) sopmtr.$(SUFFIX) sorg2l.$(SUFFIX) sorg2r.$(SUFFIX) \
+ sorgbr.$(SUFFIX) sorghr.$(SUFFIX) sorgl2.$(SUFFIX) sorglq.$(SUFFIX) sorgql.$(SUFFIX) sorgqr.$(SUFFIX) sorgr2.$(SUFFIX) \
@@ -345,7 +345,7 @@ diff -ruN lapack-3.4.1.old/SRC/Makefile lapack-3.4.1/SRC/Makefile
+ clarf.$(SUFFIX) clarfb.$(SUFFIX) clarfg.$(SUFFIX) clarft.$(SUFFIX) clarfgp.$(SUFFIX) \
+ clarfx.$(SUFFIX) clargv.$(SUFFIX) clarnv.$(SUFFIX) clarrv.$(SUFFIX) clartg.$(SUFFIX) clartv.$(SUFFIX) \
+ clarz.$(SUFFIX) clarzb.$(SUFFIX) clarzt.$(SUFFIX) clascl.$(SUFFIX) claset.$(SUFFIX) clasr.$(SUFFIX) classq.$(SUFFIX) \
+ claswp.$(SUFFIX) clasyf.$(SUFFIX) clatbs.$(SUFFIX) clatdf.$(SUFFIX) clatps.$(SUFFIX) clatrd.$(SUFFIX) clatrs.$(SUFFIX) clatrz.$(SUFFIX) \
+ clasyf.$(SUFFIX) clatbs.$(SUFFIX) clatdf.$(SUFFIX) clatps.$(SUFFIX) clatrd.$(SUFFIX) clatrs.$(SUFFIX) clatrz.$(SUFFIX) \
+ clatzm.$(SUFFIX) cpbcon.$(SUFFIX) cpbequ.$(SUFFIX) cpbrfs.$(SUFFIX) cpbstf.$(SUFFIX) cpbsv.$(SUFFIX) \
+ cpbsvx.$(SUFFIX) cpbtf2.$(SUFFIX) cpbtrf.$(SUFFIX) cpbtrs.$(SUFFIX) cpocon.$(SUFFIX) cpoequ.$(SUFFIX) cporfs.$(SUFFIX) \
+ cposv.$(SUFFIX) cposvx.$(SUFFIX) cpotri.$(SUFFIX) cpstrf.$(SUFFIX) cpstf2.$(SUFFIX) \
@@ -484,7 +484,7 @@ diff -ruN lapack-3.4.1.old/SRC/Makefile lapack-3.4.1/SRC/Makefile
+ dlaqtr.$(SUFFIX) dlar1v.$(SUFFIX) dlar2v.$(SUFFIX) iladlr.$(SUFFIX) iladlc.$(SUFFIX) \
+ dlarf.$(SUFFIX) dlarfb.$(SUFFIX) dlarfg.$(SUFFIX) dlarfgp.$(SUFFIX) dlarft.$(SUFFIX) dlarfx.$(SUFFIX) \
+ dlargv.$(SUFFIX) dlarrv.$(SUFFIX) dlartv.$(SUFFIX) \
+ dlarz.$(SUFFIX) dlarzb.$(SUFFIX) dlarzt.$(SUFFIX) dlaswp.$(SUFFIX) dlasy2.$(SUFFIX) dlasyf.$(SUFFIX) \
+ dlarz.$(SUFFIX) dlarzb.$(SUFFIX) dlarzt.$(SUFFIX) dlasy2.$(SUFFIX) dlasyf.$(SUFFIX) \
+ dlatbs.$(SUFFIX) dlatdf.$(SUFFIX) dlatps.$(SUFFIX) dlatrd.$(SUFFIX) dlatrs.$(SUFFIX) dlatrz.$(SUFFIX) dlatzm.$(SUFFIX) \
+ dopgtr.$(SUFFIX) dopmtr.$(SUFFIX) dorg2l.$(SUFFIX) dorg2r.$(SUFFIX) \
+ dorgbr.$(SUFFIX) dorghr.$(SUFFIX) dorgl2.$(SUFFIX) dorglq.$(SUFFIX) dorgql.$(SUFFIX) dorgqr.$(SUFFIX) dorgr2.$(SUFFIX) \
@@ -643,7 +643,7 @@ diff -ruN lapack-3.4.1.old/SRC/Makefile lapack-3.4.1/SRC/Makefile
+ zlarfg.$(SUFFIX) zlarft.$(SUFFIX) zlarfgp.$(SUFFIX) \
+ zlarfx.$(SUFFIX) zlargv.$(SUFFIX) zlarnv.$(SUFFIX) zlarrv.$(SUFFIX) zlartg.$(SUFFIX) zlartv.$(SUFFIX) \
+ zlarz.$(SUFFIX) zlarzb.$(SUFFIX) zlarzt.$(SUFFIX) zlascl.$(SUFFIX) zlaset.$(SUFFIX) zlasr.$(SUFFIX) \
+ zlassq.$(SUFFIX) zlaswp.$(SUFFIX) zlasyf.$(SUFFIX) \
+ zlassq.$(SUFFIX) zlasyf.$(SUFFIX) \
+ zlatbs.$(SUFFIX) zlatdf.$(SUFFIX) zlatps.$(SUFFIX) zlatrd.$(SUFFIX) zlatrs.$(SUFFIX) zlatrz.$(SUFFIX) zlatzm.$(SUFFIX) zlauu2.$(SUFFIX) \
+ zpbcon.$(SUFFIX) zpbequ.$(SUFFIX) zpbrfs.$(SUFFIX) zpbstf.$(SUFFIX) zpbsv.$(SUFFIX) \
+ zpbsvx.$(SUFFIX) zpbtf2.$(SUFFIX) zpbtrf.$(SUFFIX) zpbtrs.$(SUFFIX) zpocon.$(SUFFIX) zpoequ.$(SUFFIX) zporfs.$(SUFFIX) \

932
patch.for_lapack-3.4.2 Normal file
View File

@@ -0,0 +1,932 @@
diff -ruN lapack-3.4.2.old/INSTALL/Makefile lapack-3.4.2/INSTALL/Makefile
--- lapack-3.4.2.old/INSTALL/Makefile 2011-10-01 04:37:03 +0200
+++ lapack-3.4.2/INSTALL/Makefile 2012-04-22 21:48:48 +0200
@@ -27,7 +27,7 @@
$(LOADER) $(LOADOPTS) -o testversion ilaver.o LAPACK_version.o
clean:
- rm -f *.o
+ rm -f *.o test*
.f.o:
$(FORTRAN) $(OPTS) -c $< -o $@
diff -ruN lapack-3.4.2.old/Makefile lapack-3.4.2/Makefile
--- lapack-3.4.2.old/Makefile 2012-04-13 20:13:07 +0200
+++ lapack-3.4.2/Makefile 2012-04-22 21:48:07 +0200
@@ -20,9 +20,12 @@
blaslib:
( cd BLAS/SRC; $(MAKE) )
-lapacklib: lapack_install
+lapacklib:
( cd SRC; $(MAKE) )
+lapack_prof:
+ ( cd SRC; $(MAKE) lapack_prof)
+
lapackelib: lapacklib
( cd lapacke; $(MAKE) )
diff -ruN lapack-3.4.2.old/SRC/Makefile lapack-3.4.2/SRC/Makefile
--- lapack-3.4.2.old/SRC/Makefile 2012-04-02 21:06:36 +0200
+++ lapack-3.4.2/SRC/Makefile 2012-04-22 21:40:21 +0200
@@ -54,363 +54,371 @@
#
#######################################################################
-ALLAUX = ilaenv.o ieeeck.o lsamen.o xerbla.o xerbla_array.o iparmq.o \
- ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \
- ../INSTALL/ilaver.o ../INSTALL/lsame.o ../INSTALL/slamch.o
+ALLAUX = ilaenv.$(SUFFIX) ieeeck.$(SUFFIX) lsamen.$(SUFFIX) xerbla_array.$(SUFFIX) iparmq.$(SUFFIX) \
+ ilaprec.$(SUFFIX) ilatrans.$(SUFFIX) ilauplo.$(SUFFIX) iladiag.$(SUFFIX) chla_transtype.$(SUFFIX) \
+ ../INSTALL/ilaver.$(SUFFIX)
SCLAUX = \
- sbdsdc.o \
- sbdsqr.o sdisna.o slabad.o slacpy.o sladiv.o slae2.o slaebz.o \
- slaed0.o slaed1.o slaed2.o slaed3.o slaed4.o slaed5.o slaed6.o \
- slaed7.o slaed8.o slaed9.o slaeda.o slaev2.o slagtf.o \
- slagts.o slamrg.o slanst.o \
- slapy2.o slapy3.o slarnv.o \
- slarra.o slarrb.o slarrc.o slarrd.o slarre.o slarrf.o slarrj.o \
- slarrk.o slarrr.o slaneg.o \
- slartg.o slaruv.o slas2.o slascl.o \
- slasd0.o slasd1.o slasd2.o slasd3.o slasd4.o slasd5.o slasd6.o \
- slasd7.o slasd8.o slasda.o slasdq.o slasdt.o \
- slaset.o slasq1.o slasq2.o slasq3.o slasq4.o slasq5.o slasq6.o \
- slasr.o slasrt.o slassq.o slasv2.o spttrf.o sstebz.o sstedc.o \
- ssteqr.o ssterf.o slaisnan.o sisnan.o \
- slartgp.o slartgs.o \
- ../INSTALL/second_$(TIMER).o
+ sbdsdc.$(SUFFIX) \
+ sbdsqr.$(SUFFIX) sdisna.$(SUFFIX) slabad.$(SUFFIX) slacpy.$(SUFFIX) sladiv.$(SUFFIX) slae2.$(SUFFIX) slaebz.$(SUFFIX) \
+ slaed0.$(SUFFIX) slaed1.$(SUFFIX) slaed2.$(SUFFIX) slaed3.$(SUFFIX) slaed4.$(SUFFIX) slaed5.$(SUFFIX) slaed6.$(SUFFIX) \
+ slaed7.$(SUFFIX) slaed8.$(SUFFIX) slaed9.$(SUFFIX) slaeda.$(SUFFIX) slaev2.$(SUFFIX) slagtf.$(SUFFIX) \
+ slagts.$(SUFFIX) slamrg.$(SUFFIX) slanst.$(SUFFIX) \
+ slapy2.$(SUFFIX) slapy3.$(SUFFIX) slarnv.$(SUFFIX) \
+ slarra.$(SUFFIX) slarrb.$(SUFFIX) slarrc.$(SUFFIX) slarrd.$(SUFFIX) slarre.$(SUFFIX) slarrf.$(SUFFIX) slarrj.$(SUFFIX) \
+ slarrk.$(SUFFIX) slarrr.$(SUFFIX) slaneg.$(SUFFIX) \
+ slartg.$(SUFFIX) slaruv.$(SUFFIX) slas2.$(SUFFIX) slascl.$(SUFFIX) \
+ slasd0.$(SUFFIX) slasd1.$(SUFFIX) slasd2.$(SUFFIX) slasd3.$(SUFFIX) slasd4.$(SUFFIX) slasd5.$(SUFFIX) slasd6.$(SUFFIX) \
+ slasd7.$(SUFFIX) slasd8.$(SUFFIX) slasda.$(SUFFIX) slasdq.$(SUFFIX) slasdt.$(SUFFIX) \
+ slaset.$(SUFFIX) slasq1.$(SUFFIX) slasq2.$(SUFFIX) slasq3.$(SUFFIX) slasq4.$(SUFFIX) slasq5.$(SUFFIX) slasq6.$(SUFFIX) \
+ slasr.$(SUFFIX) slasrt.$(SUFFIX) slassq.$(SUFFIX) slasv2.$(SUFFIX) spttrf.$(SUFFIX) sstebz.$(SUFFIX) sstedc.$(SUFFIX) \
+ ssteqr.$(SUFFIX) ssterf.$(SUFFIX) slaisnan.$(SUFFIX) sisnan.$(SUFFIX) \
+ slartgp.$(SUFFIX) slartgs.$(SUFFIX) \
+ ../INSTALL/second_$(TIMER).$(SUFFIX)
DZLAUX = \
- dbdsdc.o \
- dbdsqr.o ddisna.o dlabad.o dlacpy.o dladiv.o dlae2.o dlaebz.o \
- dlaed0.o dlaed1.o dlaed2.o dlaed3.o dlaed4.o dlaed5.o dlaed6.o \
- dlaed7.o dlaed8.o dlaed9.o dlaeda.o dlaev2.o dlagtf.o \
- dlagts.o dlamrg.o dlanst.o \
- dlapy2.o dlapy3.o dlarnv.o \
- dlarra.o dlarrb.o dlarrc.o dlarrd.o dlarre.o dlarrf.o dlarrj.o \
- dlarrk.o dlarrr.o dlaneg.o \
- dlartg.o dlaruv.o dlas2.o dlascl.o \
- dlasd0.o dlasd1.o dlasd2.o dlasd3.o dlasd4.o dlasd5.o dlasd6.o \
- dlasd7.o dlasd8.o dlasda.o dlasdq.o dlasdt.o \
- dlaset.o dlasq1.o dlasq2.o dlasq3.o dlasq4.o dlasq5.o dlasq6.o \
- dlasr.o dlasrt.o dlassq.o dlasv2.o dpttrf.o dstebz.o dstedc.o \
- dsteqr.o dsterf.o dlaisnan.o disnan.o \
- dlartgp.o dlartgs.o \
- ../INSTALL/dlamch.o ../INSTALL/dsecnd_$(TIMER).o
+ dbdsdc.$(SUFFIX) \
+ dbdsqr.$(SUFFIX) ddisna.$(SUFFIX) dlabad.$(SUFFIX) dlacpy.$(SUFFIX) dladiv.$(SUFFIX) dlae2.$(SUFFIX) dlaebz.$(SUFFIX) \
+ dlaed0.$(SUFFIX) dlaed1.$(SUFFIX) dlaed2.$(SUFFIX) dlaed3.$(SUFFIX) dlaed4.$(SUFFIX) dlaed5.$(SUFFIX) dlaed6.$(SUFFIX) \
+ dlaed7.$(SUFFIX) dlaed8.$(SUFFIX) dlaed9.$(SUFFIX) dlaeda.$(SUFFIX) dlaev2.$(SUFFIX) dlagtf.$(SUFFIX) \
+ dlagts.$(SUFFIX) dlamrg.$(SUFFIX) dlanst.$(SUFFIX) \
+ dlapy2.$(SUFFIX) dlapy3.$(SUFFIX) dlarnv.$(SUFFIX) \
+ dlarra.$(SUFFIX) dlarrb.$(SUFFIX) dlarrc.$(SUFFIX) dlarrd.$(SUFFIX) dlarre.$(SUFFIX) dlarrf.$(SUFFIX) dlarrj.$(SUFFIX) \
+ dlarrk.$(SUFFIX) dlarrr.$(SUFFIX) dlaneg.$(SUFFIX) \
+ dlartg.$(SUFFIX) dlaruv.$(SUFFIX) dlas2.$(SUFFIX) dlascl.$(SUFFIX) \
+ dlasd0.$(SUFFIX) dlasd1.$(SUFFIX) dlasd2.$(SUFFIX) dlasd3.$(SUFFIX) dlasd4.$(SUFFIX) dlasd5.$(SUFFIX) dlasd6.$(SUFFIX) \
+ dlasd7.$(SUFFIX) dlasd8.$(SUFFIX) dlasda.$(SUFFIX) dlasdq.$(SUFFIX) dlasdt.$(SUFFIX) \
+ dlaset.$(SUFFIX) dlasq1.$(SUFFIX) dlasq2.$(SUFFIX) dlasq3.$(SUFFIX) dlasq4.$(SUFFIX) dlasq5.$(SUFFIX) dlasq6.$(SUFFIX) \
+ dlasr.$(SUFFIX) dlasrt.$(SUFFIX) dlassq.$(SUFFIX) dlasv2.$(SUFFIX) dpttrf.$(SUFFIX) dstebz.$(SUFFIX) dstedc.$(SUFFIX) \
+ dsteqr.$(SUFFIX) dsterf.$(SUFFIX) dlaisnan.$(SUFFIX) disnan.$(SUFFIX) \
+ dlartgp.$(SUFFIX) dlartgs.$(SUFFIX) \
+ ../INSTALL/dsecnd_$(TIMER).$(SUFFIX)
SLASRC = \
- sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \
- sgbsvx.o sgbtf2.o sgbtrf.o sgbtrs.o sgebak.o sgebal.o sgebd2.o \
- sgebrd.o sgecon.o sgeequ.o sgees.o sgeesx.o sgeev.o sgeevx.o \
- sgegs.o sgegv.o sgehd2.o sgehrd.o sgelq2.o sgelqf.o \
- sgels.o sgelsd.o sgelss.o sgelsx.o sgelsy.o sgeql2.o sgeqlf.o \
- sgeqp3.o sgeqpf.o sgeqr2.o sgeqr2p.o sgeqrf.o sgeqrfp.o sgerfs.o \
- sgerq2.o sgerqf.o sgesc2.o sgesdd.o sgesv.o sgesvd.o sgesvx.o \
- sgetc2.o sgetf2.o sgetri.o \
- sggbak.o sggbal.o sgges.o sggesx.o sggev.o sggevx.o \
- sggglm.o sgghrd.o sgglse.o sggqrf.o \
- sggrqf.o sggsvd.o sggsvp.o sgtcon.o sgtrfs.o sgtsv.o \
- sgtsvx.o sgttrf.o sgttrs.o sgtts2.o shgeqz.o \
- shsein.o shseqr.o slabrd.o slacon.o slacn2.o \
- slaein.o slaexc.o slag2.o slags2.o slagtm.o slagv2.o slahqr.o \
- slahrd.o slahr2.o slaic1.o slaln2.o slals0.o slalsa.o slalsd.o \
- slangb.o slange.o slangt.o slanhs.o slansb.o slansp.o \
- slansy.o slantb.o slantp.o slantr.o slanv2.o \
- slapll.o slapmt.o \
- slaqgb.o slaqge.o slaqp2.o slaqps.o slaqsb.o slaqsp.o slaqsy.o \
- slaqr0.o slaqr1.o slaqr2.o slaqr3.o slaqr4.o slaqr5.o \
- slaqtr.o slar1v.o slar2v.o ilaslr.o ilaslc.o \
- slarf.o slarfb.o slarfg.o slarfgp.o slarft.o slarfx.o slargv.o \
- slarrv.o slartv.o \
- slarz.o slarzb.o slarzt.o slaswp.o slasy2.o slasyf.o \
- slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o slatzm.o \
- slauu2.o slauum.o sopgtr.o sopmtr.o sorg2l.o sorg2r.o \
- sorgbr.o sorghr.o sorgl2.o sorglq.o sorgql.o sorgqr.o sorgr2.o \
- sorgrq.o sorgtr.o sorm2l.o sorm2r.o \
- sormbr.o sormhr.o sorml2.o sormlq.o sormql.o sormqr.o sormr2.o \
- sormr3.o sormrq.o sormrz.o sormtr.o spbcon.o spbequ.o spbrfs.o \
- spbstf.o spbsv.o spbsvx.o \
- spbtf2.o spbtrf.o spbtrs.o spocon.o spoequ.o sporfs.o sposv.o \
- sposvx.o spotf2.o spotri.o spstrf.o spstf2.o \
- sppcon.o sppequ.o \
- spprfs.o sppsv.o sppsvx.o spptrf.o spptri.o spptrs.o sptcon.o \
- spteqr.o sptrfs.o sptsv.o sptsvx.o spttrs.o sptts2.o srscl.o \
- ssbev.o ssbevd.o ssbevx.o ssbgst.o ssbgv.o ssbgvd.o ssbgvx.o \
- ssbtrd.o sspcon.o sspev.o sspevd.o sspevx.o sspgst.o \
- sspgv.o sspgvd.o sspgvx.o ssprfs.o sspsv.o sspsvx.o ssptrd.o \
- ssptrf.o ssptri.o ssptrs.o sstegr.o sstein.o sstev.o sstevd.o sstevr.o \
- sstevx.o \
- ssycon.o ssyev.o ssyevd.o ssyevr.o ssyevx.o ssygs2.o \
- ssygst.o ssygv.o ssygvd.o ssygvx.o ssyrfs.o ssysv.o ssysvx.o \
- ssytd2.o ssytf2.o ssytrd.o ssytrf.o ssytri.o ssytri2.o ssytri2x.o \
- ssyswapr.o ssytrs.o ssytrs2.o ssyconv.o \
- stbcon.o \
- stbrfs.o stbtrs.o stgevc.o stgex2.o stgexc.o stgsen.o \
- stgsja.o stgsna.o stgsy2.o stgsyl.o stpcon.o stprfs.o stptri.o \
- stptrs.o \
- strcon.o strevc.o strexc.o strrfs.o strsen.o strsna.o strsyl.o \
- strti2.o strtri.o strtrs.o stzrqf.o stzrzf.o sstemr.o \
- slansf.o spftrf.o spftri.o spftrs.o ssfrk.o stfsm.o stftri.o stfttp.o \
- stfttr.o stpttf.o stpttr.o strttf.o strttp.o \
- sgejsv.o sgesvj.o sgsvj0.o sgsvj1.o \
- sgeequb.o ssyequb.o spoequb.o sgbequb.o \
- sbbcsd.o slapmr.o sorbdb.o sorcsd.o \
- sgeqrt.o sgeqrt2.o sgeqrt3.o sgemqrt.o \
- stpqrt.o stpqrt2.o stpmqrt.o stprfb.o
+ sgbbrd.$(SUFFIX) sgbcon.$(SUFFIX) sgbequ.$(SUFFIX) sgbrfs.$(SUFFIX) sgbsv.$(SUFFIX) \
+ sgbsvx.$(SUFFIX) sgbtf2.$(SUFFIX) sgbtrf.$(SUFFIX) sgbtrs.$(SUFFIX) sgebak.$(SUFFIX) sgebal.$(SUFFIX) sgebd2.$(SUFFIX) \
+ sgebrd.$(SUFFIX) sgecon.$(SUFFIX) sgeequ.$(SUFFIX) sgees.$(SUFFIX) sgeesx.$(SUFFIX) sgeev.$(SUFFIX) sgeevx.$(SUFFIX) \
+ sgegs.$(SUFFIX) sgegv.$(SUFFIX) sgehd2.$(SUFFIX) sgehrd.$(SUFFIX) sgelq2.$(SUFFIX) sgelqf.$(SUFFIX) \
+ sgels.$(SUFFIX) sgelsd.$(SUFFIX) sgelss.$(SUFFIX) sgelsx.$(SUFFIX) sgelsy.$(SUFFIX) sgeql2.$(SUFFIX) sgeqlf.$(SUFFIX) \
+ sgeqp3.$(SUFFIX) sgeqpf.$(SUFFIX) sgeqr2.$(SUFFIX) sgeqr2p.$(SUFFIX) sgeqrf.$(SUFFIX) sgeqrfp.$(SUFFIX) sgerfs.$(SUFFIX) \
+ sgerq2.$(SUFFIX) sgerqf.$(SUFFIX) sgesc2.$(SUFFIX) sgesdd.$(SUFFIX) sgesv.$(SUFFIX) sgesvd.$(SUFFIX) sgesvx.$(SUFFIX) \
+ sgetc2.$(SUFFIX) sgetri.$(SUFFIX) \
+ sggbak.$(SUFFIX) sggbal.$(SUFFIX) sgges.$(SUFFIX) sggesx.$(SUFFIX) sggev.$(SUFFIX) sggevx.$(SUFFIX) \
+ sggglm.$(SUFFIX) sgghrd.$(SUFFIX) sgglse.$(SUFFIX) sggqrf.$(SUFFIX) \
+ sggrqf.$(SUFFIX) sggsvd.$(SUFFIX) sggsvp.$(SUFFIX) sgtcon.$(SUFFIX) sgtrfs.$(SUFFIX) sgtsv.$(SUFFIX) \
+ sgtsvx.$(SUFFIX) sgttrf.$(SUFFIX) sgttrs.$(SUFFIX) sgtts2.$(SUFFIX) shgeqz.$(SUFFIX) \
+ shsein.$(SUFFIX) shseqr.$(SUFFIX) slabrd.$(SUFFIX) slacon.$(SUFFIX) slacn2.$(SUFFIX) \
+ slaein.$(SUFFIX) slaexc.$(SUFFIX) slag2.$(SUFFIX) slags2.$(SUFFIX) slagtm.$(SUFFIX) slagv2.$(SUFFIX) slahqr.$(SUFFIX) \
+ slahrd.$(SUFFIX) slahr2.$(SUFFIX) slaic1.$(SUFFIX) slaln2.$(SUFFIX) slals0.$(SUFFIX) slalsa.$(SUFFIX) slalsd.$(SUFFIX) \
+ slangb.$(SUFFIX) slange.$(SUFFIX) slangt.$(SUFFIX) slanhs.$(SUFFIX) slansb.$(SUFFIX) slansp.$(SUFFIX) \
+ slansy.$(SUFFIX) slantb.$(SUFFIX) slantp.$(SUFFIX) slantr.$(SUFFIX) slanv2.$(SUFFIX) \
+ slapll.$(SUFFIX) slapmt.$(SUFFIX) \
+ slaqgb.$(SUFFIX) slaqge.$(SUFFIX) slaqp2.$(SUFFIX) slaqps.$(SUFFIX) slaqsb.$(SUFFIX) slaqsp.$(SUFFIX) slaqsy.$(SUFFIX) \
+ slaqr0.$(SUFFIX) slaqr1.$(SUFFIX) slaqr2.$(SUFFIX) slaqr3.$(SUFFIX) slaqr4.$(SUFFIX) slaqr5.$(SUFFIX) \
+ slaqtr.$(SUFFIX) slar1v.$(SUFFIX) slar2v.$(SUFFIX) ilaslr.$(SUFFIX) ilaslc.$(SUFFIX) \
+ slarf.$(SUFFIX) slarfb.$(SUFFIX) slarfg.$(SUFFIX) slarfgp.$(SUFFIX) slarft.$(SUFFIX) slarfx.$(SUFFIX) slargv.$(SUFFIX) \
+ slarrv.$(SUFFIX) slartv.$(SUFFIX) \
+ slarz.$(SUFFIX) slarzb.$(SUFFIX) slarzt.$(SUFFIX) slasy2.$(SUFFIX) slasyf.$(SUFFIX) \
+ slatbs.$(SUFFIX) slatdf.$(SUFFIX) slatps.$(SUFFIX) slatrd.$(SUFFIX) slatrs.$(SUFFIX) slatrz.$(SUFFIX) slatzm.$(SUFFIX) \
+ sopgtr.$(SUFFIX) sopmtr.$(SUFFIX) sorg2l.$(SUFFIX) sorg2r.$(SUFFIX) \
+ sorgbr.$(SUFFIX) sorghr.$(SUFFIX) sorgl2.$(SUFFIX) sorglq.$(SUFFIX) sorgql.$(SUFFIX) sorgqr.$(SUFFIX) sorgr2.$(SUFFIX) \
+ sorgrq.$(SUFFIX) sorgtr.$(SUFFIX) sorm2l.$(SUFFIX) sorm2r.$(SUFFIX) \
+ sormbr.$(SUFFIX) sormhr.$(SUFFIX) sorml2.$(SUFFIX) sormlq.$(SUFFIX) sormql.$(SUFFIX) sormqr.$(SUFFIX) sormr2.$(SUFFIX) \
+ sormr3.$(SUFFIX) sormrq.$(SUFFIX) sormrz.$(SUFFIX) sormtr.$(SUFFIX) spbcon.$(SUFFIX) spbequ.$(SUFFIX) spbrfs.$(SUFFIX) \
+ spbstf.$(SUFFIX) spbsv.$(SUFFIX) spbsvx.$(SUFFIX) \
+ spbtf2.$(SUFFIX) spbtrf.$(SUFFIX) spbtrs.$(SUFFIX) spocon.$(SUFFIX) spoequ.$(SUFFIX) sporfs.$(SUFFIX) sposv.$(SUFFIX) \
+ sposvx.$(SUFFIX) spotri.$(SUFFIX) spstrf.$(SUFFIX) spstf2.$(SUFFIX) \
+ sppcon.$(SUFFIX) sppequ.$(SUFFIX) \
+ spprfs.$(SUFFIX) sppsv.$(SUFFIX) sppsvx.$(SUFFIX) spptrf.$(SUFFIX) spptri.$(SUFFIX) spptrs.$(SUFFIX) sptcon.$(SUFFIX) \
+ spteqr.$(SUFFIX) sptrfs.$(SUFFIX) sptsv.$(SUFFIX) sptsvx.$(SUFFIX) spttrs.$(SUFFIX) sptts2.$(SUFFIX) srscl.$(SUFFIX) \
+ ssbev.$(SUFFIX) ssbevd.$(SUFFIX) ssbevx.$(SUFFIX) ssbgst.$(SUFFIX) ssbgv.$(SUFFIX) ssbgvd.$(SUFFIX) ssbgvx.$(SUFFIX) \
+ ssbtrd.$(SUFFIX) sspcon.$(SUFFIX) sspev.$(SUFFIX) sspevd.$(SUFFIX) sspevx.$(SUFFIX) sspgst.$(SUFFIX) \
+ sspgv.$(SUFFIX) sspgvd.$(SUFFIX) sspgvx.$(SUFFIX) ssprfs.$(SUFFIX) sspsv.$(SUFFIX) sspsvx.$(SUFFIX) ssptrd.$(SUFFIX) \
+ ssptrf.$(SUFFIX) ssptri.$(SUFFIX) ssptrs.$(SUFFIX) sstegr.$(SUFFIX) sstein.$(SUFFIX) sstev.$(SUFFIX) sstevd.$(SUFFIX) sstevr.$(SUFFIX) \
+ sstevx.$(SUFFIX) \
+ ssycon.$(SUFFIX) ssyev.$(SUFFIX) ssyevd.$(SUFFIX) ssyevr.$(SUFFIX) ssyevx.$(SUFFIX) ssygs2.$(SUFFIX) \
+ ssygst.$(SUFFIX) ssygv.$(SUFFIX) ssygvd.$(SUFFIX) ssygvx.$(SUFFIX) ssyrfs.$(SUFFIX) ssysv.$(SUFFIX) ssysvx.$(SUFFIX) \
+ ssytd2.$(SUFFIX) ssytf2.$(SUFFIX) ssytrd.$(SUFFIX) ssytrf.$(SUFFIX) ssytri.$(SUFFIX) ssytri2.$(SUFFIX) ssytri2x.$(SUFFIX) \
+ ssyswapr.$(SUFFIX) ssytrs.$(SUFFIX) ssytrs2.$(SUFFIX) ssyconv.$(SUFFIX) \
+ stbcon.$(SUFFIX) \
+ stbrfs.$(SUFFIX) stbtrs.$(SUFFIX) stgevc.$(SUFFIX) stgex2.$(SUFFIX) stgexc.$(SUFFIX) stgsen.$(SUFFIX) \
+ stgsja.$(SUFFIX) stgsna.$(SUFFIX) stgsy2.$(SUFFIX) stgsyl.$(SUFFIX) stpcon.$(SUFFIX) stprfs.$(SUFFIX) stptri.$(SUFFIX) \
+ stptrs.$(SUFFIX) \
+ strcon.$(SUFFIX) strevc.$(SUFFIX) strexc.$(SUFFIX) strrfs.$(SUFFIX) strsen.$(SUFFIX) strsna.$(SUFFIX) strsyl.$(SUFFIX) \
+ strtrs.$(SUFFIX) stzrqf.$(SUFFIX) stzrzf.$(SUFFIX) sstemr.$(SUFFIX) \
+ slansf.$(SUFFIX) spftrf.$(SUFFIX) spftri.$(SUFFIX) spftrs.$(SUFFIX) ssfrk.$(SUFFIX) stfsm.$(SUFFIX) stftri.$(SUFFIX) stfttp.$(SUFFIX) \
+ stfttr.$(SUFFIX) stpttf.$(SUFFIX) stpttr.$(SUFFIX) strttf.$(SUFFIX) strttp.$(SUFFIX) \
+ sgejsv.$(SUFFIX) sgesvj.$(SUFFIX) sgsvj0.$(SUFFIX) sgsvj1.$(SUFFIX) \
+ sgeequb.$(SUFFIX) ssyequb.$(SUFFIX) spoequb.$(SUFFIX) sgbequb.$(SUFFIX) \
+ sbbcsd.$(SUFFIX) slapmr.$(SUFFIX) sorbdb.$(SUFFIX) sorcsd.$(SUFFIX) \
+ sgeqrt.$(SUFFIX) sgeqrt2.$(SUFFIX) sgeqrt3.$(SUFFIX) sgemqrt.$(SUFFIX) \
+ stpqrt.$(SUFFIX) stpqrt2.$(SUFFIX) stpmqrt.$(SUFFIX) stprfb.$(SUFFIX)
-DSLASRC = spotrs.o sgetrs.o spotrf.o sgetrf.o
+DSLASRC = spotrs.$(SUFFIX)
ifdef USEXBLAS
-SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \
- sla_gercond.o sla_gerpvgrw.o ssysvxx.o ssyrfsx.o \
- sla_syrfsx_extended.o sla_syamv.o sla_syrcond.o sla_syrpvgrw.o \
- sposvxx.o sporfsx.o sla_porfsx_extended.o sla_porcond.o \
- sla_porpvgrw.o sgbsvxx.o sgbrfsx.o sla_gbrfsx_extended.o \
- sla_gbamv.o sla_gbrcond.o sla_gbrpvgrw.o sla_lin_berr.o slarscl2.o \
- slascl2.o sla_wwaddw.o
+SXLASRC = sgesvxx.$(SUFFIX) sgerfsx.$(SUFFIX) sla_gerfsx_extended.$(SUFFIX) sla_geamv.$(SUFFIX) \
+ sla_gercond.$(SUFFIX) sla_gerpvgrw.$(SUFFIX) ssysvxx.$(SUFFIX) ssyrfsx.$(SUFFIX) \
+ sla_syrfsx_extended.$(SUFFIX) sla_syamv.$(SUFFIX) sla_syrcond.$(SUFFIX) sla_syrpvgrw.$(SUFFIX) \
+ sposvxx.$(SUFFIX) sporfsx.$(SUFFIX) sla_porfsx_extended.$(SUFFIX) sla_porcond.$(SUFFIX) \
+ sla_porpvgrw.$(SUFFIX) sgbsvxx.$(SUFFIX) sgbrfsx.$(SUFFIX) sla_gbrfsx_extended.$(SUFFIX) \
+ sla_gbamv.$(SUFFIX) sla_gbrcond.$(SUFFIX) sla_gbrpvgrw.$(SUFFIX) sla_lin_berr.$(SUFFIX) slarscl2.$(SUFFIX) \
+ slascl2.$(SUFFIX) sla_wwaddw.$(SUFFIX)
endif
CLASRC = \
- cbdsqr.o cgbbrd.o cgbcon.o cgbequ.o cgbrfs.o cgbsv.o cgbsvx.o \
- cgbtf2.o cgbtrf.o cgbtrs.o cgebak.o cgebal.o cgebd2.o cgebrd.o \
- cgecon.o cgeequ.o cgees.o cgeesx.o cgeev.o cgeevx.o \
- cgegs.o cgegv.o cgehd2.o cgehrd.o cgelq2.o cgelqf.o \
- cgels.o cgelsd.o cgelss.o cgelsx.o cgelsy.o cgeql2.o cgeqlf.o cgeqp3.o \
- cgeqpf.o cgeqr2.o cgeqr2p.o cgeqrf.o cgeqrfp.o cgerfs.o \
- cgerq2.o cgerqf.o cgesc2.o cgesdd.o cgesv.o cgesvd.o \
- cgesvx.o cgetc2.o cgetf2.o cgetri.o \
- cggbak.o cggbal.o cgges.o cggesx.o cggev.o cggevx.o cggglm.o \
- cgghrd.o cgglse.o cggqrf.o cggrqf.o \
- cggsvd.o cggsvp.o \
- cgtcon.o cgtrfs.o cgtsv.o cgtsvx.o cgttrf.o cgttrs.o cgtts2.o chbev.o \
- chbevd.o chbevx.o chbgst.o chbgv.o chbgvd.o chbgvx.o chbtrd.o \
- checon.o cheev.o cheevd.o cheevr.o cheevx.o chegs2.o chegst.o \
- chegv.o chegvd.o chegvx.o cherfs.o chesv.o chesvx.o chetd2.o \
- chetf2.o chetrd.o \
- chetrf.o chetri.o chetri2.o chetri2x.o cheswapr.o \
- chetrs.o chetrs2.o chgeqz.o chpcon.o chpev.o chpevd.o \
- chpevx.o chpgst.o chpgv.o chpgvd.o chpgvx.o chprfs.o chpsv.o \
- chpsvx.o \
- chptrd.o chptrf.o chptri.o chptrs.o chsein.o chseqr.o clabrd.o \
- clacgv.o clacon.o clacn2.o clacp2.o clacpy.o clacrm.o clacrt.o cladiv.o \
- claed0.o claed7.o claed8.o \
- claein.o claesy.o claev2.o clags2.o clagtm.o \
- clahef.o clahqr.o \
- clahrd.o clahr2.o claic1.o clals0.o clalsa.o clalsd.o clangb.o clange.o clangt.o \
- clanhb.o clanhe.o \
- clanhp.o clanhs.o clanht.o clansb.o clansp.o clansy.o clantb.o \
- clantp.o clantr.o clapll.o clapmt.o clarcm.o claqgb.o claqge.o \
- claqhb.o claqhe.o claqhp.o claqp2.o claqps.o claqsb.o \
- claqr0.o claqr1.o claqr2.o claqr3.o claqr4.o claqr5.o \
- claqsp.o claqsy.o clar1v.o clar2v.o ilaclr.o ilaclc.o \
- clarf.o clarfb.o clarfg.o clarft.o clarfgp.o \
- clarfx.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \
- clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \
- claswp.o clasyf.o clatbs.o clatdf.o clatps.o clatrd.o clatrs.o clatrz.o \
- clatzm.o clauu2.o clauum.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \
- cpbsvx.o cpbtf2.o cpbtrf.o cpbtrs.o cpocon.o cpoequ.o cporfs.o \
- cposv.o cposvx.o cpotf2.o cpotri.o cpstrf.o cpstf2.o \
- cppcon.o cppequ.o cpprfs.o cppsv.o cppsvx.o cpptrf.o cpptri.o cpptrs.o \
- cptcon.o cpteqr.o cptrfs.o cptsv.o cptsvx.o cpttrf.o cpttrs.o cptts2.o \
- crot.o cspcon.o cspmv.o cspr.o csprfs.o cspsv.o \
- cspsvx.o csptrf.o csptri.o csptrs.o csrscl.o cstedc.o \
- cstegr.o cstein.o csteqr.o \
- csycon.o csymv.o \
- csyr.o csyrfs.o csysv.o csysvx.o csytf2.o csytrf.o csytri.o csytri2.o csytri2x.o \
- csyswapr.o csytrs.o csytrs2.o csyconv.o \
- ctbcon.o ctbrfs.o ctbtrs.o ctgevc.o ctgex2.o \
- ctgexc.o ctgsen.o ctgsja.o ctgsna.o ctgsy2.o ctgsyl.o ctpcon.o \
- ctprfs.o ctptri.o \
- ctptrs.o ctrcon.o ctrevc.o ctrexc.o ctrrfs.o ctrsen.o ctrsna.o \
- ctrsyl.o ctrti2.o ctrtri.o ctrtrs.o ctzrqf.o ctzrzf.o cung2l.o cung2r.o \
- cungbr.o cunghr.o cungl2.o cunglq.o cungql.o cungqr.o cungr2.o \
- cungrq.o cungtr.o cunm2l.o cunm2r.o cunmbr.o cunmhr.o cunml2.o \
- cunmlq.o cunmql.o cunmqr.o cunmr2.o cunmr3.o cunmrq.o cunmrz.o \
- cunmtr.o cupgtr.o cupmtr.o icmax1.o scsum1.o cstemr.o \
- chfrk.o ctfttp.o clanhf.o cpftrf.o cpftri.o cpftrs.o ctfsm.o ctftri.o \
- ctfttr.o ctpttf.o ctpttr.o ctrttf.o ctrttp.o \
- cgeequb.o cgbequb.o csyequb.o cpoequb.o cheequb.o \
- cbbcsd.o clapmr.o cunbdb.o cuncsd.o \
- cgeqrt.o cgeqrt2.o cgeqrt3.o cgemqrt.o \
- ctpqrt.o ctpqrt2.o ctpmqrt.o ctprfb.o
+ cbdsqr.$(SUFFIX) cgbbrd.$(SUFFIX) cgbcon.$(SUFFIX) cgbequ.$(SUFFIX) cgbrfs.$(SUFFIX) cgbsv.$(SUFFIX) cgbsvx.$(SUFFIX) \
+ cgbtf2.$(SUFFIX) cgbtrf.$(SUFFIX) cgbtrs.$(SUFFIX) cgebak.$(SUFFIX) cgebal.$(SUFFIX) cgebd2.$(SUFFIX) cgebrd.$(SUFFIX) \
+ cgecon.$(SUFFIX) cgeequ.$(SUFFIX) cgees.$(SUFFIX) cgeesx.$(SUFFIX) cgeev.$(SUFFIX) cgeevx.$(SUFFIX) \
+ cgegs.$(SUFFIX) cgegv.$(SUFFIX) cgehd2.$(SUFFIX) cgehrd.$(SUFFIX) cgelq2.$(SUFFIX) cgelqf.$(SUFFIX) \
+ cgels.$(SUFFIX) cgelsd.$(SUFFIX) cgelss.$(SUFFIX) cgelsx.$(SUFFIX) cgelsy.$(SUFFIX) cgeql2.$(SUFFIX) cgeqlf.$(SUFFIX) cgeqp3.$(SUFFIX) \
+ cgeqpf.$(SUFFIX) cgeqr2.$(SUFFIX) cgeqr2p.$(SUFFIX) cgeqrf.$(SUFFIX) cgeqrfp.$(SUFFIX) cgerfs.$(SUFFIX) \
+ cgerq2.$(SUFFIX) cgerqf.$(SUFFIX) cgesc2.$(SUFFIX) cgesdd.$(SUFFIX) cgesv.$(SUFFIX) cgesvd.$(SUFFIX) \
+ cgesvx.$(SUFFIX) cgetc2.$(SUFFIX) cgetri.$(SUFFIX) \
+ cggbak.$(SUFFIX) cggbal.$(SUFFIX) cgges.$(SUFFIX) cggesx.$(SUFFIX) cggev.$(SUFFIX) cggevx.$(SUFFIX) cggglm.$(SUFFIX) \
+ cgghrd.$(SUFFIX) cgglse.$(SUFFIX) cggqrf.$(SUFFIX) cggrqf.$(SUFFIX) \
+ cggsvd.$(SUFFIX) cggsvp.$(SUFFIX) \
+ cgtcon.$(SUFFIX) cgtrfs.$(SUFFIX) cgtsv.$(SUFFIX) cgtsvx.$(SUFFIX) cgttrf.$(SUFFIX) cgttrs.$(SUFFIX) cgtts2.$(SUFFIX) chbev.$(SUFFIX) \
+ chbevd.$(SUFFIX) chbevx.$(SUFFIX) chbgst.$(SUFFIX) chbgv.$(SUFFIX) chbgvd.$(SUFFIX) chbgvx.$(SUFFIX) chbtrd.$(SUFFIX) \
+ checon.$(SUFFIX) cheev.$(SUFFIX) cheevd.$(SUFFIX) cheevr.$(SUFFIX) cheevx.$(SUFFIX) chegs2.$(SUFFIX) chegst.$(SUFFIX) \
+ chegv.$(SUFFIX) chegvd.$(SUFFIX) chegvx.$(SUFFIX) cherfs.$(SUFFIX) chesv.$(SUFFIX) chesvx.$(SUFFIX) chetd2.$(SUFFIX) \
+ chetf2.$(SUFFIX) chetrd.$(SUFFIX) \
+ chetrf.$(SUFFIX) chetri.$(SUFFIX) chetri2.$(SUFFIX) chetri2x.$(SUFFIX) cheswapr.$(SUFFIX) \
+ chetrs.$(SUFFIX) chetrs2.$(SUFFIX) chgeqz.$(SUFFIX) chpcon.$(SUFFIX) chpev.$(SUFFIX) chpevd.$(SUFFIX) \
+ chpevx.$(SUFFIX) chpgst.$(SUFFIX) chpgv.$(SUFFIX) chpgvd.$(SUFFIX) chpgvx.$(SUFFIX) chprfs.$(SUFFIX) chpsv.$(SUFFIX) \
+ chpsvx.$(SUFFIX) \
+ chptrd.$(SUFFIX) chptrf.$(SUFFIX) chptri.$(SUFFIX) chptrs.$(SUFFIX) chsein.$(SUFFIX) chseqr.$(SUFFIX) clabrd.$(SUFFIX) \
+ clacgv.$(SUFFIX) clacon.$(SUFFIX) clacn2.$(SUFFIX) clacp2.$(SUFFIX) clacpy.$(SUFFIX) clacrm.$(SUFFIX) clacrt.$(SUFFIX) cladiv.$(SUFFIX) \
+ claed0.$(SUFFIX) claed7.$(SUFFIX) claed8.$(SUFFIX) \
+ claein.$(SUFFIX) claesy.$(SUFFIX) claev2.$(SUFFIX) clags2.$(SUFFIX) clagtm.$(SUFFIX) \
+ clahef.$(SUFFIX) clahqr.$(SUFFIX) \
+ clahrd.$(SUFFIX) clahr2.$(SUFFIX) claic1.$(SUFFIX) clals0.$(SUFFIX) clalsa.$(SUFFIX) clalsd.$(SUFFIX) clangb.$(SUFFIX) clange.$(SUFFIX) clangt.$(SUFFIX) \
+ clanhb.$(SUFFIX) clanhe.$(SUFFIX) \
+ clanhp.$(SUFFIX) clanhs.$(SUFFIX) clanht.$(SUFFIX) clansb.$(SUFFIX) clansp.$(SUFFIX) clansy.$(SUFFIX) clantb.$(SUFFIX) \
+ clantp.$(SUFFIX) clantr.$(SUFFIX) clapll.$(SUFFIX) clapmt.$(SUFFIX) clarcm.$(SUFFIX) claqgb.$(SUFFIX) claqge.$(SUFFIX) \
+ claqhb.$(SUFFIX) claqhe.$(SUFFIX) claqhp.$(SUFFIX) claqp2.$(SUFFIX) claqps.$(SUFFIX) claqsb.$(SUFFIX) \
+ claqr0.$(SUFFIX) claqr1.$(SUFFIX) claqr2.$(SUFFIX) claqr3.$(SUFFIX) claqr4.$(SUFFIX) claqr5.$(SUFFIX) \
+ claqsp.$(SUFFIX) claqsy.$(SUFFIX) clar1v.$(SUFFIX) clar2v.$(SUFFIX) ilaclr.$(SUFFIX) ilaclc.$(SUFFIX) \
+ clarf.$(SUFFIX) clarfb.$(SUFFIX) clarfg.$(SUFFIX) clarft.$(SUFFIX) clarfgp.$(SUFFIX) \
+ clarfx.$(SUFFIX) clargv.$(SUFFIX) clarnv.$(SUFFIX) clarrv.$(SUFFIX) clartg.$(SUFFIX) clartv.$(SUFFIX) \
+ clarz.$(SUFFIX) clarzb.$(SUFFIX) clarzt.$(SUFFIX) clascl.$(SUFFIX) claset.$(SUFFIX) clasr.$(SUFFIX) classq.$(SUFFIX) \
+ clasyf.$(SUFFIX) clatbs.$(SUFFIX) clatdf.$(SUFFIX) clatps.$(SUFFIX) clatrd.$(SUFFIX) clatrs.$(SUFFIX) clatrz.$(SUFFIX) \
+ clatzm.$(SUFFIX) cpbcon.$(SUFFIX) cpbequ.$(SUFFIX) cpbrfs.$(SUFFIX) cpbstf.$(SUFFIX) cpbsv.$(SUFFIX) \
+ cpbsvx.$(SUFFIX) cpbtf2.$(SUFFIX) cpbtrf.$(SUFFIX) cpbtrs.$(SUFFIX) cpocon.$(SUFFIX) cpoequ.$(SUFFIX) cporfs.$(SUFFIX) \
+ cposv.$(SUFFIX) cposvx.$(SUFFIX) cpotri.$(SUFFIX) cpstrf.$(SUFFIX) cpstf2.$(SUFFIX) \
+ cppcon.$(SUFFIX) cppequ.$(SUFFIX) cpprfs.$(SUFFIX) cppsv.$(SUFFIX) cppsvx.$(SUFFIX) cpptrf.$(SUFFIX) cpptri.$(SUFFIX) cpptrs.$(SUFFIX) \
+ cptcon.$(SUFFIX) cpteqr.$(SUFFIX) cptrfs.$(SUFFIX) cptsv.$(SUFFIX) cptsvx.$(SUFFIX) cpttrf.$(SUFFIX) cpttrs.$(SUFFIX) cptts2.$(SUFFIX) \
+ crot.$(SUFFIX) cspcon.$(SUFFIX) cspmv.$(SUFFIX) cspr.$(SUFFIX) csprfs.$(SUFFIX) cspsv.$(SUFFIX) \
+ cspsvx.$(SUFFIX) csptrf.$(SUFFIX) csptri.$(SUFFIX) csptrs.$(SUFFIX) csrscl.$(SUFFIX) cstedc.$(SUFFIX) \
+ cstegr.$(SUFFIX) cstein.$(SUFFIX) csteqr.$(SUFFIX) \
+ csycon.$(SUFFIX) csymv.$(SUFFIX) \
+ csyr.$(SUFFIX) csyrfs.$(SUFFIX) csysv.$(SUFFIX) csysvx.$(SUFFIX) csytf2.$(SUFFIX) csytrf.$(SUFFIX) csytri.$(SUFFIX) csytri2.$(SUFFIX) csytri2x.$(SUFFIX) \
+ csyswapr.$(SUFFIX) csytrs.$(SUFFIX) csytrs2.$(SUFFIX) csyconv.$(SUFFIX) \
+ ctbcon.$(SUFFIX) ctbrfs.$(SUFFIX) ctbtrs.$(SUFFIX) ctgevc.$(SUFFIX) ctgex2.$(SUFFIX) \
+ ctgexc.$(SUFFIX) ctgsen.$(SUFFIX) ctgsja.$(SUFFIX) ctgsna.$(SUFFIX) ctgsy2.$(SUFFIX) ctgsyl.$(SUFFIX) ctpcon.$(SUFFIX) \
+ ctprfs.$(SUFFIX) ctptri.$(SUFFIX) \
+ ctptrs.$(SUFFIX) ctrcon.$(SUFFIX) ctrevc.$(SUFFIX) ctrexc.$(SUFFIX) ctrrfs.$(SUFFIX) ctrsen.$(SUFFIX) ctrsna.$(SUFFIX) \
+ ctrsyl.$(SUFFIX) ctrtrs.$(SUFFIX) ctzrqf.$(SUFFIX) ctzrzf.$(SUFFIX) cung2l.$(SUFFIX) cung2r.$(SUFFIX) \
+ cungbr.$(SUFFIX) cunghr.$(SUFFIX) cungl2.$(SUFFIX) cunglq.$(SUFFIX) cungql.$(SUFFIX) cungqr.$(SUFFIX) cungr2.$(SUFFIX) \
+ cungrq.$(SUFFIX) cungtr.$(SUFFIX) cunm2l.$(SUFFIX) cunm2r.$(SUFFIX) cunmbr.$(SUFFIX) cunmhr.$(SUFFIX) cunml2.$(SUFFIX) \
+ cunmlq.$(SUFFIX) cunmql.$(SUFFIX) cunmqr.$(SUFFIX) cunmr2.$(SUFFIX) cunmr3.$(SUFFIX) cunmrq.$(SUFFIX) cunmrz.$(SUFFIX) \
+ cunmtr.$(SUFFIX) cupgtr.$(SUFFIX) cupmtr.$(SUFFIX) icmax1.$(SUFFIX) scsum1.$(SUFFIX) cstemr.$(SUFFIX) \
+ chfrk.$(SUFFIX) ctfttp.$(SUFFIX) clanhf.$(SUFFIX) cpftrf.$(SUFFIX) cpftri.$(SUFFIX) cpftrs.$(SUFFIX) ctfsm.$(SUFFIX) ctftri.$(SUFFIX) \
+ ctfttr.$(SUFFIX) ctpttf.$(SUFFIX) ctpttr.$(SUFFIX) ctrttf.$(SUFFIX) ctrttp.$(SUFFIX) \
+ cgeequb.$(SUFFIX) cgbequb.$(SUFFIX) csyequb.$(SUFFIX) cpoequb.$(SUFFIX) cheequb.$(SUFFIX) \
+ cbbcsd.$(SUFFIX) clapmr.$(SUFFIX) cunbdb.$(SUFFIX) cuncsd.$(SUFFIX) \
+ cgeqrt.$(SUFFIX) cgeqrt2.$(SUFFIX) cgeqrt3.$(SUFFIX) cgemqrt.$(SUFFIX) \
+ ctpqrt.$(SUFFIX) ctpqrt2.$(SUFFIX) ctpmqrt.$(SUFFIX) ctprfb.$(SUFFIX)
ifdef USEXBLAS
-CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \
- cla_gercond_c.o cla_gercond_x.o cla_gerpvgrw.o \
- csysvxx.o csyrfsx.o cla_syrfsx_extended.o cla_syamv.o \
- cla_syrcond_c.o cla_syrcond_x.o cla_syrpvgrw.o \
- cposvxx.o cporfsx.o cla_porfsx_extended.o \
- cla_porcond_c.o cla_porcond_x.o cla_porpvgrw.o \
- cgbsvxx.o cgbrfsx.o cla_gbrfsx_extended.o cla_gbamv.o \
- cla_gbrcond_c.o cla_gbrcond_x.o cla_gbrpvgrw.o \
- chesvxx.o cherfsx.o cla_herfsx_extended.o cla_heamv.o \
- cla_hercond_c.o cla_hercond_x.o cla_herpvgrw.o \
- cla_lin_berr.o clarscl2.o clascl2.o cla_wwaddw.o
+CXLASRC = cgesvxx.$(SUFFIX) cgerfsx.$(SUFFIX) cla_gerfsx_extended.$(SUFFIX) cla_geamv.$(SUFFIX) \
+ cla_gercond_c.$(SUFFIX) cla_gercond_x.$(SUFFIX) cla_gerpvgrw.$(SUFFIX) \
+ csysvxx.$(SUFFIX) csyrfsx.$(SUFFIX) cla_syrfsx_extended.$(SUFFIX) cla_syamv.$(SUFFIX) \
+ cla_syrcond_c.$(SUFFIX) cla_syrcond_x.$(SUFFIX) cla_syrpvgrw.$(SUFFIX) \
+ cposvxx.$(SUFFIX) cporfsx.$(SUFFIX) cla_porfsx_extended.$(SUFFIX) \
+ cla_porcond_c.$(SUFFIX) cla_porcond_x.$(SUFFIX) cla_porpvgrw.$(SUFFIX) \
+ cgbsvxx.$(SUFFIX) cgbrfsx.$(SUFFIX) cla_gbrfsx_extended.$(SUFFIX) cla_gbamv.$(SUFFIX) \
+ cla_gbrcond_c.$(SUFFIX) cla_gbrcond_x.$(SUFFIX) cla_gbrpvgrw.$(SUFFIX) \
+ chesvxx.$(SUFFIX) cherfsx.$(SUFFIX) cla_herfsx_extended.$(SUFFIX) cla_heamv.$(SUFFIX) \
+ cla_hercond_c.$(SUFFIX) cla_hercond_x.$(SUFFIX) cla_herpvgrw.$(SUFFIX) \
+ cla_lin_berr.$(SUFFIX) clarscl2.$(SUFFIX) clascl2.$(SUFFIX) cla_wwaddw.$(SUFFIX)
endif
-ZCLASRC = cpotrs.o cgetrs.o cpotrf.o cgetrf.o
+ZCLASRC = cpotrs.$(SUFFIX)
DLASRC = \
- dgbbrd.o dgbcon.o dgbequ.o dgbrfs.o dgbsv.o \
- dgbsvx.o dgbtf2.o dgbtrf.o dgbtrs.o dgebak.o dgebal.o dgebd2.o \
- dgebrd.o dgecon.o dgeequ.o dgees.o dgeesx.o dgeev.o dgeevx.o \
- dgegs.o dgegv.o dgehd2.o dgehrd.o dgelq2.o dgelqf.o \
- dgels.o dgelsd.o dgelss.o dgelsx.o dgelsy.o dgeql2.o dgeqlf.o \
- dgeqp3.o dgeqpf.o dgeqr2.o dgeqr2p.o dgeqrf.o dgeqrfp.o dgerfs.o \
- dgerq2.o dgerqf.o dgesc2.o dgesdd.o dgesv.o dgesvd.o dgesvx.o \
- dgetc2.o dgetf2.o dgetrf.o dgetri.o \
- dgetrs.o dggbak.o dggbal.o dgges.o dggesx.o dggev.o dggevx.o \
- dggglm.o dgghrd.o dgglse.o dggqrf.o \
- dggrqf.o dggsvd.o dggsvp.o dgtcon.o dgtrfs.o dgtsv.o \
- dgtsvx.o dgttrf.o dgttrs.o dgtts2.o dhgeqz.o \
- dhsein.o dhseqr.o dlabrd.o dlacon.o dlacn2.o \
- dlaein.o dlaexc.o dlag2.o dlags2.o dlagtm.o dlagv2.o dlahqr.o \
- dlahrd.o dlahr2.o dlaic1.o dlaln2.o dlals0.o dlalsa.o dlalsd.o \
- dlangb.o dlange.o dlangt.o dlanhs.o dlansb.o dlansp.o \
- dlansy.o dlantb.o dlantp.o dlantr.o dlanv2.o \
- dlapll.o dlapmt.o \
- dlaqgb.o dlaqge.o dlaqp2.o dlaqps.o dlaqsb.o dlaqsp.o dlaqsy.o \
- dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \
- dlaqtr.o dlar1v.o dlar2v.o iladlr.o iladlc.o \
- dlarf.o dlarfb.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o \
- dlargv.o dlarrv.o dlartv.o \
- dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o dlasyf.o \
- dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o dlauu2.o \
- dlauum.o dopgtr.o dopmtr.o dorg2l.o dorg2r.o \
- dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \
- dorgrq.o dorgtr.o dorm2l.o dorm2r.o \
- dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \
- dormr3.o dormrq.o dormrz.o dormtr.o dpbcon.o dpbequ.o dpbrfs.o \
- dpbstf.o dpbsv.o dpbsvx.o \
- dpbtf2.o dpbtrf.o dpbtrs.o dpocon.o dpoequ.o dporfs.o dposv.o \
- dposvx.o dpotf2.o dpotrf.o dpotri.o dpotrs.o dpstrf.o dpstf2.o \
- dppcon.o dppequ.o \
- dpprfs.o dppsv.o dppsvx.o dpptrf.o dpptri.o dpptrs.o dptcon.o \
- dpteqr.o dptrfs.o dptsv.o dptsvx.o dpttrs.o dptts2.o drscl.o \
- dsbev.o dsbevd.o dsbevx.o dsbgst.o dsbgv.o dsbgvd.o dsbgvx.o \
- dsbtrd.o dspcon.o dspev.o dspevd.o dspevx.o dspgst.o \
- dspgv.o dspgvd.o dspgvx.o dsprfs.o dspsv.o dspsvx.o dsptrd.o \
- dsptrf.o dsptri.o dsptrs.o dstegr.o dstein.o dstev.o dstevd.o dstevr.o \
- dstevx.o \
- dsycon.o dsyev.o dsyevd.o dsyevr.o \
- dsyevx.o dsygs2.o dsygst.o dsygv.o dsygvd.o dsygvx.o dsyrfs.o \
- dsysv.o dsysvx.o \
- dsytd2.o dsytf2.o dsytrd.o dsytrf.o dsytri.o dsytri2.o dsytri2x.o \
- dsyswapr.o dsytrs.o dsytrs2.o dsyconv.o \
- dtbcon.o dtbrfs.o dtbtrs.o dtgevc.o dtgex2.o dtgexc.o dtgsen.o \
- dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \
- dtptrs.o \
- dtrcon.o dtrevc.o dtrexc.o dtrrfs.o dtrsen.o dtrsna.o dtrsyl.o \
- dtrti2.o dtrtri.o dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \
- dsgesv.o dsposv.o dlag2s.o slag2d.o dlat2s.o \
- dlansf.o dpftrf.o dpftri.o dpftrs.o dsfrk.o dtfsm.o dtftri.o dtfttp.o \
- dtfttr.o dtpttf.o dtpttr.o dtrttf.o dtrttp.o \
- dgejsv.o dgesvj.o dgsvj0.o dgsvj1.o \
- dgeequb.o dsyequb.o dpoequb.o dgbequb.o \
- dbbcsd.o dlapmr.o dorbdb.o dorcsd.o \
- dgeqrt.o dgeqrt2.o dgeqrt3.o dgemqrt.o \
- dtpqrt.o dtpqrt2.o dtpmqrt.o dtprfb.o
+ dgbbrd.$(SUFFIX) dgbcon.$(SUFFIX) dgbequ.$(SUFFIX) dgbrfs.$(SUFFIX) dgbsv.$(SUFFIX) \
+ dgbsvx.$(SUFFIX) dgbtf2.$(SUFFIX) dgbtrf.$(SUFFIX) dgbtrs.$(SUFFIX) dgebak.$(SUFFIX) dgebal.$(SUFFIX) dgebd2.$(SUFFIX) \
+ dgebrd.$(SUFFIX) dgecon.$(SUFFIX) dgeequ.$(SUFFIX) dgees.$(SUFFIX) dgeesx.$(SUFFIX) dgeev.$(SUFFIX) dgeevx.$(SUFFIX) \
+ dgegs.$(SUFFIX) dgegv.$(SUFFIX) dgehd2.$(SUFFIX) dgehrd.$(SUFFIX) dgelq2.$(SUFFIX) dgelqf.$(SUFFIX) \
+ dgels.$(SUFFIX) dgelsd.$(SUFFIX) dgelss.$(SUFFIX) dgelsx.$(SUFFIX) dgelsy.$(SUFFIX) dgeql2.$(SUFFIX) dgeqlf.$(SUFFIX) \
+ dgeqp3.$(SUFFIX) dgeqpf.$(SUFFIX) dgeqr2.$(SUFFIX) dgeqr2p.$(SUFFIX) dgeqrf.$(SUFFIX) dgeqrfp.$(SUFFIX) dgerfs.$(SUFFIX) \
+ dgerq2.$(SUFFIX) dgerqf.$(SUFFIX) dgesc2.$(SUFFIX) dgesdd.$(SUFFIX) dgesv.$(SUFFIX) dgesvd.$(SUFFIX) dgesvx.$(SUFFIX) \
+ dgetc2.$(SUFFIX) dgetri.$(SUFFIX) \
+ dggbak.$(SUFFIX) dggbal.$(SUFFIX) dgges.$(SUFFIX) dggesx.$(SUFFIX) dggev.$(SUFFIX) dggevx.$(SUFFIX) \
+ dggglm.$(SUFFIX) dgghrd.$(SUFFIX) dgglse.$(SUFFIX) dggqrf.$(SUFFIX) \
+ dggrqf.$(SUFFIX) dggsvd.$(SUFFIX) dggsvp.$(SUFFIX) dgtcon.$(SUFFIX) dgtrfs.$(SUFFIX) dgtsv.$(SUFFIX) \
+ dgtsvx.$(SUFFIX) dgttrf.$(SUFFIX) dgttrs.$(SUFFIX) dgtts2.$(SUFFIX) dhgeqz.$(SUFFIX) \
+ dhsein.$(SUFFIX) dhseqr.$(SUFFIX) dlabrd.$(SUFFIX) dlacon.$(SUFFIX) dlacn2.$(SUFFIX) \
+ dlaein.$(SUFFIX) dlaexc.$(SUFFIX) dlag2.$(SUFFIX) dlags2.$(SUFFIX) dlagtm.$(SUFFIX) dlagv2.$(SUFFIX) dlahqr.$(SUFFIX) \
+ dlahrd.$(SUFFIX) dlahr2.$(SUFFIX) dlaic1.$(SUFFIX) dlaln2.$(SUFFIX) dlals0.$(SUFFIX) dlalsa.$(SUFFIX) dlalsd.$(SUFFIX) \
+ dlangb.$(SUFFIX) dlange.$(SUFFIX) dlangt.$(SUFFIX) dlanhs.$(SUFFIX) dlansb.$(SUFFIX) dlansp.$(SUFFIX) \
+ dlansy.$(SUFFIX) dlantb.$(SUFFIX) dlantp.$(SUFFIX) dlantr.$(SUFFIX) dlanv2.$(SUFFIX) \
+ dlapll.$(SUFFIX) dlapmt.$(SUFFIX) \
+ dlaqgb.$(SUFFIX) dlaqge.$(SUFFIX) dlaqp2.$(SUFFIX) dlaqps.$(SUFFIX) dlaqsb.$(SUFFIX) dlaqsp.$(SUFFIX) dlaqsy.$(SUFFIX) \
+ dlaqr0.$(SUFFIX) dlaqr1.$(SUFFIX) dlaqr2.$(SUFFIX) dlaqr3.$(SUFFIX) dlaqr4.$(SUFFIX) dlaqr5.$(SUFFIX) \
+ dlaqtr.$(SUFFIX) dlar1v.$(SUFFIX) dlar2v.$(SUFFIX) iladlr.$(SUFFIX) iladlc.$(SUFFIX) \
+ dlarf.$(SUFFIX) dlarfb.$(SUFFIX) dlarfg.$(SUFFIX) dlarfgp.$(SUFFIX) dlarft.$(SUFFIX) dlarfx.$(SUFFIX) \
+ dlargv.$(SUFFIX) dlarrv.$(SUFFIX) dlartv.$(SUFFIX) \
+ dlarz.$(SUFFIX) dlarzb.$(SUFFIX) dlarzt.$(SUFFIX) dlasy2.$(SUFFIX) dlasyf.$(SUFFIX) \
+ dlatbs.$(SUFFIX) dlatdf.$(SUFFIX) dlatps.$(SUFFIX) dlatrd.$(SUFFIX) dlatrs.$(SUFFIX) dlatrz.$(SUFFIX) dlatzm.$(SUFFIX) \
+ dopgtr.$(SUFFIX) dopmtr.$(SUFFIX) dorg2l.$(SUFFIX) dorg2r.$(SUFFIX) \
+ dorgbr.$(SUFFIX) dorghr.$(SUFFIX) dorgl2.$(SUFFIX) dorglq.$(SUFFIX) dorgql.$(SUFFIX) dorgqr.$(SUFFIX) dorgr2.$(SUFFIX) \
+ dorgrq.$(SUFFIX) dorgtr.$(SUFFIX) dorm2l.$(SUFFIX) dorm2r.$(SUFFIX) \
+ dormbr.$(SUFFIX) dormhr.$(SUFFIX) dorml2.$(SUFFIX) dormlq.$(SUFFIX) dormql.$(SUFFIX) dormqr.$(SUFFIX) dormr2.$(SUFFIX) \
+ dormr3.$(SUFFIX) dormrq.$(SUFFIX) dormrz.$(SUFFIX) dormtr.$(SUFFIX) dpbcon.$(SUFFIX) dpbequ.$(SUFFIX) dpbrfs.$(SUFFIX) \
+ dpbstf.$(SUFFIX) dpbsv.$(SUFFIX) dpbsvx.$(SUFFIX) \
+ dpbtf2.$(SUFFIX) dpbtrf.$(SUFFIX) dpbtrs.$(SUFFIX) dpocon.$(SUFFIX) dpoequ.$(SUFFIX) dporfs.$(SUFFIX) dposv.$(SUFFIX) \
+ dposvx.$(SUFFIX) dpotri.$(SUFFIX) dpotrs.$(SUFFIX) dpstrf.$(SUFFIX) dpstf2.$(SUFFIX) \
+ dppcon.$(SUFFIX) dppequ.$(SUFFIX) \
+ dpprfs.$(SUFFIX) dppsv.$(SUFFIX) dppsvx.$(SUFFIX) dpptrf.$(SUFFIX) dpptri.$(SUFFIX) dpptrs.$(SUFFIX) dptcon.$(SUFFIX) \
+ dpteqr.$(SUFFIX) dptrfs.$(SUFFIX) dptsv.$(SUFFIX) dptsvx.$(SUFFIX) dpttrs.$(SUFFIX) dptts2.$(SUFFIX) drscl.$(SUFFIX) \
+ dsbev.$(SUFFIX) dsbevd.$(SUFFIX) dsbevx.$(SUFFIX) dsbgst.$(SUFFIX) dsbgv.$(SUFFIX) dsbgvd.$(SUFFIX) dsbgvx.$(SUFFIX) \
+ dsbtrd.$(SUFFIX) dspcon.$(SUFFIX) dspev.$(SUFFIX) dspevd.$(SUFFIX) dspevx.$(SUFFIX) dspgst.$(SUFFIX) \
+ dspgv.$(SUFFIX) dspgvd.$(SUFFIX) dspgvx.$(SUFFIX) dsprfs.$(SUFFIX) dspsv.$(SUFFIX) dspsvx.$(SUFFIX) dsptrd.$(SUFFIX) \
+ dsptrf.$(SUFFIX) dsptri.$(SUFFIX) dsptrs.$(SUFFIX) dstegr.$(SUFFIX) dstein.$(SUFFIX) dstev.$(SUFFIX) dstevd.$(SUFFIX) dstevr.$(SUFFIX) \
+ dstevx.$(SUFFIX) \
+ dsycon.$(SUFFIX) dsyev.$(SUFFIX) dsyevd.$(SUFFIX) dsyevr.$(SUFFIX) \
+ dsyevx.$(SUFFIX) dsygs2.$(SUFFIX) dsygst.$(SUFFIX) dsygv.$(SUFFIX) dsygvd.$(SUFFIX) dsygvx.$(SUFFIX) dsyrfs.$(SUFFIX) \
+ dsysv.$(SUFFIX) dsysvx.$(SUFFIX) \
+ dsytd2.$(SUFFIX) dsytf2.$(SUFFIX) dsytrd.$(SUFFIX) dsytrf.$(SUFFIX) dsytri.$(SUFFIX) dsytri2.$(SUFFIX) dsytri2x.$(SUFFIX) \
+ dsyswapr.$(SUFFIX) dsytrs.$(SUFFIX) dsytrs2.$(SUFFIX) dsyconv.$(SUFFIX) \
+ dtbcon.$(SUFFIX) dtbrfs.$(SUFFIX) dtbtrs.$(SUFFIX) dtgevc.$(SUFFIX) dtgex2.$(SUFFIX) dtgexc.$(SUFFIX) dtgsen.$(SUFFIX) \
+ dtgsja.$(SUFFIX) dtgsna.$(SUFFIX) dtgsy2.$(SUFFIX) dtgsyl.$(SUFFIX) dtpcon.$(SUFFIX) dtprfs.$(SUFFIX) dtptri.$(SUFFIX) \
+ dtptrs.$(SUFFIX) \
+ dtrcon.$(SUFFIX) dtrevc.$(SUFFIX) dtrexc.$(SUFFIX) dtrrfs.$(SUFFIX) dtrsen.$(SUFFIX) dtrsna.$(SUFFIX) dtrsyl.$(SUFFIX) \
+ dtrtrs.$(SUFFIX) dtzrqf.$(SUFFIX) dtzrzf.$(SUFFIX) dstemr.$(SUFFIX) \
+ dsgesv.$(SUFFIX) dsposv.$(SUFFIX) dlag2s.$(SUFFIX) slag2d.$(SUFFIX) dlat2s.$(SUFFIX) \
+ dlansf.$(SUFFIX) dpftrf.$(SUFFIX) dpftri.$(SUFFIX) dpftrs.$(SUFFIX) dsfrk.$(SUFFIX) dtfsm.$(SUFFIX) dtftri.$(SUFFIX) dtfttp.$(SUFFIX) \
+ dtfttr.$(SUFFIX) dtpttf.$(SUFFIX) dtpttr.$(SUFFIX) dtrttf.$(SUFFIX) dtrttp.$(SUFFIX) \
+ dgejsv.$(SUFFIX) dgesvj.$(SUFFIX) dgsvj0.$(SUFFIX) dgsvj1.$(SUFFIX) \
+ dgeequb.$(SUFFIX) dsyequb.$(SUFFIX) dpoequb.$(SUFFIX) dgbequb.$(SUFFIX) \
+ dbbcsd.$(SUFFIX) dlapmr.$(SUFFIX) dorbdb.$(SUFFIX) dorcsd.$(SUFFIX) \
+ dgeqrt.$(SUFFIX) dgeqrt2.$(SUFFIX) dgeqrt3.$(SUFFIX) dgemqrt.$(SUFFIX) \
+ dtpqrt.$(SUFFIX) dtpqrt2.$(SUFFIX) dtpmqrt.$(SUFFIX) dtprfb.$(SUFFIX)
ifdef USEXBLAS
-DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \
- dla_gercond.o dla_gerpvgrw.o dsysvxx.o dsyrfsx.o \
- dla_syrfsx_extended.o dla_syamv.o dla_syrcond.o dla_syrpvgrw.o \
- dposvxx.o dporfsx.o dla_porfsx_extended.o dla_porcond.o \
- dla_porpvgrw.o dgbsvxx.o dgbrfsx.o dla_gbrfsx_extended.o \
- dla_gbamv.o dla_gbrcond.o dla_gbrpvgrw.o dla_lin_berr.o dlarscl2.o \
- dlascl2.o dla_wwaddw.o
+DXLASRC = dgesvxx.$(SUFFIX) dgerfsx.$(SUFFIX) dla_gerfsx_extended.$(SUFFIX) dla_geamv.$(SUFFIX) \
+ dla_gercond.$(SUFFIX) dla_gerpvgrw.$(SUFFIX) dsysvxx.$(SUFFIX) dsyrfsx.$(SUFFIX) \
+ dla_syrfsx_extended.$(SUFFIX) dla_syamv.$(SUFFIX) dla_syrcond.$(SUFFIX) dla_syrpvgrw.$(SUFFIX) \
+ dposvxx.$(SUFFIX) dporfsx.$(SUFFIX) dla_porfsx_extended.$(SUFFIX) dla_porcond.$(SUFFIX) \
+ dla_porpvgrw.$(SUFFIX) dgbsvxx.$(SUFFIX) dgbrfsx.$(SUFFIX) dla_gbrfsx_extended.$(SUFFIX) \
+ dla_gbamv.$(SUFFIX) dla_gbrcond.$(SUFFIX) dla_gbrpvgrw.$(SUFFIX) dla_lin_berr.$(SUFFIX) dlarscl2.$(SUFFIX) \
+ dlascl2.$(SUFFIX) dla_wwaddw.$(SUFFIX)
endif
ZLASRC = \
- zbdsqr.o zgbbrd.o zgbcon.o zgbequ.o zgbrfs.o zgbsv.o zgbsvx.o \
- zgbtf2.o zgbtrf.o zgbtrs.o zgebak.o zgebal.o zgebd2.o zgebrd.o \
- zgecon.o zgeequ.o zgees.o zgeesx.o zgeev.o zgeevx.o \
- zgegs.o zgegv.o zgehd2.o zgehrd.o zgelq2.o zgelqf.o \
- zgels.o zgelsd.o zgelss.o zgelsx.o zgelsy.o zgeql2.o zgeqlf.o zgeqp3.o \
- zgeqpf.o zgeqr2.o zgeqr2p.o zgeqrf.o zgeqrfp.o zgerfs.o zgerq2.o zgerqf.o \
- zgesc2.o zgesdd.o zgesv.o zgesvd.o zgesvx.o zgetc2.o zgetf2.o zgetrf.o \
- zgetri.o zgetrs.o \
- zggbak.o zggbal.o zgges.o zggesx.o zggev.o zggevx.o zggglm.o \
- zgghrd.o zgglse.o zggqrf.o zggrqf.o \
- zggsvd.o zggsvp.o \
- zgtcon.o zgtrfs.o zgtsv.o zgtsvx.o zgttrf.o zgttrs.o zgtts2.o zhbev.o \
- zhbevd.o zhbevx.o zhbgst.o zhbgv.o zhbgvd.o zhbgvx.o zhbtrd.o \
- zhecon.o zheev.o zheevd.o zheevr.o zheevx.o zhegs2.o zhegst.o \
- zhegv.o zhegvd.o zhegvx.o zherfs.o zhesv.o zhesvx.o zhetd2.o \
- zhetf2.o zhetrd.o \
- zhetrf.o zhetri.o zhetri2.o zhetri2x.o zheswapr.o \
- zhetrs.o zhetrs2.o zhgeqz.o zhpcon.o zhpev.o zhpevd.o \
- zhpevx.o zhpgst.o zhpgv.o zhpgvd.o zhpgvx.o zhprfs.o zhpsv.o \
- zhpsvx.o \
- zhptrd.o zhptrf.o zhptri.o zhptrs.o zhsein.o zhseqr.o zlabrd.o \
- zlacgv.o zlacon.o zlacn2.o zlacp2.o zlacpy.o zlacrm.o zlacrt.o zladiv.o \
- zlaed0.o zlaed7.o zlaed8.o \
- zlaein.o zlaesy.o zlaev2.o zlags2.o zlagtm.o \
- zlahef.o zlahqr.o \
- zlahrd.o zlahr2.o zlaic1.o zlals0.o zlalsa.o zlalsd.o zlangb.o zlange.o \
- zlangt.o zlanhb.o \
- zlanhe.o \
- zlanhp.o zlanhs.o zlanht.o zlansb.o zlansp.o zlansy.o zlantb.o \
- zlantp.o zlantr.o zlapll.o zlapmt.o zlaqgb.o zlaqge.o \
- zlaqhb.o zlaqhe.o zlaqhp.o zlaqp2.o zlaqps.o zlaqsb.o \
- zlaqr0.o zlaqr1.o zlaqr2.o zlaqr3.o zlaqr4.o zlaqr5.o \
- zlaqsp.o zlaqsy.o zlar1v.o zlar2v.o ilazlr.o ilazlc.o \
- zlarcm.o zlarf.o zlarfb.o \
- zlarfg.o zlarft.o zlarfgp.o \
- zlarfx.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \
- zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \
- zlassq.o zlaswp.o zlasyf.o \
- zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlatzm.o zlauu2.o \
- zlauum.o zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \
- zpbsvx.o zpbtf2.o zpbtrf.o zpbtrs.o zpocon.o zpoequ.o zporfs.o \
- zposv.o zposvx.o zpotf2.o zpotrf.o zpotri.o zpotrs.o zpstrf.o zpstf2.o \
- zppcon.o zppequ.o zpprfs.o zppsv.o zppsvx.o zpptrf.o zpptri.o zpptrs.o \
- zptcon.o zpteqr.o zptrfs.o zptsv.o zptsvx.o zpttrf.o zpttrs.o zptts2.o \
- zrot.o zspcon.o zspmv.o zspr.o zsprfs.o zspsv.o \
- zspsvx.o zsptrf.o zsptri.o zsptrs.o zdrscl.o zstedc.o \
- zstegr.o zstein.o zsteqr.o \
- zsycon.o zsymv.o \
- zsyr.o zsyrfs.o zsysv.o zsysvx.o zsytf2.o zsytrf.o zsytri.o zsytri2.o zsytri2x.o \
- zsyswapr.o zsytrs.o zsytrs2.o zsyconv.o \
- ztbcon.o ztbrfs.o ztbtrs.o ztgevc.o ztgex2.o \
- ztgexc.o ztgsen.o ztgsja.o ztgsna.o ztgsy2.o ztgsyl.o ztpcon.o \
- ztprfs.o ztptri.o \
- ztptrs.o ztrcon.o ztrevc.o ztrexc.o ztrrfs.o ztrsen.o ztrsna.o \
- ztrsyl.o ztrti2.o ztrtri.o ztrtrs.o ztzrqf.o ztzrzf.o zung2l.o \
- zung2r.o zungbr.o zunghr.o zungl2.o zunglq.o zungql.o zungqr.o zungr2.o \
- zungrq.o zungtr.o zunm2l.o zunm2r.o zunmbr.o zunmhr.o zunml2.o \
- zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \
- zunmtr.o zupgtr.o \
- zupmtr.o izmax1.o dzsum1.o zstemr.o \
- zcgesv.o zcposv.o zlag2c.o clag2z.o zlat2c.o \
- zhfrk.o ztfttp.o zlanhf.o zpftrf.o zpftri.o zpftrs.o ztfsm.o ztftri.o \
- ztfttr.o ztpttf.o ztpttr.o ztrttf.o ztrttp.o \
- zgeequb.o zgbequb.o zsyequb.o zpoequb.o zheequb.o \
- zbbcsd.o zlapmr.o zunbdb.o zuncsd.o \
- zgeqrt.o zgeqrt2.o zgeqrt3.o zgemqrt.o \
- ztpqrt.o ztpqrt2.o ztpmqrt.o ztprfb.o
+ zbdsqr.$(SUFFIX) zgbbrd.$(SUFFIX) zgbcon.$(SUFFIX) zgbequ.$(SUFFIX) zgbrfs.$(SUFFIX) zgbsv.$(SUFFIX) zgbsvx.$(SUFFIX) \
+ zgbtf2.$(SUFFIX) zgbtrf.$(SUFFIX) zgbtrs.$(SUFFIX) zgebak.$(SUFFIX) zgebal.$(SUFFIX) zgebd2.$(SUFFIX) zgebrd.$(SUFFIX) \
+ zgecon.$(SUFFIX) zgeequ.$(SUFFIX) zgees.$(SUFFIX) zgeesx.$(SUFFIX) zgeev.$(SUFFIX) zgeevx.$(SUFFIX) \
+ zgegs.$(SUFFIX) zgegv.$(SUFFIX) zgehd2.$(SUFFIX) zgehrd.$(SUFFIX) zgelq2.$(SUFFIX) zgelqf.$(SUFFIX) \
+ zgels.$(SUFFIX) zgelsd.$(SUFFIX) zgelss.$(SUFFIX) zgelsx.$(SUFFIX) zgelsy.$(SUFFIX) zgeql2.$(SUFFIX) zgeqlf.$(SUFFIX) zgeqp3.$(SUFFIX) \
+ zgeqpf.$(SUFFIX) zgeqr2.$(SUFFIX) zgeqr2p.$(SUFFIX) zgeqrf.$(SUFFIX) zgeqrfp.$(SUFFIX) zgerfs.$(SUFFIX) zgerq2.$(SUFFIX) zgerqf.$(SUFFIX) \
+ zgesc2.$(SUFFIX) zgesdd.$(SUFFIX) zgesv.$(SUFFIX) zgesvd.$(SUFFIX) zgesvx.$(SUFFIX) zgetc2.$(SUFFIX) \
+ zgetri.$(SUFFIX) \
+ zggbak.$(SUFFIX) zggbal.$(SUFFIX) zgges.$(SUFFIX) zggesx.$(SUFFIX) zggev.$(SUFFIX) zggevx.$(SUFFIX) zggglm.$(SUFFIX) \
+ zgghrd.$(SUFFIX) zgglse.$(SUFFIX) zggqrf.$(SUFFIX) zggrqf.$(SUFFIX) \
+ zggsvd.$(SUFFIX) zggsvp.$(SUFFIX) \
+ zgtcon.$(SUFFIX) zgtrfs.$(SUFFIX) zgtsv.$(SUFFIX) zgtsvx.$(SUFFIX) zgttrf.$(SUFFIX) zgttrs.$(SUFFIX) zgtts2.$(SUFFIX) zhbev.$(SUFFIX) \
+ zhbevd.$(SUFFIX) zhbevx.$(SUFFIX) zhbgst.$(SUFFIX) zhbgv.$(SUFFIX) zhbgvd.$(SUFFIX) zhbgvx.$(SUFFIX) zhbtrd.$(SUFFIX) \
+ zhecon.$(SUFFIX) zheev.$(SUFFIX) zheevd.$(SUFFIX) zheevr.$(SUFFIX) zheevx.$(SUFFIX) zhegs2.$(SUFFIX) zhegst.$(SUFFIX) \
+ zhegv.$(SUFFIX) zhegvd.$(SUFFIX) zhegvx.$(SUFFIX) zherfs.$(SUFFIX) zhesv.$(SUFFIX) zhesvx.$(SUFFIX) zhetd2.$(SUFFIX) \
+ zhetf2.$(SUFFIX) zhetrd.$(SUFFIX) \
+ zhetrf.$(SUFFIX) zhetri.$(SUFFIX) zhetri2.$(SUFFIX) zhetri2x.$(SUFFIX) zheswapr.$(SUFFIX) \
+ zhetrs.$(SUFFIX) zhetrs2.$(SUFFIX) zhgeqz.$(SUFFIX) zhpcon.$(SUFFIX) zhpev.$(SUFFIX) zhpevd.$(SUFFIX) \
+ zhpevx.$(SUFFIX) zhpgst.$(SUFFIX) zhpgv.$(SUFFIX) zhpgvd.$(SUFFIX) zhpgvx.$(SUFFIX) zhprfs.$(SUFFIX) zhpsv.$(SUFFIX) \
+ zhpsvx.$(SUFFIX) \
+ zhptrd.$(SUFFIX) zhptrf.$(SUFFIX) zhptri.$(SUFFIX) zhptrs.$(SUFFIX) zhsein.$(SUFFIX) zhseqr.$(SUFFIX) zlabrd.$(SUFFIX) \
+ zlacgv.$(SUFFIX) zlacon.$(SUFFIX) zlacn2.$(SUFFIX) zlacp2.$(SUFFIX) zlacpy.$(SUFFIX) zlacrm.$(SUFFIX) zlacrt.$(SUFFIX) zladiv.$(SUFFIX) \
+ zlaed0.$(SUFFIX) zlaed7.$(SUFFIX) zlaed8.$(SUFFIX) \
+ zlaein.$(SUFFIX) zlaesy.$(SUFFIX) zlaev2.$(SUFFIX) zlags2.$(SUFFIX) zlagtm.$(SUFFIX) \
+ zlahef.$(SUFFIX) zlahqr.$(SUFFIX) \
+ zlahrd.$(SUFFIX) zlahr2.$(SUFFIX) zlaic1.$(SUFFIX) zlals0.$(SUFFIX) zlalsa.$(SUFFIX) zlalsd.$(SUFFIX) zlangb.$(SUFFIX) zlange.$(SUFFIX) \
+ zlangt.$(SUFFIX) zlanhb.$(SUFFIX) \
+ zlanhe.$(SUFFIX) \
+ zlanhp.$(SUFFIX) zlanhs.$(SUFFIX) zlanht.$(SUFFIX) zlansb.$(SUFFIX) zlansp.$(SUFFIX) zlansy.$(SUFFIX) zlantb.$(SUFFIX) \
+ zlantp.$(SUFFIX) zlantr.$(SUFFIX) zlapll.$(SUFFIX) zlapmt.$(SUFFIX) zlaqgb.$(SUFFIX) zlaqge.$(SUFFIX) \
+ zlaqhb.$(SUFFIX) zlaqhe.$(SUFFIX) zlaqhp.$(SUFFIX) zlaqp2.$(SUFFIX) zlaqps.$(SUFFIX) zlaqsb.$(SUFFIX) \
+ zlaqr0.$(SUFFIX) zlaqr1.$(SUFFIX) zlaqr2.$(SUFFIX) zlaqr3.$(SUFFIX) zlaqr4.$(SUFFIX) zlaqr5.$(SUFFIX) \
+ zlaqsp.$(SUFFIX) zlaqsy.$(SUFFIX) zlar1v.$(SUFFIX) zlar2v.$(SUFFIX) ilazlr.$(SUFFIX) ilazlc.$(SUFFIX) \
+ zlarcm.$(SUFFIX) zlarf.$(SUFFIX) zlarfb.$(SUFFIX) \
+ zlarfg.$(SUFFIX) zlarft.$(SUFFIX) zlarfgp.$(SUFFIX) \
+ zlarfx.$(SUFFIX) zlargv.$(SUFFIX) zlarnv.$(SUFFIX) zlarrv.$(SUFFIX) zlartg.$(SUFFIX) zlartv.$(SUFFIX) \
+ zlarz.$(SUFFIX) zlarzb.$(SUFFIX) zlarzt.$(SUFFIX) zlascl.$(SUFFIX) zlaset.$(SUFFIX) zlasr.$(SUFFIX) \
+ zlassq.$(SUFFIX) zlasyf.$(SUFFIX) \
+ zlatbs.$(SUFFIX) zlatdf.$(SUFFIX) zlatps.$(SUFFIX) zlatrd.$(SUFFIX) zlatrs.$(SUFFIX) zlatrz.$(SUFFIX) zlatzm.$(SUFFIX) zlauu2.$(SUFFIX) \
+ zpbcon.$(SUFFIX) zpbequ.$(SUFFIX) zpbrfs.$(SUFFIX) zpbstf.$(SUFFIX) zpbsv.$(SUFFIX) \
+ zpbsvx.$(SUFFIX) zpbtf2.$(SUFFIX) zpbtrf.$(SUFFIX) zpbtrs.$(SUFFIX) zpocon.$(SUFFIX) zpoequ.$(SUFFIX) zporfs.$(SUFFIX) \
+ zposv.$(SUFFIX) zposvx.$(SUFFIX) zpotri.$(SUFFIX) zpotrs.$(SUFFIX) zpstrf.$(SUFFIX) zpstf2.$(SUFFIX) \
+ zppcon.$(SUFFIX) zppequ.$(SUFFIX) zpprfs.$(SUFFIX) zppsv.$(SUFFIX) zppsvx.$(SUFFIX) zpptrf.$(SUFFIX) zpptri.$(SUFFIX) zpptrs.$(SUFFIX) \
+ zptcon.$(SUFFIX) zpteqr.$(SUFFIX) zptrfs.$(SUFFIX) zptsv.$(SUFFIX) zptsvx.$(SUFFIX) zpttrf.$(SUFFIX) zpttrs.$(SUFFIX) zptts2.$(SUFFIX) \
+ zrot.$(SUFFIX) zspcon.$(SUFFIX) zspmv.$(SUFFIX) zspr.$(SUFFIX) zsprfs.$(SUFFIX) zspsv.$(SUFFIX) \
+ zspsvx.$(SUFFIX) zsptrf.$(SUFFIX) zsptri.$(SUFFIX) zsptrs.$(SUFFIX) zdrscl.$(SUFFIX) zstedc.$(SUFFIX) \
+ zstegr.$(SUFFIX) zstein.$(SUFFIX) zsteqr.$(SUFFIX) \
+ zsycon.$(SUFFIX) zsymv.$(SUFFIX) \
+ zsyr.$(SUFFIX) zsyrfs.$(SUFFIX) zsysv.$(SUFFIX) zsysvx.$(SUFFIX) zsytf2.$(SUFFIX) zsytrf.$(SUFFIX) zsytri.$(SUFFIX) zsytri2.$(SUFFIX) zsytri2x.$(SUFFIX) \
+ zsyswapr.$(SUFFIX) zsytrs.$(SUFFIX) zsytrs2.$(SUFFIX) zsyconv.$(SUFFIX) \
+ ztbcon.$(SUFFIX) ztbrfs.$(SUFFIX) ztbtrs.$(SUFFIX) ztgevc.$(SUFFIX) ztgex2.$(SUFFIX) \
+ ztgexc.$(SUFFIX) ztgsen.$(SUFFIX) ztgsja.$(SUFFIX) ztgsna.$(SUFFIX) ztgsy2.$(SUFFIX) ztgsyl.$(SUFFIX) ztpcon.$(SUFFIX) \
+ ztprfs.$(SUFFIX) ztptri.$(SUFFIX) \
+ ztptrs.$(SUFFIX) ztrcon.$(SUFFIX) ztrevc.$(SUFFIX) ztrexc.$(SUFFIX) ztrrfs.$(SUFFIX) ztrsen.$(SUFFIX) ztrsna.$(SUFFIX) \
+ ztrsyl.$(SUFFIX) ztrtrs.$(SUFFIX) ztzrqf.$(SUFFIX) ztzrzf.$(SUFFIX) zung2l.$(SUFFIX) \
+ zung2r.$(SUFFIX) zungbr.$(SUFFIX) zunghr.$(SUFFIX) zungl2.$(SUFFIX) zunglq.$(SUFFIX) zungql.$(SUFFIX) zungqr.$(SUFFIX) zungr2.$(SUFFIX) \
+ zungrq.$(SUFFIX) zungtr.$(SUFFIX) zunm2l.$(SUFFIX) zunm2r.$(SUFFIX) zunmbr.$(SUFFIX) zunmhr.$(SUFFIX) zunml2.$(SUFFIX) \
+ zunmlq.$(SUFFIX) zunmql.$(SUFFIX) zunmqr.$(SUFFIX) zunmr2.$(SUFFIX) zunmr3.$(SUFFIX) zunmrq.$(SUFFIX) zunmrz.$(SUFFIX) \
+ zunmtr.$(SUFFIX) zupgtr.$(SUFFIX) \
+ zupmtr.$(SUFFIX) izmax1.$(SUFFIX) dzsum1.$(SUFFIX) zstemr.$(SUFFIX) \
+ zcgesv.$(SUFFIX) zcposv.$(SUFFIX) zlag2c.$(SUFFIX) clag2z.$(SUFFIX) zlat2c.$(SUFFIX) \
+ zhfrk.$(SUFFIX) ztfttp.$(SUFFIX) zlanhf.$(SUFFIX) zpftrf.$(SUFFIX) zpftri.$(SUFFIX) zpftrs.$(SUFFIX) ztfsm.$(SUFFIX) ztftri.$(SUFFIX) \
+ ztfttr.$(SUFFIX) ztpttf.$(SUFFIX) ztpttr.$(SUFFIX) ztrttf.$(SUFFIX) ztrttp.$(SUFFIX) \
+ zgeequb.$(SUFFIX) zgbequb.$(SUFFIX) zsyequb.$(SUFFIX) zpoequb.$(SUFFIX) zheequb.$(SUFFIX) \
+ zbbcsd.$(SUFFIX) zlapmr.$(SUFFIX) zunbdb.$(SUFFIX) zuncsd.$(SUFFIX) \
+ zgeqrt.$(SUFFIX) zgeqrt2.$(SUFFIX) zgeqrt3.$(SUFFIX) zgemqrt.$(SUFFIX) \
+ ztpqrt.$(SUFFIX) ztpqrt2.$(SUFFIX) ztpmqrt.$(SUFFIX) ztprfb.$(SUFFIX)
ifdef USEXBLAS
-ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \
- zla_gercond_c.o zla_gercond_x.o zla_gerpvgrw.o zsysvxx.o zsyrfsx.o \
- zla_syrfsx_extended.o zla_syamv.o zla_syrcond_c.o zla_syrcond_x.o \
- zla_syrpvgrw.o zposvxx.o zporfsx.o zla_porfsx_extended.o \
- zla_porcond_c.o zla_porcond_x.o zla_porpvgrw.o zgbsvxx.o zgbrfsx.o \
- zla_gbrfsx_extended.o zla_gbamv.o zla_gbrcond_c.o zla_gbrcond_x.o \
- zla_gbrpvgrw.o zhesvxx.o zherfsx.o zla_herfsx_extended.o \
- zla_heamv.o zla_hercond_c.o zla_hercond_x.o zla_herpvgrw.o \
- zla_lin_berr.o zlarscl2.o zlascl2.o zla_wwaddw.o
+ZXLASRC = zgesvxx.$(SUFFIX) zgerfsx.$(SUFFIX) zla_gerfsx_extended.$(SUFFIX) zla_geamv.$(SUFFIX) \
+ zla_gercond_c.$(SUFFIX) zla_gercond_x.$(SUFFIX) zla_gerpvgrw.$(SUFFIX) zsysvxx.$(SUFFIX) zsyrfsx.$(SUFFIX) \
+ zla_syrfsx_extended.$(SUFFIX) zla_syamv.$(SUFFIX) zla_syrcond_c.$(SUFFIX) zla_syrcond_x.$(SUFFIX) \
+ zla_syrpvgrw.$(SUFFIX) zposvxx.$(SUFFIX) zporfsx.$(SUFFIX) zla_porfsx_extended.$(SUFFIX) \
+ zla_porcond_c.$(SUFFIX) zla_porcond_x.$(SUFFIX) zla_porpvgrw.$(SUFFIX) zgbsvxx.$(SUFFIX) zgbrfsx.$(SUFFIX) \
+ zla_gbrfsx_extended.$(SUFFIX) zla_gbamv.$(SUFFIX) zla_gbrcond_c.$(SUFFIX) zla_gbrcond_x.$(SUFFIX) \
+ zla_gbrpvgrw.$(SUFFIX) zhesvxx.$(SUFFIX) zherfsx.$(SUFFIX) zla_herfsx_extended.$(SUFFIX) \
+ zla_heamv.$(SUFFIX) zla_hercond_c.$(SUFFIX) zla_hercond_x.$(SUFFIX) zla_herpvgrw.$(SUFFIX) \
+ zla_lin_berr.$(SUFFIX) zlarscl2.$(SUFFIX) zlascl2.$(SUFFIX) zla_wwaddw.$(SUFFIX)
endif
ALLOBJ = $(SLASRC) $(DLASRC) $(DSLASRC) $(CLASRC) $(ZLASRC) $(ZCLASRC) \
$(SCLAUX) $(DZLAUX) $(ALLAUX)
+ALLOBJ_P = $(ALLOBJ:.$(SUFFIX)=.$(PSUFFIX))
+
ifdef USEXBLAS
ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
endif
all: ../$(LAPACKLIB)
+lapack_prof: ../$(LAPACKLIB_P)
+
../$(LAPACKLIB): $(ALLOBJ) $(ALLXOBJ)
$(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ) $(ALLXOBJ)
$(RANLIB) $@
+../$(LAPACKLIB_P): $(ALLOBJ_P)
+ $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ_P)
+ $(RANLIB) $@
+
single: $(SLASRC) $(DSLASRC) $(SXLASRC) $(SCLAUX) $(ALLAUX)
$(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \
$(SXLASRC) $(SCLAUX) $(ALLAUX) $(ALLXAUX)
@@ -451,15 +459,24 @@
@FRC=$(FRC)
clean:
- rm -f *.o
+ rm -f *.$(SUFFIX) *.$(PSUFFIX)
-.f.o:
+%.$(SUFFIX): %.f
$(FORTRAN) $(OPTS) -c $< -o $@
-slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
-dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
-sla_wwaddw.o: sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
-dla_wwaddw.o: dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
-cla_wwaddw.o: cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
-zla_wwaddw.o: zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
+%.$(PSUFFIX): %.f
+ $(FORTRAN) $(POPTS) -c $< -o $@
+slaruv.$(SUFFIX): slaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
+dlaruv.$(SUFFIX): dlaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
+sla_wwaddw.$(SUFFIX): sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
+dla_wwaddw.$(SUFFIX): dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
+cla_wwaddw.$(SUFFIX): cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
+zla_wwaddw.$(SUFFIX): zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
+
+slaruv.$(PSUFFIX): slaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
+dlaruv.$(PSUFFIX): dlaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
+sla_wwaddw.$(PSUFFIX): sla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
+dla_wwaddw.$(PSUFFIX): dla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
+cla_wwaddw.$(PSUFFIX): cla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
+zla_wwaddw.$(PSUFFIX): zla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
diff -ruN lapack-3.4.2.old/TESTING/EIG/Makefile lapack-3.4.2/TESTING/EIG/Makefile
--- lapack-3.4.2.old/TESTING/EIG/Makefile 2011-09-26 23:52:31 +0200
+++ lapack-3.4.2/TESTING/EIG/Makefile 2012-04-22 21:41:45 +0200
@@ -78,7 +78,7 @@
cget35.o cget36.o cget37.o cget38.o cget51.o cget52.o \
cget54.o cglmts.o cgqrts.o cgrqts.o cgsvts.o \
chbt21.o chet21.o chet22.o chpt21.o chst01.o \
- clarfy.o clarhs.o clatm4.o clctes.o clctsx.o clsets.o csbmv.o \
+ clarfy.o clarhs.o clatm4.o clctes.o clctsx.o clsets.o \
csgt01.o cslect.o \
cstt21.o cstt22.o cunt01.o cunt03.o
@@ -115,7 +115,7 @@
zget35.o zget36.o zget37.o zget38.o zget51.o zget52.o \
zget54.o zglmts.o zgqrts.o zgrqts.o zgsvts.o \
zhbt21.o zhet21.o zhet22.o zhpt21.o zhst01.o \
- zlarfy.o zlarhs.o zlatm4.o zlctes.o zlctsx.o zlsets.o zsbmv.o \
+ zlarfy.o zlarhs.o zlatm4.o zlctes.o zlctsx.o zlsets.o \
zsgt01.o zslect.o \
zstt21.o zstt22.o zunt01.o zunt03.o
@@ -129,22 +129,22 @@
../xeigtsts: $(SEIGTST) $(SCIGTST) $(AEIGTST) ../../$(LAPACKLIB); \
$(LOADER) $(LOADOPTS) -o xeigtsts \
$(SEIGTST) $(SCIGTST) $(AEIGTST) ../../$(TMGLIB) \
- ../../$(LAPACKLIB) $(BLASLIB) && mv xeigtsts $@
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB) && mv xeigtsts $@
../xeigtstc: $(CEIGTST) $(SCIGTST) $(AEIGTST) ../../$(LAPACKLIB); \
$(LOADER) $(LOADOPTS) -o xeigtstc \
$(CEIGTST) $(SCIGTST) $(AEIGTST) ../../$(TMGLIB) \
- ../../$(LAPACKLIB) $(BLASLIB) && mv xeigtstc $@
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB) && mv xeigtstc $@
../xeigtstd: $(DEIGTST) $(DZIGTST) $(AEIGTST) ../../$(LAPACKLIB); \
$(LOADER) $(LOADOPTS) -o xeigtstd \
$(DEIGTST) $(DZIGTST) $(AEIGTST) ../../$(TMGLIB) \
- ../../$(LAPACKLIB) $(BLASLIB) && mv xeigtstd $@
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB) && mv xeigtstd $@
../xeigtstz: $(ZEIGTST) $(DZIGTST) $(AEIGTST) ../../$(LAPACKLIB); \
$(LOADER) $(LOADOPTS) -o xeigtstz \
$(ZEIGTST) $(DZIGTST) $(AEIGTST) ../../$(TMGLIB) \
- ../../$(LAPACKLIB) $(BLASLIB) && mv xeigtstz $@
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB) && mv xeigtstz $@
$(AEIGTST): $(FRC)
$(SCIGTST): $(FRC)
diff -ruN lapack-3.4.2.old/TESTING/LIN/Makefile lapack-3.4.2/TESTING/LIN/Makefile
--- lapack-3.4.2.old/TESTING/LIN/Makefile 2012-04-02 21:06:36 +0200
+++ lapack-3.4.2/TESTING/LIN/Makefile 2012-04-22 21:43:30 +0200
@@ -109,7 +109,7 @@
cqpt01.o cqrt01.o cqrt01p.o cqrt02.o cqrt03.o cqrt11.o \
cqrt12.o cqrt13.o cqrt14.o cqrt15.o cqrt16.o \
cqrt17.o crqt01.o crqt02.o crqt03.o crzt01.o crzt02.o \
- csbmv.o cspt01.o \
+ cspt01.o \
cspt02.o cspt03.o csyt01.o csyt02.o csyt03.o \
ctbt02.o ctbt03.o ctbt05.o ctbt06.o ctpt01.o \
ctpt02.o ctpt03.o ctpt05.o ctpt06.o ctrt01.o \
@@ -188,7 +188,7 @@
zqpt01.o zqrt01.o zqrt01p.o zqrt02.o zqrt03.o zqrt11.o \
zqrt12.o zqrt13.o zqrt14.o zqrt15.o zqrt16.o \
zqrt17.o zrqt01.o zrqt02.o zrqt03.o zrzt01.o zrzt02.o \
- zsbmv.o zspt01.o \
+ zspt01.o \
zspt02.o zspt03.o zsyt01.o zsyt02.o zsyt03.o \
ztbt02.o ztbt03.o ztbt05.o ztbt06.o ztpt01.o \
ztpt02.o ztpt03.o ztpt05.o ztpt06.o ztrt01.o \
@@ -214,7 +214,7 @@
zdrvab.o zdrvac.o zerrab.o zerrac.o zget08.o \
alaerh.o alahd.o aladhd.o alareq.o \
chkxer.o zget02.o zlarhs.o zlatb4.o \
- zsbmv.o xerbla.o zpot06.o zlaipd.o
+ xerbla.o zpot06.o zlaipd.o
SLINTSTRFP = schkrfp.o sdrvrfp.o sdrvrf1.o sdrvrf2.o sdrvrf3.o sdrvrf4.o serrrfp.o \
slatb4.o slarhs.o sget04.o spot01.o spot03.o spot02.o \
@@ -225,11 +225,11 @@
chkxer.o xerbla.o alaerh.o aladhd.o alahd.o alasvm.o
CLINTSTRFP = cchkrfp.o cdrvrfp.o cdrvrf1.o cdrvrf2.o cdrvrf3.o cdrvrf4.o cerrrfp.o \
- claipd.o clatb4.o clarhs.o csbmv.o cget04.o cpot01.o cpot03.o cpot02.o \
+ claipd.o clatb4.o clarhs.o cget04.o cpot01.o cpot03.o cpot02.o \
chkxer.o xerbla.o alaerh.o aladhd.o alahd.o alasvm.o
ZLINTSTRFP = zchkrfp.o zdrvrfp.o zdrvrf1.o zdrvrf2.o zdrvrf3.o zdrvrf4.o zerrrfp.o \
- zlatb4.o zlaipd.o zlarhs.o zsbmv.o zget04.o zpot01.o zpot03.o zpot02.o \
+ zlatb4.o zlaipd.o zlarhs.o zget04.o zpot01.o zpot03.o zpot02.o \
chkxer.o xerbla.o alaerh.o aladhd.o alahd.o alasvm.o
all: single double complex complex16 proto-single proto-double proto-complex proto-complex16
@@ -246,43 +246,43 @@
xlintsts : $(ALINTST) $(SLINTST) $(SCLNTST) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $(ALINTST) $(SCLNTST) $(SLINTST) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
xlintstc : $(ALINTST) $(CLINTST) $(SCLNTST) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $(ALINTST) $(SCLNTST) $(CLINTST) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
xlintstd : $(ALINTST) $(DLINTST) $(DZLNTST) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $^ \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
xlintstz : $(ALINTST) $(ZLINTST) $(DZLNTST) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $(ALINTST) $(DZLNTST) $(ZLINTST) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
xlintstds : $(DSLINTST) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $(DSLINTST) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
xlintstzc : $(ZCLINTST) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $(ZCLINTST) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
xlintstrfs : $(SLINTSTRFP) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $(SLINTSTRFP) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
xlintstrfd : $(DLINTSTRFP) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $(DLINTSTRFP) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
xlintstrfc : $(CLINTSTRFP) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $(CLINTSTRFP) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
xlintstrfz : $(ZLINTSTRFP) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $(ZLINTSTRFP) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
../xlintsts: xlintsts
mv xlintsts $@
diff -ruN lapack-3.4.2.old/lapacke/src/Makefile lapack-3.4.2/lapacke/src/Makefile
--- lapack-3.4.2.old/lapacke/src/Makefile 2012-04-02 22:16:32 +0200
+++ lapack-3.4.2/lapacke/src/Makefile 2012-04-22 21:38:38 +0200
@@ -2041,19 +2041,21 @@
lapacke_zlagsy.o \
lapacke_zlagsy_work.o
-ALLOBJ = $(SRC_OBJ) $(MATGEN_OBJ)
+OBJ_FILES := $(SRC_OBJ)
-ifdef USEXBLAS
-ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
+ifdef LAPACKE_EXTENDED
+OBJ_FILES += $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
endif
-
-OBJ_FILES := $(C_FILES:.o=.o)
+ifdef LAPACKE_TESTING
+OBJ_FILES += $(MATGEN_OBJ)
+endif
all: ../../$(LAPACKELIB)
-../../$(LAPACKELIB): $(ALLOBJ) $(ALLXOBJ)
- $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ALLOBJ) $(ALLXOBJ)
+../../$(LAPACKELIB): $(OBJ_FILES)
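+# Add the objects to the archive in batches of 100 via xargs instead of one $(ARCH) call (presumably to keep each command line short); see http://hackage.haskell.org/trac/gtk2hs/ticket/1146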
+ echo $(OBJ_FILES) | xargs -n 100 $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB)
$(RANLIB) ../../$(LAPACKELIB)
.c.o:

View File

@@ -1,3 +1,4 @@
#!/bin/bash
# Quick-build wrapper: prints a pointer to the wiki page on using OpenBLAS
# in Microsoft Visual Studio, then runs make with BINARY=32, CC=gcc and
# FC=gfortran.
echo " Please read https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio "
make BINARY=32 CC=gcc FC=gfortran

View File

@@ -1,3 +1,4 @@
#!/bin/bash
# Quick-build wrapper: prints a pointer to the wiki page on using OpenBLAS
# in Microsoft Visual Studio, then runs make with BINARY=64,
# CC=x86_64-w64-mingw32-gcc and FC=x86_64-w64-mingw32-gfortran.
echo " Please read https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio "
make BINARY=64 CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran