Compare commits
138 Commits
piledriver
...
v0.2.9
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f773f492f3 | ||
|
|
21a6b5f79e | ||
|
|
a40116de25 | ||
|
|
b31ec99372 | ||
|
|
0ac073fa94 | ||
|
|
25e899b60b | ||
|
|
219bcb119d | ||
|
|
5664445543 | ||
|
|
89da450800 | ||
|
|
c26bbee489 | ||
|
|
ced13574a0 | ||
|
|
fe858873af | ||
|
|
a8d4d1c4d3 | ||
|
|
c4ccb3fbb2 | ||
|
|
a748d3a75d | ||
|
|
a5ab231ad4 | ||
|
|
dbaeea7b59 | ||
|
|
10a16bd690 | ||
|
|
406f5bd22b | ||
|
|
a0ae53966f | ||
|
|
0d75f3b6a2 | ||
|
|
abad6f66d6 | ||
|
|
2ff66e661d | ||
|
|
5e55034922 | ||
|
|
9a9e810239 | ||
|
|
45be9ac111 | ||
|
|
9f201558c9 | ||
|
|
d4237cb7f3 | ||
|
|
d2a8ff4b04 | ||
|
|
f331cb1a76 | ||
|
|
9ed981c5dc | ||
|
|
aaa9d7fbf8 | ||
|
|
ebc95e6f11 | ||
|
|
61a2c50e8e | ||
|
|
4f98f8c9b3 | ||
|
|
536875d463 | ||
|
|
65f2fba4c3 | ||
|
|
eea6f51df9 | ||
|
|
6fc4646709 | ||
|
|
ac029f81b3 | ||
|
|
c0cf875a82 | ||
|
|
b6d904838e | ||
|
|
5379eff022 | ||
|
|
aaddb05411 | ||
|
|
e52532a9fe | ||
|
|
e826a5a6af | ||
|
|
165d5436b5 | ||
|
|
409b52255c | ||
|
|
5953972a5a | ||
|
|
d751224ea4 | ||
|
|
4a5938b5cc | ||
|
|
d18bc5468f | ||
|
|
8877c6db51 | ||
|
|
c38379c9dd | ||
|
|
a0b07c1440 | ||
|
|
43fbdb7a5a | ||
|
|
777cebc8c7 | ||
|
|
aa5c73e20f | ||
|
|
5e5ef28ca0 | ||
|
|
650ed34336 | ||
|
|
189ca1bcee | ||
|
|
4c1caa7454 | ||
|
|
7bb19cf90e | ||
|
|
2a94aaaf2e | ||
|
|
5e4b4f6712 | ||
|
|
47e8950e77 | ||
|
|
f45f2c8465 | ||
|
|
10780ae650 | ||
|
|
9bae50f700 | ||
|
|
0758c1a374 | ||
|
|
564ff395f6 | ||
|
|
7fb78a5f01 | ||
|
|
8204ab4aa8 | ||
|
|
48d1325784 | ||
|
|
57bbc586ef | ||
|
|
bfef3c5dd1 | ||
|
|
d972f4a60a | ||
|
|
eebce01cf2 | ||
|
|
e2c39a4a8e | ||
|
|
1e8e6faa7e | ||
|
|
c7eb901496 | ||
|
|
2ed03ea0a2 | ||
|
|
de00e2937a | ||
|
|
e187b5e9d0 | ||
|
|
0947fc1c89 | ||
|
|
4d61607c9e | ||
|
|
781bfb6e66 | ||
|
|
79a82ba7f1 | ||
|
|
d63bd7fa5e | ||
|
|
e265c4ec86 | ||
|
|
0732238213 | ||
|
|
5f3b68b4d4 | ||
|
|
2424af62fd | ||
|
|
6b252033ae | ||
|
|
320c805905 | ||
|
|
e673848a9b | ||
|
|
a35a1a9ae7 | ||
|
|
793509a3b5 | ||
|
|
020f36f970 | ||
|
|
9d0cc399ac | ||
|
|
025fc914cc | ||
|
|
43bb633096 | ||
|
|
187237b622 | ||
|
|
66198faab6 | ||
|
|
47b22763f8 | ||
|
|
4d42368214 | ||
|
|
3e068e78e2 | ||
|
|
1140c489c9 | ||
|
|
804a306313 | ||
|
|
9db0fb8b02 | ||
|
|
692b14cecd | ||
|
|
322a178430 | ||
|
|
f80f29e256 | ||
|
|
2c556f093a | ||
|
|
3b027d2528 | ||
|
|
57526cae99 | ||
|
|
5de5ef118c | ||
|
|
b161ac29e3 | ||
|
|
b20ee6924a | ||
|
|
49bd98f410 | ||
|
|
a14f98ca7c | ||
|
|
138a841390 | ||
|
|
046e4013cb | ||
|
|
dd2d3e61ab | ||
|
|
3617c22a56 | ||
|
|
f9daebba0a | ||
|
|
9a557e90da | ||
|
|
2d557eb1e0 | ||
|
|
a789b77b75 | ||
|
|
75acf96d94 | ||
|
|
8c7687b419 | ||
|
|
3e0a7b931c | ||
|
|
306d9f2e35 | ||
|
|
7b8604ea29 | ||
|
|
ab69443bd4 | ||
|
|
b263e096af | ||
|
|
05bb391c3a | ||
|
|
0ab080987d |
@@ -10,13 +10,26 @@
|
||||
* Optimize BLAS3 on ICT Loongson 3A.
|
||||
* Optimize BLAS3 on Intel Sandy Bridge.
|
||||
|
||||
* Werner Saar <wernsaar@googlemail.com>
|
||||
* [2013-03-04] Optimize AVX and FMA4 DGEMM on AMD Bulldozer
|
||||
* [2013-04-27] Optimize AVX and FMA4 TRSM on AMD Bulldozer
|
||||
* [2013-06-09] Optimize AVX and FMA4 SGEMM on AMD Bulldozer
|
||||
* [2013-06-11] Optimize AVX and FMA4 ZGEMM on AMD Bulldozer
|
||||
* [2013-06-12] Optimize AVX and FMA4 CGEMM on AMD Bulldozer
|
||||
* [2013-06-16] Optimize dgemv_n kernel on AMD Bulldozer
|
||||
* [2013-06-20] Optimize ddot, daxpy kernel on AMD Bulldozer
|
||||
* [2013-06-21] Optimize dcopy kernel on AMD Bulldozer
|
||||
* Porting and Optimization on ARM Cortex-A9
|
||||
* Optimization on AMD Piledriver
|
||||
* Optimization on Intel Haswell
|
||||
|
||||
## Previous Developers
|
||||
|
||||
* Zaheer Chothia <zaheer.chothia@gmail.com>
|
||||
* Improve the compatibility about complex number
|
||||
* Build LAPACKE: C interface to LAPACK
|
||||
* Improve the windows build.
|
||||
|
||||
## Previous Developers
|
||||
|
||||
* Chen Shaohu <huhumartinwar@gmail.com>
|
||||
* Optimize GEMV on the Loongson 3A processor.
|
||||
|
||||
@@ -52,16 +65,7 @@ In chronological order:
|
||||
|
||||
* Sébastien Villemot <sebastien@debian.org>
|
||||
* [2012-11-14] Fix compilation with TARGET=GENERIC. Patch applied to Debian package.
|
||||
|
||||
* Werner Saar <wernsaar@googlemail.com>
|
||||
* [2013-03-04] Optimize AVX and FMA4 DGEMM on AMD Bulldozer
|
||||
* [2013-04-27] Optimize AVX and FMA4 TRSM on AMD Bulldozer
|
||||
* [2013-06-09] Optimize AVX and FMA4 SGEMM on AMD Bulldozer
|
||||
* [2013-06-11] Optimize AVX and FMA4 ZGEMM on AMD Bulldozer
|
||||
* [2013-06-12] Optimize AVX and FMA4 CGEMM on AMD Bulldozer
|
||||
* [2013-06-16] Optimize dgemv_n kernel on AMD Bulldozer
|
||||
* [2013-06-20] Optimize ddot, daxpy kernel on AMD Bulldozer
|
||||
* [2013-06-21] Optimize dcopy kernel on AMD Bulldozer
|
||||
* [2013-08-28] Avoid failure on qemu guests declaring an Athlon CPU without 3dnow!
|
||||
|
||||
* Kang-Che Sung <Explorer09@gmail.com>
|
||||
* [2013-05-17] Fix typo in the document. Re-order the architecture list in getarch.c.
|
||||
@@ -79,10 +83,34 @@ In chronological order:
|
||||
* [2013-07-11] create openblas_get_parallel to retrieve information which parallelization
|
||||
model is used by OpenBLAS.
|
||||
|
||||
* Elliot Saba <staticfloat@gmail.com>
|
||||
* [2013-07-22] Add in return value for `interface/trtri.c`
|
||||
|
||||
* Sébastien Fabbro <bicatali@gentoo.org>
|
||||
* [2013-07-24] Modify makefile to respect user's LDFLAGS
|
||||
* [2013-07-24] Add stack markings for GNU as arch-independent for assembler files
|
||||
|
||||
* Viral B. Shah <viral@mayin.org>
|
||||
* [2013-08-21] Patch LAPACK XLASD4.f as discussed in JuliaLang/julia#2340
|
||||
|
||||
* Lars Buitinck <https://github.com/larsmans>
|
||||
* [2013-08-28] get rid of the generated cblas_noconst.h file
|
||||
* [2013-08-28] Missing threshold in gemm.c
|
||||
* [2013-08-28] fix default prefix handling in makefiles
|
||||
|
||||
* yieldthought <https://github.com/yieldthought>
|
||||
* [2013-10-08] Remove -Wl,--retain-symbols-file from dynamic link line to fix tool support
|
||||
|
||||
* Keno Fischer <https://github.com/loladiro>
|
||||
* [2013-10-23] Use FC instead of CC to link the dynamic library on OS X
|
||||
|
||||
* Christopher Meng <cickumqt@gmail.com>
|
||||
* [2013-12-09] Add DESTDIR support for easier building on RPM based distros.
|
||||
Use install command instead of cp to install files with permissions control.
|
||||
|
||||
* Lucas Beyer <lucasb.eyer.be@gmail.com>
|
||||
* [2013-12-10] Added support for NO_SHARED in make install.
|
||||
|
||||
* carlkl <https://github.com/carlkl>
|
||||
* [2013-12-13] Fixed LAPACKE building bug on Windows
|
||||
|
||||
|
||||
@@ -1,4 +1,42 @@
|
||||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.2.9
|
||||
10-Jun-2014
|
||||
common:
|
||||
* Improved the result for LAPACK testing. (#372)
|
||||
* Installed DLL to prefix/bin instead of prefix/lib. (#366)
|
||||
* Build import library on Windows.(#374)
|
||||
x86/x86-64:
|
||||
* To improve LAPACK testing, we fallback some kernels. (#372)
|
||||
https://github.com/xianyi/OpenBLAS/wiki/Fixed-optimized-kernels-To-do-List
|
||||
|
||||
====================================================================
|
||||
Version 0.2.9.rc2
|
||||
06-Mar-2014
|
||||
common:
|
||||
* Added OPENBLAS_VERBOSE environment variable.(#338)
|
||||
* Make OpenBLAS thread-pool resilient to fork via pthread_atfork.
|
||||
(#294, Thank Olivier Grisel)
|
||||
* Rewrote rotmg
|
||||
* Fixed sdsdot bug.
|
||||
x86/x86-64:
|
||||
* Detect Intel Haswell for new Macbook.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.9.rc1
|
||||
13-Jan-2014
|
||||
common:
|
||||
* Update LAPACK to 3.5.0 version
|
||||
* Fixed compatibility issues with Clang and Pathscale compilers.
|
||||
|
||||
x86/x86-64:
|
||||
* Optimization on Intel Haswell.
|
||||
* Enable optimization kernels on AMD Bulldozer and Piledriver.
|
||||
|
||||
ARM:
|
||||
* Support ARMv6 and ARMv7 ISA.
|
||||
* Optimization on ARM Cortex-A9.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.8
|
||||
01-Aug-2013
|
||||
@@ -24,7 +62,7 @@ common:
|
||||
parallelization model is used by OpenBLAS. (Thank grisuthedragon)
|
||||
* Detect LLVM/Clang compiler. The default compiler is Clang on Mac OS X.
|
||||
* Change LIBSUFFIX from .lib to .a on windows.
|
||||
* A walk round for dtrti_U single thread bug. Replace it with LAPACK codes. (#191)
|
||||
* A work-around for dtrti_U single thread bug. Replace it with LAPACK codes. (#191)
|
||||
|
||||
x86/x86-64:
|
||||
* Optimize c/zgemm, trsm, dgemv_n, ddot, daxpy, dcopy on
|
||||
@@ -257,7 +295,7 @@ x86/x86_64:
|
||||
* Fixed #28 a wrong result of dsdot on x86_64.
|
||||
* Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6.
|
||||
* Fixed #33 ztrmm bug on Nehalem.
|
||||
* Walk round #27 the low performance axpy issue with small imput size & multithreads.
|
||||
* Work-around #27 the low performance axpy issue with small input size & multithreads.
|
||||
|
||||
MIPS64:
|
||||
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64.
|
||||
@@ -281,7 +319,7 @@ common:
|
||||
|
||||
x86/x86_64:
|
||||
* On x86 32bits, fixed a bug in zdot_sse2.S line 191. This would cause
|
||||
zdotu & zdotc failures.Instead,Walk around it. (Refs issue #8 #9 on github)
|
||||
zdotu & zdotc failures. Instead, work-around it. (Refs issue #8 #9 on github)
|
||||
* Modified ?axpy functions to return same netlib BLAS results
|
||||
when incx==0 or incy==0 (Refs issue #7 on github)
|
||||
* Modified ?swap functions to return same netlib BLAS results
|
||||
|
||||
57
Makefile
57
Makefile
@@ -57,7 +57,7 @@ endif
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
@echo
|
||||
@echo " Use OpenMP in the multithreading. Becasue of ignoring OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS flags, "
|
||||
@echo " Use OpenMP in the multithreading. Because of ignoring OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS flags, "
|
||||
@echo " you should use OMP_NUM_THREADS environment variable to control the number of threads."
|
||||
@echo
|
||||
endif
|
||||
@@ -128,6 +128,11 @@ ifeq ($(CORE), UNKOWN)
|
||||
endif
|
||||
ifeq ($(NOFORTRAN), 1)
|
||||
$(error OpenBLAS: Detecting fortran compiler failed. Please install fortran compiler, e.g. gfortran, ifort, openf90.)
|
||||
endif
|
||||
ifeq ($(NO_STATIC), 1)
|
||||
ifeq ($(NO_SHARED), 1)
|
||||
$(error OpenBLAS: neither static nor shared are enabled.)
|
||||
endif
|
||||
endif
|
||||
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
@for d in $(SUBDIRS) ; \
|
||||
@@ -207,6 +212,7 @@ else
|
||||
netlib : lapack_prebuild
|
||||
ifndef NOFORTRAN
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
endif
|
||||
ifndef NO_LAPACKE
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib
|
||||
@@ -230,45 +236,21 @@ ifndef NOFORTRAN
|
||||
-@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKELIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else
|
||||
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
|
||||
lapack-3.4.2 : lapack-3.4.2.tgz
|
||||
ifndef NOFORTRAN
|
||||
ifndef NO_LAPACK
|
||||
@if test `$(MD5SUM) $< | $(AWK) '{print $$1}'` = 61bf1a8a4469d4bdb7604f5897179478; then \
|
||||
echo $(TAR) zxf $< ;\
|
||||
$(TAR) zxf $< && (cd $(NETLIB_LAPACK_DIR); $(PATCH) -p1 < ../patch.for_lapack-3.4.2) ;\
|
||||
rm -f $(NETLIB_LAPACK_DIR)/lapacke/make.inc ;\
|
||||
else \
|
||||
rm -rf $(NETLIB_LAPACK_DIR) ;\
|
||||
echo " Cannot download lapack-3.4.2.tgz or the MD5 check sum is wrong (Please use orignal)."; \
|
||||
exit 1; \
|
||||
fi
|
||||
endif
|
||||
endif
|
||||
|
||||
LAPACK_URL=http://www.netlib.org/lapack/lapack-3.4.2.tgz
|
||||
|
||||
lapack-3.4.2.tgz :
|
||||
ifndef NOFORTRAN
|
||||
#http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Darwin NetBSD))
|
||||
curl -O $(LAPACK_URL);
|
||||
else
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
fetch $(LAPACK_URL);
|
||||
else
|
||||
wget -O $@ $(LAPACK_URL);
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
large.tgz :
|
||||
ifndef NOFORTRAN
|
||||
if [ ! -a $< ]; then
|
||||
@@ -287,17 +269,15 @@ lapack-timing : large.tgz timing.tgz
|
||||
ifndef NOFORTRAN
|
||||
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
|
||||
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )
|
||||
make -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
make -C $(NETLIB_LAPACK_DIR)/TIMING
|
||||
endif
|
||||
|
||||
|
||||
lapack-test :
|
||||
$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintsts xlintstz xlintstzc
|
||||
@rm -f $(NETLIB_LAPACK_DIR)/TESTING/*.out
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING
|
||||
$(GREP) failed $(NETLIB_LAPACK_DIR)/TESTING/*.out
|
||||
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
|
||||
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||
|
||||
|
||||
dummy :
|
||||
|
||||
@@ -323,4 +303,5 @@ endif
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) clean
|
||||
@rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h
|
||||
@rm -f *.grd Makefile.conf_last config_last.h
|
||||
@(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out testing_results.txt)
|
||||
@echo Done.
|
||||
|
||||
@@ -10,3 +10,9 @@ FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
endif
|
||||
|
||||
|
||||
ifeq ($(CORE), ARMV5)
|
||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
endif
|
||||
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ PREFIX ?= /opt/OpenBLAS
|
||||
|
||||
OPENBLAS_INCLUDE_DIR := $(PREFIX)/include
|
||||
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib
|
||||
OPENBLAS_BINARY_DIR := $(PREFIX)/bin
|
||||
OPENBLAS_BUILD_DIR := $(CURDIR)
|
||||
|
||||
.PHONY : install
|
||||
@@ -19,11 +20,12 @@ install : lib.grd
|
||||
@-mkdir -p $(DESTDIR)$(PREFIX)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_BINARY_DIR)
|
||||
@echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
#for inc
|
||||
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@awk '{print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@awk 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@@ -49,10 +51,12 @@ ifndef NO_LAPACKE
|
||||
endif
|
||||
|
||||
#for install static library
|
||||
ifndef NO_STATIC
|
||||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@install -pm644 $(LIBNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
endif
|
||||
#for install shared library
|
||||
ifndef NO_SHARED
|
||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@@ -78,10 +82,11 @@ ifeq ($(OSNAME), Darwin)
|
||||
@-ln -fs $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
||||
@-cp $(LIBPREFIX).lib $(OPENBLAS_LIBRARY_DIR)
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.2.8
|
||||
VERSION = 0.2.9
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
@@ -48,6 +48,9 @@ VERSION = 0.2.8
|
||||
# automatically detected by the script.
|
||||
# NUM_THREADS = 24
|
||||
|
||||
# if you don't need to install the static library, please comment it in.
|
||||
# NO_STATIC = 1
|
||||
|
||||
# if you don't need to generate the shared library, please comment it in.
|
||||
# NO_SHARED = 1
|
||||
|
||||
@@ -76,10 +79,10 @@ VERSION = 0.2.8
|
||||
# Unfortunately most of kernel won't give us high quality buffer.
|
||||
# BLAS tries to find the best region before entering main function,
|
||||
# but it will consume time. If you don't like it, you can disable one.
|
||||
# NO_WARMUP = 1
|
||||
NO_WARMUP = 1
|
||||
|
||||
# If you want to disable CPU/Memory affinity on Linux.
|
||||
# NO_AFFINITY = 1
|
||||
NO_AFFINITY = 1
|
||||
|
||||
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
|
||||
# and OS. However, the performance is low.
|
||||
@@ -129,6 +132,9 @@ VERSION = 0.2.8
|
||||
# The default -O2 is enough.
|
||||
# COMMON_OPT = -O2
|
||||
|
||||
# gfortran option for LAPACK
|
||||
FCOMMON_OPT = -frecursive
|
||||
|
||||
# Profiling flags
|
||||
COMMON_PROF = -pg
|
||||
|
||||
|
||||
@@ -158,6 +158,7 @@ endif
|
||||
|
||||
ifeq ($(OSNAME), Linux)
|
||||
EXTRALIB += -lm
|
||||
NO_EXPRECISION = 1
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), AIX)
|
||||
@@ -846,19 +847,6 @@ ifeq ($(DEBUG), 1)
|
||||
COMMON_OPT += -g
|
||||
endif
|
||||
|
||||
ifndef COMMON_OPT
|
||||
ifeq ($(ARCH), arm)
|
||||
COMMON_OPT = -O3
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef COMMON_OPT
|
||||
ifeq ($(ARCH), arm64)
|
||||
COMMON_OPT = -O3
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
ifndef COMMON_OPT
|
||||
COMMON_OPT = -O2
|
||||
endif
|
||||
@@ -872,8 +860,14 @@ override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
|
||||
#MAKEOVERRIDES =
|
||||
|
||||
#For LAPACK Fortran codes.
|
||||
#Disable -fopenmp for LAPACK Fortran codes on Windows.
|
||||
ifdef OS_WINDOWS
|
||||
LAPACK_FFLAGS := $(filter-out -fopenmp -mp -openmp -xopenmp=parallel,$(FFLAGS))
|
||||
LAPACK_FPFLAGS := $(filter-out -fopenmp -mp -openmp -xopenmp=parallel,$(FPFLAGS))
|
||||
else
|
||||
LAPACK_FFLAGS := $(FFLAGS)
|
||||
LAPACK_FPFLAGS := $(FPFLAGS)
|
||||
endif
|
||||
|
||||
LAPACK_CFLAGS = $(CFLAGS)
|
||||
LAPACK_CFLAGS += -DHAVE_LAPACK_CONFIG_H
|
||||
|
||||
12
common.h
12
common.h
@@ -310,15 +310,23 @@ typedef int blasint;
|
||||
#define YIELDING SwitchToThread()
|
||||
#endif
|
||||
|
||||
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8)
|
||||
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
|
||||
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
|
||||
#endif
|
||||
|
||||
#ifdef PILEDRIVER
|
||||
#ifdef BULLDOZER
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef PILEDRIVER
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef YIELDING
|
||||
#define YIELDING sched_yield()
|
||||
#endif
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
#define SCOPY_K scopy_k
|
||||
#define SDOTU_K sdot_k
|
||||
#define SDOTC_K sdot_k
|
||||
#define SDSDOT_K sdot_k
|
||||
#define SDSDOT_K dsdot_k
|
||||
#define DSDOT_K dsdot_k
|
||||
#define SNRM2_K snrm2_k
|
||||
#define SSCAL_K sscal_k
|
||||
@@ -162,7 +162,7 @@
|
||||
#define SCOPY_K gotoblas -> scopy_k
|
||||
#define SDOTU_K gotoblas -> sdot_k
|
||||
#define SDOTC_K gotoblas -> sdot_k
|
||||
#define SDSDOT_K gotoblas -> sdot_k
|
||||
#define SDSDOT_K gotoblas -> dsdot_k
|
||||
#define DSDOT_K gotoblas -> dsdot_k
|
||||
#define SNRM2_K gotoblas -> snrm2_k
|
||||
#define SSCAL_K gotoblas -> sscal_k
|
||||
|
||||
13
cpuid_x86.c
13
cpuid_x86.c
@@ -1051,11 +1051,14 @@ int get_cpuname(void){
|
||||
case 3:
|
||||
switch (model) {
|
||||
case 10:
|
||||
case 14:
|
||||
// Ivy Bridge
|
||||
if(support_avx())
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 12:
|
||||
case 15:
|
||||
if(support_avx())
|
||||
return CPUTYPE_HASWELL;
|
||||
else
|
||||
@@ -1065,6 +1068,7 @@ int get_cpuname(void){
|
||||
case 4:
|
||||
switch (model) {
|
||||
case 5:
|
||||
case 6:
|
||||
if(support_avx())
|
||||
return CPUTYPE_HASWELL;
|
||||
else
|
||||
@@ -1457,11 +1461,13 @@ int get_coretype(void){
|
||||
case 3:
|
||||
switch (model) {
|
||||
case 10:
|
||||
case 14:
|
||||
if(support_avx())
|
||||
return CORE_SANDYBRIDGE;
|
||||
else
|
||||
return CORE_NEHALEM; //OS doesn't support AVX
|
||||
case 12:
|
||||
case 15:
|
||||
if(support_avx())
|
||||
return CORE_HASWELL;
|
||||
else
|
||||
@@ -1471,6 +1477,7 @@ int get_coretype(void){
|
||||
case 4:
|
||||
switch (model) {
|
||||
case 5:
|
||||
case 6:
|
||||
if(support_avx())
|
||||
return CORE_HASWELL;
|
||||
else
|
||||
@@ -1551,7 +1558,13 @@ void get_cpuconfig(void){
|
||||
printf("#define L2_SIZE %d\n", info.size * 1024);
|
||||
printf("#define L2_ASSOCIATIVE %d\n", info.associative);
|
||||
printf("#define L2_LINESIZE %d\n", info.linesize);
|
||||
} else {
|
||||
//fall back for some virtual machines.
|
||||
printf("#define L2_SIZE 1048576\n");
|
||||
printf("#define L2_ASSOCIATIVE 6\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
}
|
||||
|
||||
|
||||
get_cacheinfo(CACHE_INFO_L3, &info);
|
||||
if (info.size > 0) {
|
||||
|
||||
2
ctest.c
2
ctest.c
@@ -125,7 +125,7 @@ ARCH_IA64
|
||||
BINARY_64
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_ARCH) || defined(__ARM_ARCH_7A__)
|
||||
#if defined(__ARM_ARCH) || defined(__ARM_ARCH_7A__) || defined(__arm__)
|
||||
ARCH_ARM
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
TOPDIR = ../..
|
||||
include ../../Makefile.system
|
||||
|
||||
COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_config.$(SUFFIX) openblas_get_parallel.$(SUFFIX)
|
||||
COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_config.$(SUFFIX) openblas_get_parallel.$(SUFFIX) openblas_error_handle.$(SUFFIX)
|
||||
|
||||
COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
|
||||
#COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
|
||||
|
||||
ifdef SMP
|
||||
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX)
|
||||
@@ -109,6 +109,9 @@ openblas_get_config.$(SUFFIX) : openblas_get_config.c
|
||||
openblas_get_parallel.$(SUFFIX) : openblas_get_parallel.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
openblas_error_handle.$(SUFFIX) : openblas_error_handle.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
|
||||
@@ -74,6 +74,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include <sys/resource.h>
|
||||
#endif
|
||||
|
||||
#ifndef likely
|
||||
#ifdef __GNUC__
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#else
|
||||
#define likely(x) (x)
|
||||
#endif
|
||||
#endif
|
||||
#ifndef unlikely
|
||||
#ifdef __GNUC__
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#else
|
||||
#define unlikely(x) (x)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef SMP_SERVER
|
||||
|
||||
#undef MONITOR
|
||||
@@ -584,6 +599,10 @@ static BLASULONG exec_queue_lock = 0;
|
||||
|
||||
int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
|
||||
|
||||
#ifdef SMP_SERVER
|
||||
// Handle lazy re-init of the thread-pool after a POSIX fork
|
||||
if (unlikely(blas_server_avail == 0)) blas_thread_init();
|
||||
#endif
|
||||
BLASLONG i = 0;
|
||||
blas_queue_t *current = queue;
|
||||
#if defined(OS_LINUX) && !defined(NO_AFFINITY) && !defined(PARAMTEST)
|
||||
@@ -708,7 +727,11 @@ int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){
|
||||
/* Execute Threads */
|
||||
int exec_blas(BLASLONG num, blas_queue_t *queue){
|
||||
|
||||
int (*routine)(blas_arg_t *, void *, void *, double *, double *, BLASLONG);
|
||||
#ifdef SMP_SERVER
|
||||
// Handle lazy re-init of the thread-pool after a POSIX fork
|
||||
if (unlikely(blas_server_avail == 0)) blas_thread_init();
|
||||
#endif
|
||||
int (*routine)(blas_arg_t *, void *, void *, double *, double *, BLASLONG);
|
||||
|
||||
#ifdef TIMING_DEBUG
|
||||
BLASULONG start, stop;
|
||||
|
||||
@@ -441,7 +441,7 @@ int BLASFUNC(blas_thread_shutdown)(void){
|
||||
if (blas_server_avail){
|
||||
|
||||
SetEvent(pool.killed);
|
||||
printf("blas_num_threads=%d\n", blas_num_threads);
|
||||
|
||||
for(i = 0; i < blas_num_threads - 1; i++){
|
||||
WaitForSingleObject(blas_threads[i], 5); //INFINITE);
|
||||
TerminateThread(blas_threads[i],0);
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#ifdef ARCH_X86
|
||||
#define EXTERN extern
|
||||
#else
|
||||
@@ -108,6 +109,11 @@ int support_avx(){
|
||||
#endif
|
||||
}
|
||||
|
||||
extern void openblas_warning(int verbose, const char * msg);
|
||||
#define FALLBACK_VERBOSE 1
|
||||
#define NEHALEM_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n"
|
||||
#define BARCELONA_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n"
|
||||
|
||||
static int get_vendor(void){
|
||||
int eax, ebx, ecx, edx;
|
||||
char vendor[13];
|
||||
@@ -179,38 +185,38 @@ static gotoblas_t *get_coretype(void){
|
||||
if(support_avx())
|
||||
return &gotoblas_SANDYBRIDGE;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
|
||||
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
case 3:
|
||||
//Intel Sandy Bridge 22nm (Ivy Bridge?)
|
||||
if (model == 10) {
|
||||
if (model == 10 || model == 14) {
|
||||
if(support_avx())
|
||||
return &gotoblas_SANDYBRIDGE;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
|
||||
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
//Intel Haswell
|
||||
if (model == 12) {
|
||||
if (model == 12 || model == 15) {
|
||||
if(support_avx())
|
||||
return &gotoblas_HASWELL;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
|
||||
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
case 4:
|
||||
//Intel Haswell
|
||||
if (model == 5) {
|
||||
if (model == 5 || model == 6) {
|
||||
if(support_avx())
|
||||
return &gotoblas_HASWELL;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
|
||||
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
@@ -248,7 +254,7 @@ static gotoblas_t *get_coretype(void){
|
||||
if(support_avx())
|
||||
return &gotoblas_BULLDOZER;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n");
|
||||
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
|
||||
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}else if(model == 2){
|
||||
@@ -256,7 +262,7 @@ static gotoblas_t *get_coretype(void){
|
||||
if(support_avx())
|
||||
return &gotoblas_PILEDRIVER;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n");
|
||||
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
|
||||
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
@@ -351,7 +357,7 @@ void gotoblas_dynamic_init(void) {
|
||||
if (gotoblas && gotoblas -> init) {
|
||||
gotoblas -> init();
|
||||
} else {
|
||||
fprintf(stderr, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
|
||||
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
@@ -143,6 +143,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
gotoblas_t *gotoblas = NULL;
|
||||
#endif
|
||||
|
||||
extern void openblas_warning(int verbose, const char * msg);
|
||||
|
||||
#ifndef SMP
|
||||
|
||||
#define blas_cpu_number 1
|
||||
@@ -253,6 +255,23 @@ int goto_get_num_procs (void) {
|
||||
return blas_cpu_number;
|
||||
}
|
||||
|
||||
void openblas_fork_handler()
|
||||
{
|
||||
// This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is
|
||||
// built with "make USE_OPENMP=0".
|
||||
// Hanging can still happen when OpenBLAS is built against the libgomp
|
||||
// implementation of OpenMP. The problem is tracked at:
|
||||
// http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035
|
||||
// In the meantime, build with USE_OPENMP=0 or link against another
|
||||
// implementation of OpenMP.
|
||||
#if !defined(OS_WINDOWS) && defined(SMP_SERVER)
|
||||
int err;
|
||||
err = pthread_atfork ((void (*)(void)) BLASFUNC(blas_thread_shutdown), NULL, NULL);
|
||||
if(err != 0)
|
||||
openblas_warning(0, "OpenBLAS Warning ... cannot install fork handler. You may meet hang after fork.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
int blas_get_cpu_number(void){
|
||||
char *p;
|
||||
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN)
|
||||
@@ -1268,6 +1287,9 @@ void CONSTRUCTOR gotoblas_init(void) {
|
||||
|
||||
if (gotoblas_initialized) return;
|
||||
|
||||
#ifdef SMP
|
||||
openblas_fork_handler();
|
||||
#endif
|
||||
|
||||
#ifdef PROFILE
|
||||
moncontrol (0);
|
||||
|
||||
51
driver/others/openblas_error_handle.c
Normal file
51
driver/others/openblas_error_handle.c
Normal file
@@ -0,0 +1,51 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*
 * Return the verbosity level requested through the OPENBLAS_VERBOSE
 * environment variable.
 *
 * Returns 0 when the variable is unset, does not begin with a decimal
 * number, or holds a negative value. Otherwise returns the parsed value,
 * saturated at the largest value an int can hold.
 */
int openblas_verbose(void) {
  /* Largest int value, computed here so no extra header is required. */
  const long level_cap = (long)(~0U >> 1);
  const char *p = getenv("OPENBLAS_VERBOSE");
  long level;

  if (p == NULL) return 0;

  /* strtol rather than atoi: atoi has undefined behavior when the value
     does not fit in an int, whereas strtol saturates at LONG_MIN/LONG_MAX
     and yields 0 when no digits are found. */
  level = strtol(p, NULL, 10);

  if (level < 0) return 0;
  if (level > level_cap) return (int)level_cap;

  return (int)level;
}
|
||||
|
||||
/*
 * Write msg to stderr if the current verbosity allows it.
 *
 * verbose : minimum verbosity level at which the message is shown; the
 *           message is printed when openblas_verbose() >= verbose.
 * msg     : text to print, written as-is (no newline is appended).
 */
void openblas_warning(int verbose, const char * msg) {
  /* Configured verbosity is below this message's level: stay silent. */
  if (openblas_verbose() < verbose) return;

  fprintf(stderr, "%s", msg);
}
|
||||
@@ -75,9 +75,6 @@ zip : dll
|
||||
zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME)
|
||||
|
||||
dll : ../$(LIBDLLNAME)
|
||||
#libgoto2.dll
|
||||
|
||||
dll2 : libgoto2_shared.dll
|
||||
|
||||
# On Windows, we only generate a DLL without a version suffix. This is because
|
||||
# applications which link against the dynamic library reference a fixed DLL name
|
||||
@@ -86,36 +83,19 @@ dll2 : libgoto2_shared.dll
|
||||
# For more details see: https://github.com/xianyi/OpenBLAS/issues/127.
|
||||
../$(LIBDLLNAME) : ../$(LIBNAME) libopenblas.def dllinit.$(SUFFIX)
|
||||
$(RANLIB) ../$(LIBNAME)
|
||||
ifeq ($(BINARY32), 1)
|
||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libopenblas.def \
|
||||
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(EXTRALIB)
|
||||
-lib /machine:i386 /def:libopenblas.def
|
||||
else
|
||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libopenblas.def \
|
||||
--entry $(FU)dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(EXTRALIB)
|
||||
-lib /machine:X64 /def:libopenblas.def
|
||||
endif
|
||||
|
||||
libgoto2_shared.dll : ../$(LIBNAME) libgoto2_shared.def
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) libgoto2_shared.def -shared -o $(@F) \
|
||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
|
||||
-Wl,--out-implib,libgoto2_shared.lib $(FEXTRALIB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) libopenblas.def dllinit.$(SUFFIX) \
|
||||
-shared -o ../$(LIBDLLNAME) -Wl,--out-implib,../$(LIBPREFIX).lib \
|
||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB)
|
||||
|
||||
libopenblas.def : gensymbol
|
||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
||||
libgoto2_shared.def : gensymbol
|
||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
||||
libgoto_hpl.def : gensymbol
|
||||
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
||||
$(LIBDYNNAME) : ../$(LIBNAME) osx.def
|
||||
$(FC) $(FFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
|
||||
|
||||
symbol.$(SUFFIX) : symbol.S
|
||||
$(CC) $(CFLAGS) -c -o $(@F) $^
|
||||
|
||||
dllinit.$(SUFFIX) : dllinit.c
|
||||
$(CC) $(CFLAGS) -c -o $(@F) -s $<
|
||||
|
||||
@@ -123,7 +103,7 @@ ifeq ($(OSNAME), Linux)
|
||||
|
||||
so : ../$(LIBSONAME)
|
||||
|
||||
../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c
|
||||
../$(LIBSONAME) : ../$(LIBNAME) linktest.c
|
||||
ifneq ($(C_COMPILER), LSB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
|
||||
@@ -145,7 +125,7 @@ ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD NetBSD))
|
||||
|
||||
so : ../$(LIBSONAME)
|
||||
|
||||
../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c
|
||||
../$(LIBSONAME) : ../$(LIBNAME) linktest.c
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
|
||||
$(FEXTRALIB) $(EXTRALIB)
|
||||
@@ -197,18 +177,12 @@ static : ../$(LIBNAME)
|
||||
$(AR) -cq ../$(LIBNAME) goto.$(SUFFIX)
|
||||
rm -f goto.$(SUFFIX)
|
||||
|
||||
linux.def : gensymbol ../Makefile.system ../getarch.c
|
||||
perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
||||
osx.def : gensymbol ../Makefile.system ../getarch.c
|
||||
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
||||
aix.def : gensymbol ../Makefile.system ../getarch.c
|
||||
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
||||
symbol.S : gensymbol
|
||||
perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > symbol.S
|
||||
|
||||
test : linktest.c
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
||||
rm -f linktest
|
||||
|
||||
@@ -41,7 +41,7 @@
|
||||
void gotoblas_init(void);
|
||||
void gotoblas_quit(void);
|
||||
|
||||
BOOL APIENTRY dllinit(HINSTANCE hInst, DWORD reason, LPVOID reserved) {
|
||||
BOOL APIENTRY DllMain(HINSTANCE hInst, DWORD reason, LPVOID reserved) {
|
||||
|
||||
if (reason == DLL_PROCESS_ATTACH) {
|
||||
gotoblas_init();
|
||||
|
||||
@@ -2667,34 +2667,34 @@
|
||||
## @(MATGEN_OBJ) from `lapack-3.4.1/lapacke/src/Makefile`
|
||||
## Not exported: requires LAPACKE_TESTING to be set and depends on libtmg
|
||||
## (see `lapack-3.4.1/TESTING/MATGEN`).
|
||||
#LAPACKE_clatms,
|
||||
#LAPACKE_clatms_work,
|
||||
#LAPACKE_dlatms,
|
||||
#LAPACKE_dlatms_work,
|
||||
#LAPACKE_slatms,
|
||||
#LAPACKE_slatms_work,
|
||||
#LAPACKE_zlatms,
|
||||
#LAPACKE_zlatms_work,
|
||||
#LAPACKE_clagge,
|
||||
#LAPACKE_clagge_work,
|
||||
#LAPACKE_dlagge,
|
||||
#LAPACKE_dlagge_work,
|
||||
#LAPACKE_slagge,
|
||||
#LAPACKE_slagge_work,
|
||||
#LAPACKE_zlagge,
|
||||
#LAPACKE_zlagge_work,
|
||||
#LAPACKE_claghe,
|
||||
#LAPACKE_claghe_work,
|
||||
#LAPACKE_zlaghe,
|
||||
#LAPACKE_zlaghe_work,
|
||||
#LAPACKE_clagsy,
|
||||
#LAPACKE_clagsy_work,
|
||||
#LAPACKE_dlagsy,
|
||||
#LAPACKE_dlagsy_work,
|
||||
#LAPACKE_slagsy,
|
||||
#LAPACKE_slagsy_work,
|
||||
#LAPACKE_zlagsy,
|
||||
#LAPACKE_zlagsy_work,
|
||||
LAPACKE_clatms,
|
||||
LAPACKE_clatms_work,
|
||||
LAPACKE_dlatms,
|
||||
LAPACKE_dlatms_work,
|
||||
LAPACKE_slatms,
|
||||
LAPACKE_slatms_work,
|
||||
LAPACKE_zlatms,
|
||||
LAPACKE_zlatms_work,
|
||||
LAPACKE_clagge,
|
||||
LAPACKE_clagge_work,
|
||||
LAPACKE_dlagge,
|
||||
LAPACKE_dlagge_work,
|
||||
LAPACKE_slagge,
|
||||
LAPACKE_slagge_work,
|
||||
LAPACKE_zlagge,
|
||||
LAPACKE_zlagge_work,
|
||||
LAPACKE_claghe,
|
||||
LAPACKE_claghe_work,
|
||||
LAPACKE_zlaghe,
|
||||
LAPACKE_zlaghe_work,
|
||||
LAPACKE_clagsy,
|
||||
LAPACKE_clagsy_work,
|
||||
LAPACKE_dlagsy,
|
||||
LAPACKE_dlagsy_work,
|
||||
LAPACKE_slagsy,
|
||||
LAPACKE_slagsy_work,
|
||||
LAPACKE_zlagsy,
|
||||
LAPACKE_zlagsy_work,
|
||||
);
|
||||
|
||||
#These functions may need 2 underscores.
|
||||
@@ -2725,8 +2725,7 @@ if ($ARGV[8] == 1) {
|
||||
} elsif ($ARGV[5] == 1) {
|
||||
#NO_LAPACK=1
|
||||
@underscore_objs = (@blasobjs, @misc_underscore_objs);
|
||||
} elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1" ||
|
||||
-d "../lapack-3.4.2" || -d "../lapack-netlib") {
|
||||
} elsif (-d "../lapack-netlib") {
|
||||
|
||||
if ($ARGV[7] == 0){
|
||||
# NEED2UNDERSCORES=0
|
||||
@@ -2771,14 +2770,6 @@ if ($ARGV[6] == 1) {
|
||||
@no_underscore_objs = (@no_underscore_objs, @lapackeobjs);
|
||||
}
|
||||
|
||||
@linuxobjs = ('__strtol_internal', 'exit', 'free', 'getenv', 'malloc',
|
||||
'mmap', 'printf', 'sqrt',
|
||||
'pthread_cond_broadcast', 'pthread_cond_destroy',
|
||||
'pthread_cond_init', 'pthread_cond_signal', 'pthread_cond_wait',
|
||||
'pthread_create', 'pthread_exit', 'pthread_join',
|
||||
'pthread_mutex_destroy', 'pthread_mutex_init',
|
||||
'pthread_mutex_lock', 'pthread_mutex_unlock');
|
||||
|
||||
@hplobjs = (daxpy, dcopy, dscal, idamax, dgemv, dtrsv, dger, dgemm, dtrsm);
|
||||
@hplobjs2 = (HPL_dlaswp00N, HPL_dlaswp01N, HPL_dlaswp01T);
|
||||
|
||||
@@ -2786,31 +2777,6 @@ $bu = $ARGV[2];
|
||||
|
||||
$bu = "" if (($bu eq "0") || ($bu eq "1"));
|
||||
|
||||
if ($ARGV[0] eq "linux"){
|
||||
|
||||
@underscore_objs = (@underscore_objs, @misc_common_objs);
|
||||
@no_underscore_objs = (@no_underscore_objs, @misc_common_objs);
|
||||
|
||||
foreach $objs (@underscore_objs) {
|
||||
print $objs, $bu, "\n";
|
||||
}
|
||||
|
||||
foreach $objs (@need_2underscore_objs) {
|
||||
print $objs, $bu, $bu, "\n";
|
||||
}
|
||||
|
||||
# if ($ARGV[4] == 0) {
|
||||
foreach $objs (@no_underscore_objs) {
|
||||
print $objs, "\n";
|
||||
}
|
||||
# }
|
||||
|
||||
foreach $objs (@linuxobjs) {
|
||||
print $objs, "\n";
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if ($ARGV[0] eq "osx"){
|
||||
|
||||
@underscore_objs = (@underscore_objs, @misc_common_objs);
|
||||
@@ -2956,32 +2922,6 @@ if ($ARGV[0] eq "microsoft"){
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if ($ARGV[0] eq "win2kasm"){
|
||||
|
||||
@underscore_objs = (@underscore_objs, @misc_common_objs);
|
||||
|
||||
print "\t.text\n";
|
||||
foreach $objs (@underscore_objs) {
|
||||
$uppercase = $objs;
|
||||
$uppercase =~ tr/[a-z]/[A-Z]/;
|
||||
print "\t.align 16\n";
|
||||
print "\t.globl _", $uppercase, "_\n";
|
||||
print "_", $uppercase, "_:\n";
|
||||
print "\tjmp\t_", $objs, "_\n";
|
||||
}
|
||||
|
||||
foreach $objs (@need_2underscore_objs) {
|
||||
$uppercase = $objs;
|
||||
$uppercase =~ tr/[a-z]/[A-Z]/;
|
||||
print "\t.align 16\n";
|
||||
print "\t.globl _", $uppercase, "__\n";
|
||||
print "_", $uppercase, "__:\n";
|
||||
print "\tjmp\t_", $objs, "__\n";
|
||||
}
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if ($ARGV[0] eq "linktest"){
|
||||
|
||||
@underscore_objs = (@underscore_objs, @misc_common_objs);
|
||||
|
||||
16
getarch.c
16
getarch.c
@@ -724,6 +724,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_ARMV5
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM"
|
||||
#define SUBARCHITECTURE "ARMV5"
|
||||
#define SUBDIRNAME "arm"
|
||||
#define ARCHCONFIG "-DARMV5 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||
"-DHAVE_VFP"
|
||||
#define LIBNAME "armv5"
|
||||
#define CORENAME "ARMV5"
|
||||
#else
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef FORCE_ARMV8
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM64"
|
||||
|
||||
@@ -2,11 +2,11 @@ TOPDIR = ..
|
||||
include $(TOPDIR)/Makefile.system
|
||||
|
||||
ifeq ($(ARCH), x86)
|
||||
SUPPORT_GEMM3M = 1
|
||||
SUPPORT_GEMM3M = 0
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86_64)
|
||||
SUPPORT_GEMM3M = 1
|
||||
SUPPORT_GEMM3M = 0
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), ia64)
|
||||
@@ -342,30 +342,56 @@ CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
|
||||
ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS)
|
||||
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)
|
||||
|
||||
#SLAPACKOBJS = \
|
||||
# sgetf2.$(SUFFIX) sgetrf.$(SUFFIX) slauu2.$(SUFFIX) slauum.$(SUFFIX) \
|
||||
# spotf2.$(SUFFIX) spotrf.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) \
|
||||
# slaswp.$(SUFFIX) sgetrs.$(SUFFIX) sgesv.$(SUFFIX) spotri.$(SUFFIX) \
|
||||
|
||||
SLAPACKOBJS = \
|
||||
sgetf2.$(SUFFIX) sgetrf.$(SUFFIX) slauu2.$(SUFFIX) slauum.$(SUFFIX) \
|
||||
spotf2.$(SUFFIX) spotrf.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) \
|
||||
slaswp.$(SUFFIX) sgetrs.$(SUFFIX) sgesv.$(SUFFIX) spotri.$(SUFFIX) \
|
||||
sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
|
||||
spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \
|
||||
slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) spotri.$(SUFFIX)
|
||||
|
||||
|
||||
#DLAPACKOBJS = \
|
||||
# dgetf2.$(SUFFIX) dgetrf.$(SUFFIX) dlauu2.$(SUFFIX) dlauum.$(SUFFIX) \
|
||||
# dpotf2.$(SUFFIX) dpotrf.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) \
|
||||
# dlaswp.$(SUFFIX) dgetrs.$(SUFFIX) dgesv.$(SUFFIX) dpotri.$(SUFFIX) \
|
||||
|
||||
DLAPACKOBJS = \
|
||||
dgetf2.$(SUFFIX) dgetrf.$(SUFFIX) dlauu2.$(SUFFIX) dlauum.$(SUFFIX) \
|
||||
dpotf2.$(SUFFIX) dpotrf.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) \
|
||||
dlaswp.$(SUFFIX) dgetrs.$(SUFFIX) dgesv.$(SUFFIX) dpotri.$(SUFFIX) \
|
||||
dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \
|
||||
dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \
|
||||
dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dpotri.$(SUFFIX)
|
||||
|
||||
|
||||
QLAPACKOBJS = \
|
||||
qgetf2.$(SUFFIX) qgetrf.$(SUFFIX) qlauu2.$(SUFFIX) qlauum.$(SUFFIX) \
|
||||
qpotf2.$(SUFFIX) qpotrf.$(SUFFIX) qtrti2.$(SUFFIX) qtrtri.$(SUFFIX) \
|
||||
qlaswp.$(SUFFIX) qgetrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \
|
||||
|
||||
#CLAPACKOBJS = \
|
||||
# cgetf2.$(SUFFIX) cgetrf.$(SUFFIX) clauu2.$(SUFFIX) clauum.$(SUFFIX) \
|
||||
# cpotf2.$(SUFFIX) cpotrf.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) \
|
||||
# claswp.$(SUFFIX) cgetrs.$(SUFFIX) cgesv.$(SUFFIX) cpotri.$(SUFFIX) \
|
||||
|
||||
CLAPACKOBJS = \
|
||||
cgetf2.$(SUFFIX) cgetrf.$(SUFFIX) clauu2.$(SUFFIX) clauum.$(SUFFIX) \
|
||||
cpotf2.$(SUFFIX) cpotrf.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) \
|
||||
claswp.$(SUFFIX) cgetrs.$(SUFFIX) cgesv.$(SUFFIX) cpotri.$(SUFFIX) \
|
||||
cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \
|
||||
cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \
|
||||
clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX)
|
||||
|
||||
|
||||
#ZLAPACKOBJS = \
|
||||
# zgetf2.$(SUFFIX) zgetrf.$(SUFFIX) zlauu2.$(SUFFIX) zlauum.$(SUFFIX) \
|
||||
# zpotf2.$(SUFFIX) zpotrf.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) \
|
||||
# zlaswp.$(SUFFIX) zgetrs.$(SUFFIX) zgesv.$(SUFFIX) zpotri.$(SUFFIX) \
|
||||
|
||||
ZLAPACKOBJS = \
|
||||
zgetf2.$(SUFFIX) zgetrf.$(SUFFIX) zlauu2.$(SUFFIX) zlauum.$(SUFFIX) \
|
||||
zpotf2.$(SUFFIX) zpotrf.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) \
|
||||
zlaswp.$(SUFFIX) zgetrs.$(SUFFIX) zgesv.$(SUFFIX) zpotri.$(SUFFIX) \
|
||||
zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
|
||||
zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \
|
||||
zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) zpotri.$(SUFFIX)
|
||||
|
||||
|
||||
|
||||
|
||||
XLAPACKOBJS = \
|
||||
xgetf2.$(SUFFIX) xgetrf.$(SUFFIX) xlauu2.$(SUFFIX) xlauum.$(SUFFIX) \
|
||||
@@ -375,10 +401,10 @@ XLAPACKOBJS = \
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
SBLASOBJS += $(SLAPACKOBJS)
|
||||
DBLASOBJS += $(DLAPACKOBJS)
|
||||
QBLASOBJS += $(QLAPACKOBJS)
|
||||
#QBLASOBJS += $(QLAPACKOBJS)
|
||||
CBLASOBJS += $(CLAPACKOBJS)
|
||||
ZBLASOBJS += $(ZLAPACKOBJS)
|
||||
XBLASOBJS += $(XLAPACKOBJS)
|
||||
#XBLASOBJS += $(XLAPACKOBJS)
|
||||
|
||||
endif
|
||||
|
||||
@@ -1731,217 +1757,217 @@ cblas_cher2k.$(SUFFIX) cblas_cher2k.$(PSUFFIX) : syr2k.c
|
||||
cblas_zher2k.$(SUFFIX) cblas_zher2k.$(PSUFFIX) : syr2k.c
|
||||
$(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F)
|
||||
|
||||
sgetf2.$(SUFFIX) sgetf2.$(PSUFFIX) : getf2.c
|
||||
sgetf2.$(SUFFIX) sgetf2.$(PSUFFIX) : lapack/getf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dgetf2.$(SUFFIX) dgetf2.$(PSUFFIX) : getf2.c
|
||||
dgetf2.$(SUFFIX) dgetf2.$(PSUFFIX) : lapack/getf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qgetf2.$(SUFFIX) qgetf2.$(PSUFFIX) : getf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cgetf2.$(SUFFIX) cgetf2.$(PSUFFIX) : zgetf2.c
|
||||
cgetf2.$(SUFFIX) cgetf2.$(PSUFFIX) : lapack/zgetf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zgetf2.$(SUFFIX) zgetf2.$(PSUFFIX) : zgetf2.c
|
||||
zgetf2.$(SUFFIX) zgetf2.$(PSUFFIX) : lapack/zgetf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xgetf2.$(SUFFIX) xgetf2.$(PSUFFIX) : zgetf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
sgetrf.$(SUFFIX) sgetrf.$(PSUFFIX) : getrf.c
|
||||
sgetrf.$(SUFFIX) sgetrf.$(PSUFFIX) : lapack/getrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dgetrf.$(SUFFIX) dgetrf.$(PSUFFIX) : getrf.c
|
||||
dgetrf.$(SUFFIX) dgetrf.$(PSUFFIX) : lapack/getrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qgetrf.$(SUFFIX) qgetrf.$(PSUFFIX) : getrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cgetrf.$(SUFFIX) cgetrf.$(PSUFFIX) : zgetrf.c
|
||||
cgetrf.$(SUFFIX) cgetrf.$(PSUFFIX) : lapack/zgetrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zgetrf.$(SUFFIX) zgetrf.$(PSUFFIX) : zgetrf.c
|
||||
zgetrf.$(SUFFIX) zgetrf.$(PSUFFIX) : lapack/zgetrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xgetrf.$(SUFFIX) xgetrf.$(PSUFFIX) : zgetrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
slauu2.$(SUFFIX) slauu2.$(PSUFFIX) : lauu2.c
|
||||
slauu2.$(SUFFIX) slauu2.$(PSUFFIX) : lapack/lauu2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dlauu2.$(SUFFIX) dlauu2.$(PSUFFIX) : lauu2.c
|
||||
dlauu2.$(SUFFIX) dlauu2.$(PSUFFIX) : lapack/lauu2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qlauu2.$(SUFFIX) qlauu2.$(PSUFFIX) : lauu2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
clauu2.$(SUFFIX) clauu2.$(PSUFFIX) : zlauu2.c
|
||||
clauu2.$(SUFFIX) clauu2.$(PSUFFIX) : lapack/zlauu2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zlauu2.$(SUFFIX) zlauu2.$(PSUFFIX) : zlauu2.c
|
||||
zlauu2.$(SUFFIX) zlauu2.$(PSUFFIX) : lapack/zlauu2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xlauu2.$(SUFFIX) xlauu2.$(PSUFFIX) : zlauu2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
slauum.$(SUFFIX) slauum.$(PSUFFIX) : lauum.c
|
||||
slauum.$(SUFFIX) slauum.$(PSUFFIX) : lapack/lauum.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dlauum.$(SUFFIX) dlauum.$(PSUFFIX) : lauum.c
|
||||
dlauum.$(SUFFIX) dlauum.$(PSUFFIX) : lapack/lauum.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qlauum.$(SUFFIX) qlauum.$(PSUFFIX) : lauum.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
clauum.$(SUFFIX) clauum.$(PSUFFIX) : zlauum.c
|
||||
clauum.$(SUFFIX) clauum.$(PSUFFIX) : lapack/zlauum.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zlauum.$(SUFFIX) zlauum.$(PSUFFIX) : zlauum.c
|
||||
zlauum.$(SUFFIX) zlauum.$(PSUFFIX) : lapack/zlauum.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xlauum.$(SUFFIX) xlauum.$(PSUFFIX) : zlauum.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
spotf2.$(SUFFIX) spotf2.$(PSUFFIX) : potf2.c
|
||||
spotf2.$(SUFFIX) spotf2.$(PSUFFIX) : lapack/potf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dpotf2.$(SUFFIX) dpotf2.$(PSUFFIX) : potf2.c
|
||||
dpotf2.$(SUFFIX) dpotf2.$(PSUFFIX) : lapack/potf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qpotf2.$(SUFFIX) qpotf2.$(PSUFFIX) : potf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cpotf2.$(SUFFIX) cpotf2.$(PSUFFIX) : zpotf2.c
|
||||
cpotf2.$(SUFFIX) cpotf2.$(PSUFFIX) : lapack/zpotf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zpotf2.$(SUFFIX) zpotf2.$(PSUFFIX) : zpotf2.c
|
||||
zpotf2.$(SUFFIX) zpotf2.$(PSUFFIX) : lapack/zpotf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xpotf2.$(SUFFIX) xpotf2.$(PSUFFIX) : zpotf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
spotrf.$(SUFFIX) spotrf.$(PSUFFIX) : potrf.c
|
||||
spotrf.$(SUFFIX) spotrf.$(PSUFFIX) : lapack/potrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dpotrf.$(SUFFIX) dpotrf.$(PSUFFIX) : potrf.c
|
||||
dpotrf.$(SUFFIX) dpotrf.$(PSUFFIX) : lapack/potrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qpotrf.$(SUFFIX) qpotrf.$(PSUFFIX) : potrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cpotrf.$(SUFFIX) cpotrf.$(PSUFFIX) : zpotrf.c
|
||||
cpotrf.$(SUFFIX) cpotrf.$(PSUFFIX) : lapack/zpotrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zpotrf.$(SUFFIX) zpotrf.$(PSUFFIX) : zpotrf.c
|
||||
zpotrf.$(SUFFIX) zpotrf.$(PSUFFIX) : lapack/zpotrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xpotrf.$(SUFFIX) xpotrf.$(PSUFFIX) : zpotrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
strti2.$(SUFFIX) strti2.$(PSUFFIX) : trti2.c
|
||||
strti2.$(SUFFIX) strti2.$(PSUFFIX) : lapack/trti2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dtrti2.$(SUFFIX) dtrti2.$(PSUFFIX) : trti2.c
|
||||
dtrti2.$(SUFFIX) dtrti2.$(PSUFFIX) : lapack/trti2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qtrti2.$(SUFFIX) qtrti2.$(PSUFFIX) : trti2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
ctrti2.$(SUFFIX) ctrti2.$(PSUFFIX) : ztrti2.c
|
||||
ctrti2.$(SUFFIX) ctrti2.$(PSUFFIX) : lapack/ztrti2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
ztrti2.$(SUFFIX) ztrti2.$(PSUFFIX) : ztrti2.c
|
||||
ztrti2.$(SUFFIX) ztrti2.$(PSUFFIX) : lapack/ztrti2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xtrti2.$(SUFFIX) xtrti2.$(PSUFFIX) : ztrti2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
strtri.$(SUFFIX) strtri.$(PSUFFIX) : trtri.c
|
||||
strtri.$(SUFFIX) strtri.$(PSUFFIX) : lapack/trtri.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dtrtri.$(SUFFIX) dtrtri.$(PSUFFIX) : trtri.c
|
||||
dtrtri.$(SUFFIX) dtrtri.$(PSUFFIX) : lapack/trtri.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qtrtri.$(SUFFIX) qtrtri.$(PSUFFIX) : trtri.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
ctrtri.$(SUFFIX) ctrtri.$(PSUFFIX) : ztrtri.c
|
||||
ctrtri.$(SUFFIX) ctrtri.$(PSUFFIX) : lapack/ztrtri.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
ztrtri.$(SUFFIX) ztrtri.$(PSUFFIX) : ztrtri.c
|
||||
ztrtri.$(SUFFIX) ztrtri.$(PSUFFIX) : lapack/ztrtri.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xtrtri.$(SUFFIX) xtrtri.$(PSUFFIX) : ztrtri.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
slaswp.$(SUFFIX) slaswp.$(PSUFFIX) : laswp.c
|
||||
slaswp.$(SUFFIX) slaswp.$(PSUFFIX) : lapack/laswp.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dlaswp.$(SUFFIX) dlaswp.$(PSUFFIX) : laswp.c
|
||||
dlaswp.$(SUFFIX) dlaswp.$(PSUFFIX) : lapack/laswp.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qlaswp.$(SUFFIX) qlaswp.$(PSUFFIX) : laswp.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
claswp.$(SUFFIX) claswp.$(PSUFFIX) : zlaswp.c
|
||||
claswp.$(SUFFIX) claswp.$(PSUFFIX) : lapack/zlaswp.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zlaswp.$(SUFFIX) zlaswp.$(PSUFFIX) : zlaswp.c
|
||||
zlaswp.$(SUFFIX) zlaswp.$(PSUFFIX) : lapack/zlaswp.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xlaswp.$(SUFFIX) xlaswp.$(PSUFFIX) : zlaswp.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
sgetrs.$(SUFFIX) sgetrs.$(PSUFFIX) : getrs.c
|
||||
sgetrs.$(SUFFIX) sgetrs.$(PSUFFIX) : lapack/getrs.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dgetrs.$(SUFFIX) dgetrs.$(PSUFFIX) : getrs.c
|
||||
dgetrs.$(SUFFIX) dgetrs.$(PSUFFIX) : lapack/getrs.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qgetrs.$(SUFFIX) qgetrs.$(PSUFFIX) : getrs.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : zgetrs.c
|
||||
cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : lapack/zgetrs.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zgetrs.$(SUFFIX) zgetrs.$(PSUFFIX) : zgetrs.c
|
||||
zgetrs.$(SUFFIX) zgetrs.$(PSUFFIX) : lapack/zgetrs.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xgetrs.$(SUFFIX) xgetrs.$(PSUFFIX) : zgetrs.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
sgesv.$(SUFFIX) sgesv.$(PSUFFIX) : gesv.c
|
||||
sgesv.$(SUFFIX) sgesv.$(PSUFFIX) : lapack/gesv.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dgesv.$(SUFFIX) dgesv.$(PSUFFIX) : gesv.c
|
||||
dgesv.$(SUFFIX) dgesv.$(PSUFFIX) : lapack/gesv.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qgesv.$(SUFFIX) qgesv.$(PSUFFIX) : gesv.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cgesv.$(SUFFIX) cgesv.$(PSUFFIX) : gesv.c
|
||||
cgesv.$(SUFFIX) cgesv.$(PSUFFIX) : lapack/gesv.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zgesv.$(SUFFIX) zgesv.$(PSUFFIX) : gesv.c
|
||||
zgesv.$(SUFFIX) zgesv.$(PSUFFIX) : lapack/gesv.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xgesv.$(SUFFIX) xgesv.$(PSUFFIX) : gesv.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
spotri.$(SUFFIX) spotri.$(PSUFFIX) : potri.c
|
||||
spotri.$(SUFFIX) spotri.$(PSUFFIX) : lapack/potri.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dpotri.$(SUFFIX) dpotri.$(PSUFFIX) : potri.c
|
||||
dpotri.$(SUFFIX) dpotri.$(PSUFFIX) : lapack/potri.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qpotri.$(SUFFIX) qpotri.$(PSUFFIX) : potri.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cpotri.$(SUFFIX) cpotri.$(PSUFFIX) : zpotri.c
|
||||
cpotri.$(SUFFIX) cpotri.$(PSUFFIX) : lapack/zpotri.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zpotri.$(SUFFIX) zpotri.$(PSUFFIX) : zpotri.c
|
||||
zpotri.$(SUFFIX) zpotri.$(PSUFFIX) : lapack/zpotri.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xpotri.$(SUFFIX) xpotri.$(PSUFFIX) : zpotri.c
|
||||
|
||||
@@ -86,7 +86,8 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
|
||||
if (incx == 0 || incy == 0)
|
||||
nthreads = 1;
|
||||
|
||||
//Temporarily walk around the low performance issue with small imput size & multithreads.
|
||||
//Temporarily work around the low performance issue with small input size &
|
||||
//multithreads.
|
||||
if (n <= 10000)
|
||||
nthreads = 1;
|
||||
|
||||
|
||||
@@ -149,7 +149,10 @@ int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
|
||||
blas_memory_free(buffer);
|
||||
#endif
|
||||
|
||||
FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.n, 2. / 3. * args.m * args.n * args.n);
|
||||
FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, .5 * args.n * args.n,
|
||||
args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.))
|
||||
+ args.n * (1./3. + args.n * (-1./2. + args.n * 1./6.)));
|
||||
|
||||
|
||||
IDEBUG_END;
|
||||
|
||||
@@ -1,3 +1,38 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2014/05/02 Saar
|
||||
* fixed two bugs as reported by Brendan Tracey
|
||||
* Test with lapack-3.5.0 : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
#ifdef FUNCTION_PROFILE
|
||||
#include "functable.h"
|
||||
@@ -7,6 +42,8 @@
|
||||
#define GAMSQ 16777216.e0
|
||||
#define RGAMSQ 5.9604645e-8
|
||||
|
||||
#define TWO 2.e0
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define ABS(x) fabs(x)
|
||||
#else
|
||||
@@ -25,181 +62,174 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
|
||||
|
||||
#endif
|
||||
|
||||
FLOAT du, dp1, dp2, dq2, dq1, dh11, dh21, dh12, dh22;
|
||||
int igo, flag;
|
||||
FLOAT dtemp;
|
||||
FLOAT du, dp1, dp2, dq2, dq1, dh11, dh21, dh12, dh22, dflag, dtemp;
|
||||
|
||||
#ifndef CBLAS
|
||||
PRINT_DEBUG_NAME;
|
||||
#else
|
||||
PRINT_DEBUG_CNAME;
|
||||
#endif
|
||||
if(*dd1 < ZERO)
|
||||
{
|
||||
dflag = -ONE;
|
||||
dh11 = ZERO;
|
||||
dh12 = ZERO;
|
||||
dh21 = ZERO;
|
||||
dh22 = ZERO;
|
||||
|
||||
dh11 = ZERO;
|
||||
dh12 = ZERO;
|
||||
dh21 = ZERO;
|
||||
dh22 = ZERO;
|
||||
*dd1 = ZERO;
|
||||
*dd2 = ZERO;
|
||||
*dx1 = ZERO;
|
||||
}
|
||||
else
|
||||
{
|
||||
dp2 = *dd2 * dy1;
|
||||
if(dp2 == ZERO)
|
||||
{
|
||||
dflag = -TWO;
|
||||
dparam[0] = dflag;
|
||||
return;
|
||||
}
|
||||
dp1 = *dd1 * *dx1;
|
||||
dq2 = dp2 * dy1;
|
||||
dq1 = dp1 * *dx1;
|
||||
if(ABS(dq1) > ABS(dq2))
|
||||
{
|
||||
dh21 = - dy1 / *dx1;
|
||||
dh12 = dp2 / dp1;
|
||||
|
||||
if (*dd1 < ZERO) goto L60;
|
||||
du = ONE - dh12 * dh21;
|
||||
if(du > ZERO)
|
||||
{
|
||||
dflag = ZERO;
|
||||
*dd1 = *dd1 / du;
|
||||
*dd2 = *dd2 / du;
|
||||
*dx1 = *dx1 * du;
|
||||
|
||||
dp2 = *dd2 * dy1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(dq2 < ZERO)
|
||||
{
|
||||
dflag = -ONE;
|
||||
|
||||
dh11 = ZERO;
|
||||
dh12 = ZERO;
|
||||
dh21 = ZERO;
|
||||
dh22 = ZERO;
|
||||
|
||||
if (dp2 == ZERO) {
|
||||
flag = -2;
|
||||
goto L260;
|
||||
}
|
||||
*dd1 = ZERO;
|
||||
*dd2 = ZERO;
|
||||
*dx1 = ZERO;
|
||||
}
|
||||
else
|
||||
{
|
||||
dflag = ONE;
|
||||
|
||||
dp1 = *dd1 * *dx1;
|
||||
dq2 = dp2 * dy1;
|
||||
dq1 = dp1 * *dx1;
|
||||
dh11 = dp1 / dp2;
|
||||
dh22 = *dx1 / dy1;
|
||||
du = ONE + dh11 * dh22;
|
||||
dtemp = *dd2 / du;
|
||||
|
||||
if (! (ABS(dq1) > ABS(dq2))) goto L40;
|
||||
|
||||
dh21 = -(dy1) / *dx1;
|
||||
dh12 = dp2 / dp1;
|
||||
|
||||
du = ONE - dh12 * dh21;
|
||||
|
||||
if (du <= ZERO) goto L60;
|
||||
|
||||
flag = 0;
|
||||
*dd1 /= du;
|
||||
*dd2 /= du;
|
||||
*dx1 *= du;
|
||||
|
||||
goto L100;
|
||||
|
||||
L40:
|
||||
if (dq2 < ZERO) goto L60;
|
||||
|
||||
flag = 1;
|
||||
dh11 = dp1 / dp2;
|
||||
dh22 = *dx1 / dy1;
|
||||
du = ONE + dh11 * dh22;
|
||||
dtemp = *dd2 / du;
|
||||
*dd2 = *dd1 / du;
|
||||
*dd1 = dtemp;
|
||||
*dx1 = dy1 * du;
|
||||
goto L100;
|
||||
|
||||
L60:
|
||||
flag = -1;
|
||||
dh11 = ZERO;
|
||||
dh12 = ZERO;
|
||||
dh21 = ZERO;
|
||||
dh22 = ZERO;
|
||||
|
||||
*dd1 = ZERO;
|
||||
*dd2 = ZERO;
|
||||
*dx1 = ZERO;
|
||||
goto L220;
|
||||
*dd2 = *dd1 / du;
|
||||
*dd1 = dtemp;
|
||||
*dx1 = dy1 * du;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
L70:
|
||||
if (flag < 0) goto L90;
|
||||
|
||||
if (flag > 0) goto L80;
|
||||
|
||||
dh11 = ONE;
|
||||
dh22 = ONE;
|
||||
flag = -1;
|
||||
goto L90;
|
||||
if(*dd1 != ZERO)
|
||||
{
|
||||
while( (*dd1 <= RGAMSQ) || (*dd1 >= GAMSQ) )
|
||||
{
|
||||
if(dflag == ZERO)
|
||||
{
|
||||
dh11 = ONE;
|
||||
dh22 = ONE;
|
||||
dflag = -ONE;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(dflag == ONE)
|
||||
{
|
||||
dh21 = -ONE;
|
||||
dh12 = ONE;
|
||||
dflag = -ONE;
|
||||
}
|
||||
}
|
||||
if( *dd1 <= RGAMSQ )
|
||||
{
|
||||
*dd1 = *dd1 * (GAM * GAM);
|
||||
*dx1 = *dx1 / GAM;
|
||||
dh11 = dh11 / GAM;
|
||||
dh12 = dh12 / GAM;
|
||||
}
|
||||
else
|
||||
{
|
||||
*dd1 = *dd1 / (GAM * GAM);
|
||||
*dx1 = *dx1 * GAM;
|
||||
dh11 = dh11 * GAM;
|
||||
dh12 = dh12 * GAM;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(*dd2 != ZERO)
|
||||
{
|
||||
while( (ABS(*dd2) <= RGAMSQ) || (ABS(*dd2) >= GAMSQ) )
|
||||
{
|
||||
if(dflag == ZERO)
|
||||
{
|
||||
dh11 = ONE;
|
||||
dh22 = ONE;
|
||||
dflag = -ONE;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(dflag == ONE)
|
||||
{
|
||||
dh21 = -ONE;
|
||||
dh12 = ONE;
|
||||
dflag = -ONE;
|
||||
}
|
||||
}
|
||||
if( ABS(*dd2) <= RGAMSQ )
|
||||
{
|
||||
*dd2 = *dd2 * (GAM * GAM);
|
||||
dh21 = dh21 / GAM;
|
||||
dh22 = dh22 / GAM;
|
||||
}
|
||||
else
|
||||
{
|
||||
*dd2 = *dd2 / (GAM * GAM);
|
||||
dh21 = dh21 * GAM;
|
||||
dh22 = dh22 * GAM;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
L80:
|
||||
dh21 = -ONE;
|
||||
dh12 = ONE;
|
||||
flag = -1;
|
||||
if(dflag < ZERO)
|
||||
{
|
||||
dparam[1] = dh11;
|
||||
dparam[2] = dh21;
|
||||
dparam[3] = dh12;
|
||||
dparam[4] = dh22;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(dflag == ZERO)
|
||||
{
|
||||
dparam[2] = dh21;
|
||||
dparam[3] = dh12;
|
||||
}
|
||||
else
|
||||
{
|
||||
dparam[1] = dh11;
|
||||
dparam[4] = dh22;
|
||||
}
|
||||
}
|
||||
|
||||
L90:
|
||||
switch (igo) {
|
||||
case 0: goto L120;
|
||||
case 1: goto L150;
|
||||
case 2: goto L180;
|
||||
case 3: goto L210;
|
||||
}
|
||||
|
||||
L100:
|
||||
if (!(*dd1 <= RGAMSQ)) goto L130;
|
||||
if (*dd1 == ZERO) goto L160;
|
||||
igo = 0;
|
||||
goto L70;
|
||||
|
||||
L120:
|
||||
*dd1 *= GAM * GAM;
|
||||
*dx1 /= GAM;
|
||||
dh11 /= GAM;
|
||||
dh12 /= GAM;
|
||||
goto L100;
|
||||
|
||||
L130:
|
||||
if (! (*dd1 >= GAMSQ)) {
|
||||
goto L160;
|
||||
}
|
||||
igo = 1;
|
||||
goto L70;
|
||||
|
||||
L150:
|
||||
*dd1 /= GAM * GAM;
|
||||
*dx1 *= GAM;
|
||||
dh11 *= GAM;
|
||||
dh12 *= GAM;
|
||||
goto L130;
|
||||
|
||||
L160:
|
||||
if (! (ABS(*dd2) <= RGAMSQ)) {
|
||||
goto L190;
|
||||
}
|
||||
if (*dd2 == ZERO) {
|
||||
goto L220;
|
||||
}
|
||||
igo = 2;
|
||||
goto L70;
|
||||
|
||||
L180:
|
||||
/* Computing 2nd power */
|
||||
*dd2 *= GAM * GAM;
|
||||
dh21 /= GAM;
|
||||
dh22 /= GAM;
|
||||
goto L160;
|
||||
|
||||
L190:
|
||||
if (! (ABS(*dd2) >= GAMSQ)) {
|
||||
goto L220;
|
||||
}
|
||||
igo = 3;
|
||||
goto L70;
|
||||
|
||||
L210:
|
||||
/* Computing 2nd power */
|
||||
*dd2 /= GAM * GAM;
|
||||
dh21 *= GAM;
|
||||
dh22 *= GAM;
|
||||
goto L190;
|
||||
|
||||
L220:
|
||||
if (flag < 0) {
|
||||
goto L250;
|
||||
} else if (flag == 0) {
|
||||
goto L230;
|
||||
} else {
|
||||
goto L240;
|
||||
}
|
||||
L230:
|
||||
dparam[2] = dh21;
|
||||
dparam[3] = dh12;
|
||||
goto L260;
|
||||
L240:
|
||||
dparam[2] = dh11;
|
||||
dparam[4] = dh22;
|
||||
goto L260;
|
||||
L250:
|
||||
dparam[1] = dh11;
|
||||
dparam[2] = dh21;
|
||||
dparam[3] = dh12;
|
||||
dparam[4] = dh22;
|
||||
L260:
|
||||
dparam[0] = (FLOAT) flag;
|
||||
return;
|
||||
dparam[0] = dflag;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLA
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||
#ifdef XDOUBLE
|
||||
qsbmv_thread_U, qsbmv_thread_L,
|
||||
@@ -90,7 +90,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
||||
blasint info;
|
||||
int uplo;
|
||||
FLOAT *buffer;
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
@@ -130,7 +130,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||
FLOAT *buffer;
|
||||
int uplo;
|
||||
blasint info;
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
@@ -189,7 +189,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
if (nthreads == 1) {
|
||||
@@ -197,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||
|
||||
(sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
} else {
|
||||
|
||||
(sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads);
|
||||
|
||||
@@ -52,8 +52,8 @@ FLOATRET NAME(blasint *N, FLOAT *a, FLOAT *x, blasint *INCX, FLOAT *y, blasint *
|
||||
FLOATRET ret;
|
||||
|
||||
PRINT_DEBUG_NAME;
|
||||
|
||||
if (n <= 0) return 0.;
|
||||
|
||||
if (n <= 0) return(*a) ;
|
||||
|
||||
IDEBUG_START;
|
||||
|
||||
@@ -80,7 +80,7 @@ FLOAT CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint in
|
||||
|
||||
PRINT_DEBUG_CNAME;
|
||||
|
||||
if (n <= 0) return 0.;
|
||||
if (n <= 0) return (alpha);
|
||||
|
||||
IDEBUG_START;
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ static int (*spmv[])(BLASLONG, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLAS
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPTEST
|
||||
static int (*spmv_thread[])(BLASLONG, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||
#ifdef XDOUBLE
|
||||
qspmv_thread_U, qspmv_thread_L,
|
||||
@@ -88,7 +88,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
|
||||
blasint info;
|
||||
int uplo;
|
||||
FLOAT *buffer;
|
||||
#ifdef SMP
|
||||
#ifdef SMPTEST
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
@@ -126,7 +126,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||
FLOAT *buffer;
|
||||
int uplo;
|
||||
blasint info;
|
||||
#ifdef SMP
|
||||
#ifdef SMPTEST
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
@@ -181,7 +181,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPTEST
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
if (nthreads == 1) {
|
||||
@@ -189,7 +189,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||
|
||||
(spmv[uplo])(n, alpha, a, x, incx, y, incy, buffer);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPTEST
|
||||
} else {
|
||||
|
||||
(spmv_thread[uplo])(n, alpha, a, x, incx, y, incy, buffer, nthreads);
|
||||
|
||||
@@ -145,10 +145,21 @@ void NAME(char *UPLO, char *TRANS,
|
||||
if (uplo_arg == 'U') uplo = 0;
|
||||
if (uplo_arg == 'L') uplo = 1;
|
||||
|
||||
#ifndef COMPLEX
|
||||
if (trans_arg == 'N') trans = 0;
|
||||
if (trans_arg == 'T') trans = 1;
|
||||
if (trans_arg == 'R') trans = 0;
|
||||
if (trans_arg == 'C') trans = 1;
|
||||
#else
|
||||
#ifdef HEMM
|
||||
if (trans_arg == 'N') trans = 0;
|
||||
if (trans_arg == 'C') trans = 1;
|
||||
#else
|
||||
if (trans_arg == 'N') trans = 0;
|
||||
if (trans_arg == 'T') trans = 1;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
nrowa = args.n;
|
||||
if (trans & 1) nrowa = args.k;
|
||||
|
||||
@@ -148,10 +148,21 @@ void NAME(char *UPLO, char *TRANS,
|
||||
if (uplo_arg == 'U') uplo = 0;
|
||||
if (uplo_arg == 'L') uplo = 1;
|
||||
|
||||
|
||||
#ifndef COMPLEX
|
||||
if (trans_arg == 'N') trans = 0;
|
||||
if (trans_arg == 'T') trans = 1;
|
||||
if (trans_arg == 'R') trans = 0;
|
||||
if (trans_arg == 'C') trans = 1;
|
||||
#else
|
||||
#ifdef HEMM
|
||||
if (trans_arg == 'N') trans = 0;
|
||||
if (trans_arg == 'C') trans = 1;
|
||||
#else
|
||||
if (trans_arg == 'N') trans = 0;
|
||||
if (trans_arg == 'T') trans = 1;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
nrowa = args.n;
|
||||
if (trans & 1) nrowa = args.k;
|
||||
|
||||
@@ -61,7 +61,7 @@ static int (*hbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
static int (*hbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||
#ifdef XDOUBLE
|
||||
xhbmv_thread_U, xhbmv_thread_L, xhbmv_thread_V, xhbmv_thread_M,
|
||||
@@ -92,7 +92,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
||||
blasint info;
|
||||
int uplo;
|
||||
FLOAT *buffer;
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
@@ -138,7 +138,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||
FLOAT *buffer;
|
||||
int uplo;
|
||||
blasint info;
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
@@ -197,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
if (nthreads == 1) {
|
||||
@@ -205,7 +205,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||
|
||||
(hbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, x, incx, y, incy, buffer);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
} else {
|
||||
|
||||
(hbmv_thread[uplo])(n, k, ALPHA, a, lda, x, incx, y, incy, buffer, nthreads);
|
||||
|
||||
@@ -61,7 +61,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||
#ifdef XDOUBLE
|
||||
xsbmv_thread_U, xsbmv_thread_L,
|
||||
@@ -90,7 +90,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
||||
blasint info;
|
||||
int uplo;
|
||||
FLOAT *buffer;
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
@@ -131,7 +131,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
||||
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
if (nthreads == 1) {
|
||||
@@ -139,7 +139,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
||||
|
||||
(sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
} else {
|
||||
|
||||
(sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads);
|
||||
|
||||
@@ -61,7 +61,7 @@ static int (*spmv[])(BLASLONG, FLOAT, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPTEST
|
||||
static int (*spmv_thread[])(BLASLONG, FLOAT *, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||
#ifdef XDOUBLE
|
||||
xspmv_thread_U, xspmv_thread_L,
|
||||
@@ -88,7 +88,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
|
||||
blasint info;
|
||||
int uplo;
|
||||
FLOAT *buffer;
|
||||
#ifdef SMP
|
||||
#ifdef SMPTEST
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
@@ -127,7 +127,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
|
||||
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPTEST
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
if (nthreads == 1) {
|
||||
@@ -135,7 +135,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
|
||||
|
||||
(spmv[uplo])(n, alpha_r, alpha_i, a, b, incx, c, incy, buffer);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPTEST
|
||||
|
||||
} else {
|
||||
|
||||
|
||||
@@ -674,7 +674,7 @@ $(KDIR)sdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@
|
||||
|
||||
$(KDIR)sdsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@
|
||||
|
||||
$(KDIR)zdotu_k$(TSUFFIX).$(SUFFIX) $(KDIR)zdotu_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZDOTKERNEL)
|
||||
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UCONJ $< -o $@
|
||||
|
||||
134
kernel/arm/KERNEL.ARMV5
Normal file
134
kernel/arm/KERNEL.ARMV5
Normal file
@@ -0,0 +1,134 @@
|
||||
SAMAXKERNEL = ../arm/amax.c
|
||||
DAMAXKERNEL = ../arm/amax.c
|
||||
CAMAXKERNEL = ../arm/zamax.c
|
||||
ZAMAXKERNEL = ../arm/zamax.c
|
||||
|
||||
SAMINKERNEL = ../arm/amin.c
|
||||
DAMINKERNEL = ../arm/amin.c
|
||||
CAMINKERNEL = ../arm/zamin.c
|
||||
ZAMINKERNEL = ../arm/zamin.c
|
||||
|
||||
SMAXKERNEL = ../arm/max.c
|
||||
DMAXKERNEL = ../arm/max.c
|
||||
|
||||
SMINKERNEL = ../arm/min.c
|
||||
DMINKERNEL = ../arm/min.c
|
||||
|
||||
ISAMAXKERNEL = ../arm/iamax.c
|
||||
IDAMAXKERNEL = ../arm/iamax.c
|
||||
ICAMAXKERNEL = ../arm/izamax.c
|
||||
IZAMAXKERNEL = ../arm/izamax.c
|
||||
|
||||
ISAMINKERNEL = ../arm/iamin.c
|
||||
IDAMINKERNEL = ../arm/iamin.c
|
||||
ICAMINKERNEL = ../arm/izamin.c
|
||||
IZAMINKERNEL = ../arm/izamin.c
|
||||
|
||||
ISMAXKERNEL = ../arm/imax.c
|
||||
IDMAXKERNEL = ../arm/imax.c
|
||||
|
||||
ISMINKERNEL = ../arm/imin.c
|
||||
IDMINKERNEL = ../arm/imin.c
|
||||
|
||||
SASUMKERNEL = ../arm/asum.c
|
||||
DASUMKERNEL = ../arm/asum.c
|
||||
CASUMKERNEL = ../arm/zasum.c
|
||||
ZASUMKERNEL = ../arm/zasum.c
|
||||
|
||||
SAXPYKERNEL = ../arm/axpy.c
|
||||
DAXPYKERNEL = ../arm/axpy.c
|
||||
CAXPYKERNEL = ../arm/zaxpy.c
|
||||
ZAXPYKERNEL = ../arm/zaxpy.c
|
||||
|
||||
SCOPYKERNEL = ../arm/copy.c
|
||||
DCOPYKERNEL = ../arm/copy.c
|
||||
CCOPYKERNEL = ../arm/zcopy.c
|
||||
ZCOPYKERNEL = ../arm/zcopy.c
|
||||
|
||||
SDOTKERNEL = ../arm/dot.c
|
||||
DDOTKERNEL = ../arm/dot.c
|
||||
CDOTKERNEL = ../arm/zdot.c
|
||||
ZDOTKERNEL = ../arm/zdot.c
|
||||
|
||||
SNRM2KERNEL = ../arm/nrm2.c
|
||||
DNRM2KERNEL = ../arm/nrm2.c
|
||||
CNRM2KERNEL = ../arm/znrm2.c
|
||||
ZNRM2KERNEL = ../arm/znrm2.c
|
||||
|
||||
SROTKERNEL = ../arm/rot.c
|
||||
DROTKERNEL = ../arm/rot.c
|
||||
CROTKERNEL = ../arm/zrot.c
|
||||
ZROTKERNEL = ../arm/zrot.c
|
||||
|
||||
SSCALKERNEL = ../arm/scal.c
|
||||
DSCALKERNEL = ../arm/scal.c
|
||||
CSCALKERNEL = ../arm/zscal.c
|
||||
ZSCALKERNEL = ../arm/zscal.c
|
||||
|
||||
SSWAPKERNEL = ../arm/swap.c
|
||||
DSWAPKERNEL = ../arm/swap.c
|
||||
CSWAPKERNEL = ../arm/zswap.c
|
||||
ZSWAPKERNEL = ../arm/zswap.c
|
||||
|
||||
SGEMVNKERNEL = ../arm/gemv_n.c
|
||||
DGEMVNKERNEL = ../arm/gemv_n.c
|
||||
CGEMVNKERNEL = ../arm/zgemv_n.c
|
||||
ZGEMVNKERNEL = ../arm/zgemv_n.c
|
||||
|
||||
SGEMVTKERNEL = ../arm/gemv_t.c
|
||||
DGEMVTKERNEL = ../arm/gemv_t.c
|
||||
CGEMVTKERNEL = ../arm/zgemv_t.c
|
||||
ZGEMVTKERNEL = ../arm/zgemv_t.c
|
||||
|
||||
STRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||
|
||||
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
|
||||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
|
||||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
|
||||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,3 +1,27 @@
|
||||
SGEMVNKERNEL = ../arm/gemv_n.c
|
||||
SGEMVTKERNEL = ../arm/gemv_t.c
|
||||
|
||||
DGEMVNKERNEL = ../arm/gemv_n.c
|
||||
DGEMVTKERNEL = ../arm/gemv_t.c
|
||||
|
||||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
|
||||
#ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||
#ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
#ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
#ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
|
||||
|
||||
#STRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||
#SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
#SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
#SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
|
||||
|
||||
###############################################################################
|
||||
SAMAXKERNEL = iamax_vfp.S
|
||||
DAMAXKERNEL = iamax_vfp.S
|
||||
CAMAXKERNEL = iamax_vfp.S
|
||||
@@ -40,10 +64,10 @@ DAXPYKERNEL = axpy_vfp.S
|
||||
CAXPYKERNEL = axpy_vfp.S
|
||||
ZAXPYKERNEL = axpy_vfp.S
|
||||
|
||||
SCOPYKERNEL = scopy_vfp.S
|
||||
DCOPYKERNEL = dcopy_vfp.S
|
||||
CCOPYKERNEL = ccopy_vfp.S
|
||||
ZCOPYKERNEL = zcopy_vfp.S
|
||||
SCOPYKERNEL = copy.c
|
||||
DCOPYKERNEL = copy.c
|
||||
CCOPYKERNEL = zcopy.c
|
||||
ZCOPYKERNEL = zcopy.c
|
||||
|
||||
SDOTKERNEL = sdot_vfp.S
|
||||
DDOTKERNEL = ddot_vfp.S
|
||||
@@ -60,29 +84,29 @@ DROTKERNEL = rot_vfp.S
|
||||
CROTKERNEL = rot_vfp.S
|
||||
ZROTKERNEL = rot_vfp.S
|
||||
|
||||
SSCALKERNEL = scal_vfp.S
|
||||
DSCALKERNEL = scal_vfp.S
|
||||
CSCALKERNEL = scal_vfp.S
|
||||
ZSCALKERNEL = scal_vfp.S
|
||||
SSCALKERNEL = scal.c
|
||||
DSCALKERNEL = scal.c
|
||||
CSCALKERNEL = zscal.c
|
||||
ZSCALKERNEL = zscal.c
|
||||
|
||||
SSWAPKERNEL = swap_vfp.S
|
||||
DSWAPKERNEL = swap_vfp.S
|
||||
CSWAPKERNEL = swap_vfp.S
|
||||
ZSWAPKERNEL = swap_vfp.S
|
||||
|
||||
SGEMVNKERNEL = gemv_n_vfp.S
|
||||
DGEMVNKERNEL = gemv_n_vfp.S
|
||||
# BAD SGEMVNKERNEL = gemv_n_vfp.S
|
||||
# BAD DGEMVNKERNEL = gemv_n_vfp.S
|
||||
CGEMVNKERNEL = cgemv_n_vfp.S
|
||||
ZGEMVNKERNEL = zgemv_n_vfp.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t_vfp.S
|
||||
DGEMVTKERNEL = gemv_t_vfp.S
|
||||
# BAD SGEMVTKERNEL = gemv_t_vfp.S
|
||||
# BAD DGEMVTKERNEL = gemv_t_vfp.S
|
||||
CGEMVTKERNEL = cgemv_t_vfp.S
|
||||
ZGEMVTKERNEL = zgemv_t_vfp.S
|
||||
|
||||
STRMMKERNEL = strmm_kernel_4x2_vfp.S
|
||||
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
|
||||
CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
|
||||
#CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
|
||||
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_4x2_vfp.S
|
||||
@@ -105,9 +129,9 @@ DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_2x2_vfp.S
|
||||
CGEMMONCOPY = cgemm_ncopy_2_vfp.S
|
||||
CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
|
||||
#CGEMMKERNEL = cgemm_kernel_2x2_vfp.S
|
||||
#CGEMMONCOPY = cgemm_ncopy_2_vfp.S
|
||||
#CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
|
||||
|
||||
@@ -1,3 +1,8 @@
|
||||
SGEMVNKERNEL = ../arm/gemv_n.c
|
||||
SGEMVTKERNEL = ../arm/gemv_t.c
|
||||
|
||||
|
||||
#################################################################################
|
||||
SAMAXKERNEL = iamax_vfp.S
|
||||
DAMAXKERNEL = iamax_vfp.S
|
||||
CAMAXKERNEL = iamax_vfp.S
|
||||
@@ -45,10 +50,10 @@ DAXPYKERNEL = axpy_vfp.S
|
||||
CAXPYKERNEL = axpy_vfp.S
|
||||
ZAXPYKERNEL = axpy_vfp.S
|
||||
|
||||
SCOPYKERNEL = scopy_vfp.S
|
||||
DCOPYKERNEL = dcopy_vfp.S
|
||||
CCOPYKERNEL = ccopy_vfp.S
|
||||
ZCOPYKERNEL = zcopy_vfp.S
|
||||
SCOPYKERNEL = copy.c
|
||||
DCOPYKERNEL = copy.c
|
||||
CCOPYKERNEL = zcopy.c
|
||||
ZCOPYKERNEL = zcopy.c
|
||||
|
||||
SDOTKERNEL = sdot_vfp.S
|
||||
DDOTKERNEL = ddot_vfp.S
|
||||
@@ -65,17 +70,17 @@ DROTKERNEL = rot_vfp.S
|
||||
CROTKERNEL = rot_vfp.S
|
||||
ZROTKERNEL = rot_vfp.S
|
||||
|
||||
SSCALKERNEL = scal_vfp.S
|
||||
DSCALKERNEL = scal_vfp.S
|
||||
CSCALKERNEL = scal_vfp.S
|
||||
ZSCALKERNEL = scal_vfp.S
|
||||
SSCALKERNEL = scal.c
|
||||
DSCALKERNEL = scal.c
|
||||
CSCALKERNEL = zscal.c
|
||||
ZSCALKERNEL = zscal.c
|
||||
|
||||
SGEMVNKERNEL = gemv_n_vfp.S
|
||||
# BAD SGEMVNKERNEL = gemv_n_vfp.S
|
||||
DGEMVNKERNEL = gemv_n_vfp.S
|
||||
CGEMVNKERNEL = cgemv_n_vfp.S
|
||||
ZGEMVNKERNEL = zgemv_n_vfp.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t_vfp.S
|
||||
# BAD SGEMVTKERNEL = gemv_t_vfp.S
|
||||
DGEMVTKERNEL = gemv_t_vfp.S
|
||||
CGEMVTKERNEL = cgemv_t_vfp.S
|
||||
ZGEMVTKERNEL = zgemv_t_vfp.S
|
||||
|
||||
@@ -38,20 +38,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG i=0,j=0;
|
||||
|
||||
if ( n < 0 || inc_x < 1 ) return(0);
|
||||
if ( da == 1.0 ) return(0);
|
||||
|
||||
n *= inc_x;
|
||||
while(i < n)
|
||||
while(j < n)
|
||||
{
|
||||
|
||||
x[i] = da * x[i] ;
|
||||
if ( da == 0.0 )
|
||||
x[i]=0.0;
|
||||
else
|
||||
x[i] = da * x[i] ;
|
||||
|
||||
i += inc_x ;
|
||||
j++;
|
||||
|
||||
}
|
||||
return(0);
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -43,19 +43,39 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, F
|
||||
BLASLONG ip = 0;
|
||||
FLOAT temp;
|
||||
|
||||
if ( n < 0 || inc_x < 1 ) return(0);
|
||||
|
||||
inc_x2 = 2 * inc_x;
|
||||
for ( i=0; i<n; i++ )
|
||||
{
|
||||
|
||||
temp = da_r * x[ip] - da_i * x[ip+1] ;
|
||||
x[ip+1] = da_r * x[ip+1] + da_i * x[ip] ;
|
||||
if ( da_r == 0.0 )
|
||||
{
|
||||
if ( da_i == 0.0 )
|
||||
{
|
||||
temp = 0.0;
|
||||
x[ip+1] = 0.0 ;
|
||||
}
|
||||
else
|
||||
{
|
||||
temp = - da_i * x[ip+1] ;
|
||||
x[ip+1] = da_i * x[ip] ;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( da_i == 0.0 )
|
||||
{
|
||||
temp = da_r * x[ip] ;
|
||||
x[ip+1] = da_r * x[ip+1];
|
||||
}
|
||||
else
|
||||
{
|
||||
temp = da_r * x[ip] - da_i * x[ip+1] ;
|
||||
x[ip+1] = da_r * x[ip+1] + da_i * x[ip] ;
|
||||
}
|
||||
}
|
||||
x[ip] = temp;
|
||||
|
||||
ip += inc_x2;
|
||||
}
|
||||
|
||||
|
||||
return(0);
|
||||
|
||||
|
||||
@@ -59,7 +59,7 @@ CASUMKERNEL = zasum_sse.S
|
||||
endif
|
||||
|
||||
ifndef SDOTKERNEL
|
||||
SDOTKERNEL = dot_sse.S
|
||||
SDOTKERNEL = ../arm/dot.c
|
||||
endif
|
||||
|
||||
ifndef CDOTKERNEL
|
||||
@@ -107,11 +107,11 @@ CSWAPKERNEL = zswap_sse.S
|
||||
endif
|
||||
|
||||
ifndef SGEMVNKERNEL
|
||||
SGEMVNKERNEL = gemv_n_sse.S
|
||||
SGEMVNKERNEL = ../arm/gemv_n.c
|
||||
endif
|
||||
|
||||
ifndef SGEMVTKERNEL
|
||||
SGEMVTKERNEL = gemv_t_sse.S
|
||||
SGEMVTKERNEL = ../arm/gemv_t.c
|
||||
endif
|
||||
|
||||
ifndef CGEMVNKERNEL
|
||||
|
||||
@@ -119,7 +119,7 @@ XCOPYKERNEL = zcopy.S
|
||||
endif
|
||||
|
||||
ifndef SDOTKERNEL
|
||||
SDOTKERNEL = dot_sse.S
|
||||
SDOTKERNEL = ../arm/dot.c
|
||||
endif
|
||||
|
||||
ifndef DDOTKERNEL
|
||||
@@ -369,11 +369,11 @@ endif
|
||||
GEMVDEP = ../l2param.h
|
||||
|
||||
ifndef SGEMVNKERNEL
|
||||
SGEMVNKERNEL = sgemv_n.S
|
||||
SGEMVNKERNEL = ../arm/gemv_n.c
|
||||
endif
|
||||
|
||||
ifndef SGEMVTKERNEL
|
||||
SGEMVTKERNEL = sgemv_t.S
|
||||
SGEMVTKERNEL = ../arm/gemv_t.c
|
||||
endif
|
||||
|
||||
ifndef DGEMVNKERNEL
|
||||
|
||||
@@ -7,15 +7,19 @@ SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMKERNEL = gemm_kernel_2x8_nehalem.S
|
||||
DGEMMINCOPY = dgemm_ncopy_2.S
|
||||
DGEMMITCOPY = dgemm_tcopy_2.S
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||
DGEMMOTCOPY = dgemm_tcopy_8.S
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
||||
DGEMMKERNEL = gemm_kernel_4x4_core2.S
|
||||
DGEMMINCOPY =
|
||||
DGEMMITCOPY =
|
||||
DGEMMONCOPY = gemm_ncopy_4.S
|
||||
DGEMMOTCOPY = gemm_tcopy_4.S
|
||||
DGEMMINCOPYOBJ =
|
||||
DGEMMITCOPYOBJ =
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
||||
CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
|
||||
CGEMMINCOPY = zgemm_ncopy_2.S
|
||||
CGEMMITCOPY = zgemm_tcopy_2.S
|
||||
@@ -40,10 +44,11 @@ STRSMKERNEL_LT = trsm_kernel_LT_4x8_nehalem.S
|
||||
STRSMKERNEL_RN = trsm_kernel_LT_4x8_nehalem.S
|
||||
STRSMKERNEL_RT = trsm_kernel_RT_4x8_nehalem.S
|
||||
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN_2x8_nehalem.S
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT_2x8_nehalem.S
|
||||
DTRSMKERNEL_RN = trsm_kernel_LT_2x8_nehalem.S
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT_2x8_nehalem.S
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN_4x4_core2.S
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT_4x4_core2.S
|
||||
DTRSMKERNEL_RN = trsm_kernel_LT_4x4_core2.S
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT_4x4_core2.S
|
||||
|
||||
|
||||
CTRSMKERNEL_LN = ztrsm_kernel_LN_2x4_nehalem.S
|
||||
CTRSMKERNEL_LT = ztrsm_kernel_LT_2x4_nehalem.S
|
||||
|
||||
@@ -1,34 +1,35 @@
|
||||
SGEMMKERNEL = sgemm_kernel_8x8_sandy.S
|
||||
SGEMMINCOPY =
|
||||
SGEMMITCOPY =
|
||||
SGEMMKERNEL = gemm_kernel_4x8_nehalem.S
|
||||
SGEMMINCOPY = gemm_ncopy_4.S
|
||||
SGEMMITCOPY = gemm_tcopy_4.S
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
||||
SGEMMINCOPYOBJ =
|
||||
SGEMMITCOPYOBJ =
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_4x8_sandy.S
|
||||
DGEMMINCOPY = ../generic/gemm_ncopy_8.c
|
||||
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
|
||||
#DGEMMONCOPY = gemm_ncopy_4.S
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||
#DGEMMOTCOPY = gemm_tcopy_4.S
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
#CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
|
||||
CGEMMKERNEL = cgemm_kernel_4x8_sandy.S
|
||||
CGEMMINCOPY = ../generic/zgemm_ncopy_8_sandy.c
|
||||
CGEMMITCOPY = ../generic/zgemm_tcopy_8_sandy.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_4_sandy.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4_sandy.c
|
||||
|
||||
CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
|
||||
CGEMMINCOPY = zgemm_ncopy_2.S
|
||||
CGEMMITCOPY = zgemm_tcopy_2.S
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
#ZGEMMKERNEL = zgemm_kernel_1x4_nehalem.S
|
||||
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_4x4_sandy.S
|
||||
ZGEMMINCOPY =
|
||||
ZGEMMITCOPY =
|
||||
@@ -58,6 +59,7 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
#ZTRSMKERNEL_LT = ztrsm_kernel_LT_1x4_nehalem.S
|
||||
#ZTRSMKERNEL_RN = ztrsm_kernel_LT_1x4_nehalem.S
|
||||
#ZTRSMKERNEL_RT = ztrsm_kernel_RT_1x4_nehalem.S
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -192,8 +192,8 @@
|
||||
VFMADD231PD_ %ymm13,%ymm1,%ymm0
|
||||
VFMADD231PD_ %ymm14,%ymm2,%ymm0
|
||||
VFMADD231PD_ %ymm15,%ymm3,%ymm0
|
||||
addq $3*SIZE , BO
|
||||
addq $16*SIZE, AO
|
||||
addq $ 3*SIZE , BO
|
||||
addq $ 16*SIZE, AO
|
||||
.endm
|
||||
|
||||
|
||||
@@ -212,8 +212,8 @@
|
||||
VFMADD231PD_ %ymm8,%ymm2,%ymm0
|
||||
VFMADD231PD_ %ymm9,%ymm3,%ymm0
|
||||
prefetcht0 B_PR1(BO)
|
||||
addq $3*SIZE , BO
|
||||
addq $8*SIZE, AO
|
||||
addq $ 3*SIZE , BO
|
||||
addq $ 8*SIZE, AO
|
||||
.endm
|
||||
|
||||
.macro KERNEL4x3_SUBN
|
||||
@@ -224,8 +224,8 @@
|
||||
VFMADD231PD_ %ymm5,%ymm2,%ymm0
|
||||
vbroadcastsd -10 * SIZE(BO), %ymm3
|
||||
VFMADD231PD_ %ymm6,%ymm3,%ymm0
|
||||
addq $3*SIZE , BO
|
||||
addq $4*SIZE, AO
|
||||
addq $ 3*SIZE , BO
|
||||
addq $ 4*SIZE, AO
|
||||
.endm
|
||||
|
||||
.macro KERNEL2x3_SUBN
|
||||
@@ -240,8 +240,8 @@
|
||||
VFMADD231SD_ %xmm8,%xmm1,%xmm0
|
||||
VFMADD231SD_ %xmm10,%xmm2,%xmm0
|
||||
VFMADD231SD_ %xmm12,%xmm3,%xmm0
|
||||
addq $3*SIZE , BO
|
||||
addq $2*SIZE, AO
|
||||
addq $ 3*SIZE , BO
|
||||
addq $ 2*SIZE, AO
|
||||
.endm
|
||||
|
||||
.macro KERNEL1x3_SUBN
|
||||
@@ -252,8 +252,8 @@
|
||||
VFMADD231SD_ %xmm5,%xmm2,%xmm0
|
||||
vmovsd -10 * SIZE(BO), %xmm3
|
||||
VFMADD231SD_ %xmm6,%xmm3,%xmm0
|
||||
addq $3*SIZE , BO
|
||||
addq $1*SIZE, AO
|
||||
addq $ 3*SIZE , BO
|
||||
addq $ 1*SIZE, AO
|
||||
.endm
|
||||
|
||||
|
||||
@@ -1602,16 +1602,16 @@
|
||||
vmovsd 1 * SIZE(BO, BI, SIZE), %xmm1
|
||||
vmovsd -29 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
VFMADD231SD_ %xmm4,%xmm1,%xmm0
|
||||
addq $4, BI
|
||||
addq $4, %rax
|
||||
addq $ 4, BI
|
||||
addq $ 4, %rax
|
||||
.endm
|
||||
|
||||
.macro KERNEL1x1_SUB
|
||||
vmovsd -2 * SIZE(BO, BI, SIZE), %xmm1
|
||||
vmovsd -32 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
VFMADD231SD_ %xmm4,%xmm1,%xmm0
|
||||
addq $1, BI
|
||||
addq $1 , %rax
|
||||
addq $ 1, BI
|
||||
addq $ 1 , %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE1x1
|
||||
@@ -1749,9 +1749,9 @@
|
||||
vmovsd %xmm5, 8*SIZE(BO)
|
||||
vmovups %xmm6, 9*SIZE(BO)
|
||||
vmovsd %xmm7,11*SIZE(BO)
|
||||
addq $8*SIZE,BO1
|
||||
addq $8*SIZE,BO2
|
||||
addq $12*SIZE,BO
|
||||
addq $ 8*SIZE,BO1
|
||||
addq $ 8*SIZE,BO2
|
||||
addq $ 12*SIZE,BO
|
||||
|
||||
vmovups 0 * SIZE(BO1), %xmm0
|
||||
vmovups 2 * SIZE(BO1), %xmm2
|
||||
@@ -1769,9 +1769,9 @@
|
||||
vmovsd %xmm5, 8*SIZE(BO)
|
||||
vmovups %xmm6, 9*SIZE(BO)
|
||||
vmovsd %xmm7,11*SIZE(BO)
|
||||
addq $8*SIZE,BO1
|
||||
addq $8*SIZE,BO2
|
||||
addq $12*SIZE,BO
|
||||
addq $ 8*SIZE,BO1
|
||||
addq $ 8*SIZE,BO2
|
||||
addq $ 12*SIZE,BO
|
||||
|
||||
decq %rax
|
||||
jnz .L6_01a_1
|
||||
@@ -1792,9 +1792,9 @@
|
||||
vmovsd 0 * SIZE(BO2), %xmm2
|
||||
vmovups %xmm0, 0*SIZE(BO)
|
||||
vmovsd %xmm2, 2*SIZE(BO)
|
||||
addq $2*SIZE,BO1
|
||||
addq $2*SIZE,BO2
|
||||
addq $3*SIZE,BO
|
||||
addq $ 2*SIZE,BO1
|
||||
addq $ 2*SIZE,BO2
|
||||
addq $ 3*SIZE,BO
|
||||
decq %rax
|
||||
jnz .L6_02b
|
||||
|
||||
|
||||
@@ -80,10 +80,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#ifndef WINDOWS_ABI
|
||||
|
||||
#define STACKSIZE 96
|
||||
#define L_BUFFER_SIZE 256*8*12+4096
|
||||
|
||||
#else
|
||||
|
||||
#define STACKSIZE 256
|
||||
#define L_BUFFER_SIZE 128*8*12+4096
|
||||
|
||||
#define OLD_A 40 + STACKSIZE(%rsp)
|
||||
#define OLD_B 48 + STACKSIZE(%rsp)
|
||||
@@ -93,7 +95,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#endif
|
||||
|
||||
#define L_BUFFER_SIZE 512*8*12
|
||||
|
||||
#define Ndiv12 24(%rsp)
|
||||
#define Nmod12 32(%rsp)
|
||||
@@ -107,22 +108,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#if defined(OS_WINDOWS)
|
||||
#if L_BUFFER_SIZE > 16384
|
||||
#define STACK_TOUCH \
|
||||
movl $0, 4096 * 4(%rsp);\
|
||||
movl $0, 4096 * 3(%rsp);\
|
||||
movl $0, 4096 * 2(%rsp);\
|
||||
movl $0, 4096 * 1(%rsp);
|
||||
movl $ 0, 4096 * 4(%rsp);\
|
||||
movl $ 0, 4096 * 3(%rsp);\
|
||||
movl $ 0, 4096 * 2(%rsp);\
|
||||
movl $ 0, 4096 * 1(%rsp);
|
||||
#elif L_BUFFER_SIZE > 12288
|
||||
#define STACK_TOUCH \
|
||||
movl $0, 4096 * 3(%rsp);\
|
||||
movl $0, 4096 * 2(%rsp);\
|
||||
movl $0, 4096 * 1(%rsp);
|
||||
movl $ 0, 4096 * 3(%rsp);\
|
||||
movl $ 0, 4096 * 2(%rsp);\
|
||||
movl $ 0, 4096 * 1(%rsp);
|
||||
#elif L_BUFFER_SIZE > 8192
|
||||
#define STACK_TOUCH \
|
||||
movl $0, 4096 * 2(%rsp);\
|
||||
movl $0, 4096 * 1(%rsp);
|
||||
movl $ 0, 4096 * 2(%rsp);\
|
||||
movl $ 0, 4096 * 1(%rsp);
|
||||
#elif L_BUFFER_SIZE > 4096
|
||||
#define STACK_TOUCH \
|
||||
movl $0, 4096 * 1(%rsp);
|
||||
movl $ 0, 4096 * 1(%rsp);
|
||||
#else
|
||||
#define STACK_TOUCH
|
||||
#endif
|
||||
@@ -168,17 +169,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmulpd %ymm0 ,%ymm2 , %ymm8
|
||||
vmulpd %ymm0 ,%ymm3 , %ymm12
|
||||
prefetcht0 B_PR1+256(BO)
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vmulpd %ymm0 ,%ymm1 , %ymm5
|
||||
vmulpd %ymm0 ,%ymm2 , %ymm9
|
||||
vmulpd %ymm0 ,%ymm3 , %ymm13
|
||||
vpermpd $0x1b, %ymm0 , %ymm0
|
||||
vpermpd $ 0x1b, %ymm0 , %ymm0
|
||||
vmulpd %ymm0 ,%ymm1 , %ymm6
|
||||
vmulpd %ymm0 ,%ymm2 , %ymm10
|
||||
|
||||
addq $12*SIZE, BO
|
||||
addq $ 12*SIZE, BO
|
||||
vmulpd %ymm0 ,%ymm3 , %ymm14
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vmulpd %ymm0 ,%ymm1 , %ymm7
|
||||
vmovups -12 * SIZE(BO), %ymm1
|
||||
vmulpd %ymm0 ,%ymm2 , %ymm11
|
||||
@@ -197,16 +198,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vfmadd231pd %ymm0 ,%ymm2 , %ymm8
|
||||
prefetcht0 B_PR1+128(BO)
|
||||
vfmadd231pd %ymm0 ,%ymm3 , %ymm12
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm5
|
||||
vfmadd231pd %ymm0 ,%ymm2 , %ymm9
|
||||
vfmadd231pd %ymm0 ,%ymm3 , %ymm13
|
||||
vpermpd $0x1b, %ymm0 , %ymm0
|
||||
vpermpd $ 0x1b, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm6
|
||||
vfmadd231pd %ymm0 ,%ymm2 , %ymm10
|
||||
|
||||
vfmadd231pd %ymm0 ,%ymm3 , %ymm14
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm7
|
||||
vmovups -12 * SIZE(BO), %ymm1
|
||||
vfmadd231pd %ymm0 ,%ymm2 , %ymm11
|
||||
@@ -221,24 +222,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm4
|
||||
vfmadd231pd %ymm0 ,%ymm2 , %ymm8
|
||||
vfmadd231pd %ymm0 ,%ymm3 , %ymm12
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm5
|
||||
vfmadd231pd %ymm0 ,%ymm2 , %ymm9
|
||||
vfmadd231pd %ymm0 ,%ymm3 , %ymm13
|
||||
vpermpd $0x1b, %ymm0 , %ymm0
|
||||
vpermpd $ 0x1b, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm6
|
||||
vfmadd231pd %ymm0 ,%ymm2 , %ymm10
|
||||
|
||||
addq $8*SIZE, AO
|
||||
addq $ 8*SIZE, AO
|
||||
vfmadd231pd %ymm0 ,%ymm3 , %ymm14
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm7
|
||||
vmovups 0 * SIZE(BO), %ymm1
|
||||
vfmadd231pd %ymm0 ,%ymm2 , %ymm11
|
||||
vmovups 4 * SIZE(BO), %ymm2
|
||||
vfmadd231pd %ymm0 ,%ymm3 , %ymm15
|
||||
vmovups 8 * SIZE(BO), %ymm3
|
||||
addq $24*SIZE, BO
|
||||
addq $ 24*SIZE, BO
|
||||
.endm
|
||||
|
||||
|
||||
@@ -247,21 +248,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm4
|
||||
vfmadd231pd %ymm0 ,%ymm2 , %ymm8
|
||||
vfmadd231pd %ymm0 ,%ymm3 , %ymm12
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm5
|
||||
vfmadd231pd %ymm0 ,%ymm2 , %ymm9
|
||||
vfmadd231pd %ymm0 ,%ymm3 , %ymm13
|
||||
vpermpd $0x1b, %ymm0 , %ymm0
|
||||
vpermpd $ 0x1b, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm6
|
||||
vfmadd231pd %ymm0 ,%ymm2 , %ymm10
|
||||
|
||||
addq $8*SIZE, AO
|
||||
addq $ 8*SIZE, AO
|
||||
vfmadd231pd %ymm0 ,%ymm3 , %ymm14
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm7
|
||||
vfmadd231pd %ymm0 ,%ymm2 , %ymm11
|
||||
vfmadd231pd %ymm0 ,%ymm3 , %ymm15
|
||||
addq $12*SIZE, BO
|
||||
addq $ 12*SIZE, BO
|
||||
.endm
|
||||
|
||||
.macro KERNEL4x12_SUB
|
||||
@@ -272,17 +273,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vfmadd231pd %ymm0 ,%ymm2 , %ymm8
|
||||
vmovups -4 * SIZE(BO), %ymm3
|
||||
vfmadd231pd %ymm0 ,%ymm3 , %ymm12
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm5
|
||||
vfmadd231pd %ymm0 ,%ymm2 , %ymm9
|
||||
addq $12*SIZE, BO
|
||||
addq $ 12*SIZE, BO
|
||||
vfmadd231pd %ymm0 ,%ymm3 , %ymm13
|
||||
vpermpd $0x1b, %ymm0 , %ymm0
|
||||
vpermpd $ 0x1b, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm6
|
||||
vfmadd231pd %ymm0 ,%ymm2 , %ymm10
|
||||
addq $4*SIZE, AO
|
||||
addq $ 4*SIZE, AO
|
||||
vfmadd231pd %ymm0 ,%ymm3 , %ymm14
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm7
|
||||
vfmadd231pd %ymm0 ,%ymm2 , %ymm11
|
||||
vfmadd231pd %ymm0 ,%ymm3 , %ymm15
|
||||
@@ -309,23 +310,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmulpd %ymm0 , %ymm14, %ymm14
|
||||
vmulpd %ymm0 , %ymm15, %ymm15
|
||||
|
||||
vpermpd $0xb1 , %ymm5, %ymm5
|
||||
vpermpd $0xb1 , %ymm7, %ymm7
|
||||
vpermpd $ 0xb1 , %ymm5, %ymm5
|
||||
vpermpd $ 0xb1 , %ymm7, %ymm7
|
||||
|
||||
vblendpd $0x0a, %ymm5, %ymm4, %ymm0
|
||||
vblendpd $0x05, %ymm5, %ymm4, %ymm1
|
||||
vblendpd $0x0a, %ymm7, %ymm6, %ymm2
|
||||
vblendpd $0x05, %ymm7, %ymm6, %ymm3
|
||||
vblendpd $ 0x0a, %ymm5, %ymm4, %ymm0
|
||||
vblendpd $ 0x05, %ymm5, %ymm4, %ymm1
|
||||
vblendpd $ 0x0a, %ymm7, %ymm6, %ymm2
|
||||
vblendpd $ 0x05, %ymm7, %ymm6, %ymm3
|
||||
|
||||
vpermpd $0x1b , %ymm2, %ymm2
|
||||
vpermpd $0x1b , %ymm3, %ymm3
|
||||
vpermpd $0xb1 , %ymm2, %ymm2
|
||||
vpermpd $0xb1 , %ymm3, %ymm3
|
||||
vpermpd $ 0x1b , %ymm2, %ymm2
|
||||
vpermpd $ 0x1b , %ymm3, %ymm3
|
||||
vpermpd $ 0xb1 , %ymm2, %ymm2
|
||||
vpermpd $ 0xb1 , %ymm3, %ymm3
|
||||
|
||||
vblendpd $0x03, %ymm0, %ymm2 , %ymm4
|
||||
vblendpd $0x03, %ymm1, %ymm3 , %ymm5
|
||||
vblendpd $0x03, %ymm2, %ymm0 , %ymm6
|
||||
vblendpd $0x03, %ymm3, %ymm1 , %ymm7
|
||||
vblendpd $ 0x03, %ymm0, %ymm2 , %ymm4
|
||||
vblendpd $ 0x03, %ymm1, %ymm3 , %ymm5
|
||||
vblendpd $ 0x03, %ymm2, %ymm0 , %ymm6
|
||||
vblendpd $ 0x03, %ymm3, %ymm1 , %ymm7
|
||||
|
||||
leaq (CO1, LDC, 2), %rax
|
||||
|
||||
@@ -349,23 +350,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
prefetcht0 32(%rax)
|
||||
prefetcht0 32(%rax,LDC)
|
||||
|
||||
vpermpd $0xb1 , %ymm9 , %ymm9
|
||||
vpermpd $0xb1 , %ymm11, %ymm11
|
||||
vpermpd $ 0xb1 , %ymm9 , %ymm9
|
||||
vpermpd $ 0xb1 , %ymm11, %ymm11
|
||||
|
||||
vblendpd $0x0a, %ymm9 , %ymm8 , %ymm0
|
||||
vblendpd $0x05, %ymm9 , %ymm8 , %ymm1
|
||||
vblendpd $0x0a, %ymm11, %ymm10, %ymm2
|
||||
vblendpd $0x05, %ymm11, %ymm10, %ymm3
|
||||
vblendpd $ 0x0a, %ymm9 , %ymm8 , %ymm0
|
||||
vblendpd $ 0x05, %ymm9 , %ymm8 , %ymm1
|
||||
vblendpd $ 0x0a, %ymm11, %ymm10, %ymm2
|
||||
vblendpd $ 0x05, %ymm11, %ymm10, %ymm3
|
||||
|
||||
vpermpd $0x1b , %ymm2, %ymm2
|
||||
vpermpd $0x1b , %ymm3, %ymm3
|
||||
vpermpd $0xb1 , %ymm2, %ymm2
|
||||
vpermpd $0xb1 , %ymm3, %ymm3
|
||||
vpermpd $ 0x1b , %ymm2, %ymm2
|
||||
vpermpd $ 0x1b , %ymm3, %ymm3
|
||||
vpermpd $ 0xb1 , %ymm2, %ymm2
|
||||
vpermpd $ 0xb1 , %ymm3, %ymm3
|
||||
|
||||
vblendpd $0x03, %ymm0, %ymm2 , %ymm4
|
||||
vblendpd $0x03, %ymm1, %ymm3 , %ymm5
|
||||
vblendpd $0x03, %ymm2, %ymm0 , %ymm6
|
||||
vblendpd $0x03, %ymm3, %ymm1 , %ymm7
|
||||
vblendpd $ 0x03, %ymm0, %ymm2 , %ymm4
|
||||
vblendpd $ 0x03, %ymm1, %ymm3 , %ymm5
|
||||
vblendpd $ 0x03, %ymm2, %ymm0 , %ymm6
|
||||
vblendpd $ 0x03, %ymm3, %ymm1 , %ymm7
|
||||
|
||||
|
||||
leaq (%rax, LDC, 2), %rax
|
||||
@@ -390,23 +391,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
prefetcht0 32(%rbp)
|
||||
prefetcht0 32(%rbp,LDC)
|
||||
|
||||
vpermpd $0xb1 , %ymm13, %ymm13
|
||||
vpermpd $0xb1 , %ymm15, %ymm15
|
||||
vpermpd $ 0xb1 , %ymm13, %ymm13
|
||||
vpermpd $ 0xb1 , %ymm15, %ymm15
|
||||
|
||||
vblendpd $0x0a, %ymm13, %ymm12, %ymm0
|
||||
vblendpd $0x05, %ymm13, %ymm12, %ymm1
|
||||
vblendpd $0x0a, %ymm15, %ymm14, %ymm2
|
||||
vblendpd $0x05, %ymm15, %ymm14, %ymm3
|
||||
vblendpd $ 0x0a, %ymm13, %ymm12, %ymm0
|
||||
vblendpd $ 0x05, %ymm13, %ymm12, %ymm1
|
||||
vblendpd $ 0x0a, %ymm15, %ymm14, %ymm2
|
||||
vblendpd $ 0x05, %ymm15, %ymm14, %ymm3
|
||||
|
||||
vpermpd $0x1b , %ymm2, %ymm2
|
||||
vpermpd $0x1b , %ymm3, %ymm3
|
||||
vpermpd $0xb1 , %ymm2, %ymm2
|
||||
vpermpd $0xb1 , %ymm3, %ymm3
|
||||
vpermpd $ 0x1b , %ymm2, %ymm2
|
||||
vpermpd $ 0x1b , %ymm3, %ymm3
|
||||
vpermpd $ 0xb1 , %ymm2, %ymm2
|
||||
vpermpd $ 0xb1 , %ymm3, %ymm3
|
||||
|
||||
vblendpd $0x03, %ymm0, %ymm2 , %ymm4
|
||||
vblendpd $0x03, %ymm1, %ymm3 , %ymm5
|
||||
vblendpd $0x03, %ymm2, %ymm0 , %ymm6
|
||||
vblendpd $0x03, %ymm3, %ymm1 , %ymm7
|
||||
vblendpd $ 0x03, %ymm0, %ymm2 , %ymm4
|
||||
vblendpd $ 0x03, %ymm1, %ymm3 , %ymm5
|
||||
vblendpd $ 0x03, %ymm2, %ymm0 , %ymm6
|
||||
vblendpd $ 0x03, %ymm3, %ymm1 , %ymm7
|
||||
|
||||
|
||||
leaq (%rax, LDC, 4), %rax
|
||||
@@ -431,7 +432,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
prefetcht0 32(%rbp)
|
||||
prefetcht0 32(%rbp,LDC)
|
||||
|
||||
addq $4*SIZE, CO1
|
||||
addq $ 4*SIZE, CO1
|
||||
.endm
|
||||
|
||||
/******************************************************************************************/
|
||||
@@ -477,9 +478,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vfmadd231pd %xmm0 ,%xmm3 , %xmm12
|
||||
vmovddup -1 * SIZE(BO), %xmm3
|
||||
vfmadd231pd %xmm0 ,%xmm1 , %xmm13
|
||||
addq $12*SIZE, BO
|
||||
addq $ 12*SIZE, BO
|
||||
vfmadd231pd %xmm0 ,%xmm2 , %xmm14
|
||||
addq $2*SIZE, AO
|
||||
addq $ 2*SIZE, AO
|
||||
vfmadd231pd %xmm0 ,%xmm3 , %xmm15
|
||||
|
||||
.endm
|
||||
@@ -557,7 +558,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovups %xmm6 , (%rbp)
|
||||
vmovups %xmm7 , (%rbp, LDC)
|
||||
|
||||
addq $2*SIZE, CO1
|
||||
addq $ 2*SIZE, CO1
|
||||
.endm
|
||||
|
||||
|
||||
@@ -604,9 +605,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vfmadd231sd %xmm0 ,%xmm3 , %xmm12
|
||||
vmovsd -1 * SIZE(BO), %xmm3
|
||||
vfmadd231sd %xmm0 ,%xmm1 , %xmm13
|
||||
addq $12*SIZE, BO
|
||||
addq $ 12*SIZE, BO
|
||||
vfmadd231sd %xmm0 ,%xmm2 , %xmm14
|
||||
addq $1*SIZE, AO
|
||||
addq $ 1*SIZE, AO
|
||||
vfmadd231sd %xmm0 ,%xmm3 , %xmm15
|
||||
|
||||
.endm
|
||||
@@ -684,7 +685,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovsd %xmm6 , (%rbp)
|
||||
vmovsd %xmm7 , (%rbp, LDC)
|
||||
|
||||
addq $1*SIZE, CO1
|
||||
addq $ 1*SIZE, CO1
|
||||
.endm
|
||||
|
||||
|
||||
@@ -707,13 +708,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovups -12 * SIZE(BO), %ymm1
|
||||
vmovups -16 * SIZE(AO), %ymm0
|
||||
vmulpd %ymm0 ,%ymm1 , %ymm4
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vmulpd %ymm0 ,%ymm1 , %ymm5
|
||||
vpermpd $0x1b, %ymm0 , %ymm0
|
||||
vpermpd $ 0x1b, %ymm0 , %ymm0
|
||||
vmulpd %ymm0 ,%ymm1 , %ymm6
|
||||
|
||||
addq $4*SIZE, BO
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
addq $ 4*SIZE, BO
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vmulpd %ymm0 ,%ymm1 , %ymm7
|
||||
vmovups -12 * SIZE(BO), %ymm1
|
||||
|
||||
@@ -723,12 +724,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
prefetcht0 A_PR1(AO)
|
||||
vmovups -16 * SIZE(AO), %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm4
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm5
|
||||
vpermpd $0x1b, %ymm0 , %ymm0
|
||||
vpermpd $ 0x1b, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm6
|
||||
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm7
|
||||
vmovups -12 * SIZE(BO), %ymm1
|
||||
|
||||
@@ -737,44 +738,44 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
.macro KERNEL4x4_M2
|
||||
vmovups -12 * SIZE(AO), %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm4
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm5
|
||||
vpermpd $0x1b, %ymm0 , %ymm0
|
||||
vpermpd $ 0x1b, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm6
|
||||
|
||||
addq $8*SIZE, AO
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
addq $ 8*SIZE, AO
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm7
|
||||
vmovups -8 * SIZE(BO), %ymm1
|
||||
addq $8*SIZE, BO
|
||||
addq $ 8*SIZE, BO
|
||||
.endm
|
||||
|
||||
|
||||
.macro KERNEL4x4_E
|
||||
vmovups -12 * SIZE(AO), %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm4
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm5
|
||||
vpermpd $0x1b, %ymm0 , %ymm0
|
||||
vpermpd $ 0x1b, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm6
|
||||
|
||||
addq $8*SIZE, AO
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
addq $ 8*SIZE, AO
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm7
|
||||
addq $4*SIZE, BO
|
||||
addq $ 4*SIZE, BO
|
||||
.endm
|
||||
|
||||
.macro KERNEL4x4_SUB
|
||||
vmovups -12 * SIZE(BO), %ymm1
|
||||
vmovups -16 * SIZE(AO), %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm4
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm5
|
||||
addq $4*SIZE, BO
|
||||
vpermpd $0x1b, %ymm0 , %ymm0
|
||||
addq $ 4*SIZE, BO
|
||||
vpermpd $ 0x1b, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm6
|
||||
addq $4*SIZE, AO
|
||||
vpermpd $0xb1, %ymm0 , %ymm0
|
||||
addq $ 4*SIZE, AO
|
||||
vpermpd $ 0xb1, %ymm0 , %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm1 , %ymm7
|
||||
|
||||
.endm
|
||||
@@ -788,23 +789,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmulpd %ymm0 , %ymm5 , %ymm5
|
||||
vmulpd %ymm0 , %ymm6 , %ymm6
|
||||
|
||||
vpermpd $0xb1 , %ymm5, %ymm5
|
||||
vpermpd $0xb1 , %ymm7, %ymm7
|
||||
vpermpd $ 0xb1 , %ymm5, %ymm5
|
||||
vpermpd $ 0xb1 , %ymm7, %ymm7
|
||||
|
||||
vblendpd $0x0a, %ymm5, %ymm4, %ymm0
|
||||
vblendpd $0x05, %ymm5, %ymm4, %ymm1
|
||||
vblendpd $0x0a, %ymm7, %ymm6, %ymm2
|
||||
vblendpd $0x05, %ymm7, %ymm6, %ymm3
|
||||
vblendpd $ 0x0a, %ymm5, %ymm4, %ymm0
|
||||
vblendpd $ 0x05, %ymm5, %ymm4, %ymm1
|
||||
vblendpd $ 0x0a, %ymm7, %ymm6, %ymm2
|
||||
vblendpd $ 0x05, %ymm7, %ymm6, %ymm3
|
||||
|
||||
vpermpd $0x1b , %ymm2, %ymm2
|
||||
vpermpd $0x1b , %ymm3, %ymm3
|
||||
vpermpd $0xb1 , %ymm2, %ymm2
|
||||
vpermpd $0xb1 , %ymm3, %ymm3
|
||||
vpermpd $ 0x1b , %ymm2, %ymm2
|
||||
vpermpd $ 0x1b , %ymm3, %ymm3
|
||||
vpermpd $ 0xb1 , %ymm2, %ymm2
|
||||
vpermpd $ 0xb1 , %ymm3, %ymm3
|
||||
|
||||
vblendpd $0x03, %ymm0, %ymm2 , %ymm4
|
||||
vblendpd $0x03, %ymm1, %ymm3 , %ymm5
|
||||
vblendpd $0x03, %ymm2, %ymm0 , %ymm6
|
||||
vblendpd $0x03, %ymm3, %ymm1 , %ymm7
|
||||
vblendpd $ 0x03, %ymm0, %ymm2 , %ymm4
|
||||
vblendpd $ 0x03, %ymm1, %ymm3 , %ymm5
|
||||
vblendpd $ 0x03, %ymm2, %ymm0 , %ymm6
|
||||
vblendpd $ 0x03, %ymm3, %ymm1 , %ymm7
|
||||
|
||||
leaq (CO1, LDC, 2), %rax
|
||||
|
||||
@@ -823,7 +824,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovups %ymm6 , (%rax)
|
||||
vmovups %ymm7 , (%rax, LDC)
|
||||
|
||||
addq $4*SIZE, CO1
|
||||
addq $ 4*SIZE, CO1
|
||||
.endm
|
||||
|
||||
/******************************************************************************************/
|
||||
@@ -848,9 +849,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vfmadd231pd %xmm0 ,%xmm2 , %xmm5
|
||||
vmovddup -9 * SIZE(BO), %xmm8
|
||||
vfmadd231pd %xmm0 ,%xmm3 , %xmm6
|
||||
addq $4*SIZE, BO
|
||||
addq $ 4*SIZE, BO
|
||||
vfmadd231pd %xmm0 ,%xmm8 , %xmm7
|
||||
addq $2*SIZE, AO
|
||||
addq $ 2*SIZE, AO
|
||||
|
||||
.endm
|
||||
|
||||
@@ -880,7 +881,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovups %xmm6 , (%rax)
|
||||
vmovups %xmm7 , (%rax, LDC)
|
||||
|
||||
addq $2*SIZE, CO1
|
||||
addq $ 2*SIZE, CO1
|
||||
.endm
|
||||
|
||||
/******************************************************************************************/
|
||||
@@ -905,9 +906,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vfmadd231sd %xmm0 ,%xmm2 , %xmm5
|
||||
vmovsd -9 * SIZE(BO), %xmm8
|
||||
vfmadd231sd %xmm0 ,%xmm3 , %xmm6
|
||||
addq $4*SIZE, BO
|
||||
addq $ 4*SIZE, BO
|
||||
vfmadd231sd %xmm0 ,%xmm8 , %xmm7
|
||||
addq $1*SIZE, AO
|
||||
addq $ 1*SIZE, AO
|
||||
|
||||
.endm
|
||||
|
||||
@@ -937,7 +938,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovsd %xmm6 , (%rax)
|
||||
vmovsd %xmm7 , (%rax, LDC)
|
||||
|
||||
addq $1*SIZE, CO1
|
||||
addq $ 1*SIZE, CO1
|
||||
.endm
|
||||
|
||||
|
||||
@@ -963,8 +964,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vfmadd231pd %xmm1 ,%xmm2 , %xmm5
|
||||
vfmadd231pd %xmm0 ,%xmm3 , %xmm6
|
||||
vfmadd231pd %xmm1 ,%xmm3 , %xmm7
|
||||
addq $2*SIZE, BO
|
||||
addq $4*SIZE, AO
|
||||
addq $ 2*SIZE, BO
|
||||
addq $ 4*SIZE, AO
|
||||
|
||||
.endm
|
||||
|
||||
@@ -993,7 +994,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovups %xmm6 , (CO1, LDC)
|
||||
vmovups %xmm7 , 2 * SIZE(CO1, LDC)
|
||||
|
||||
addq $4*SIZE, CO1
|
||||
addq $ 4*SIZE, CO1
|
||||
.endm
|
||||
|
||||
|
||||
@@ -1014,8 +1015,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovddup -11 * SIZE(BO), %xmm3
|
||||
vfmadd231pd %xmm0 ,%xmm2 , %xmm4
|
||||
vfmadd231pd %xmm0 ,%xmm3 , %xmm6
|
||||
addq $2*SIZE, BO
|
||||
addq $2*SIZE, AO
|
||||
addq $ 2*SIZE, BO
|
||||
addq $ 2*SIZE, AO
|
||||
|
||||
.endm
|
||||
|
||||
@@ -1038,7 +1039,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovups %xmm4 , (CO1)
|
||||
vmovups %xmm6 , (CO1, LDC)
|
||||
|
||||
addq $2*SIZE, CO1
|
||||
addq $ 2*SIZE, CO1
|
||||
.endm
|
||||
|
||||
/******************************************************************************************/
|
||||
@@ -1058,8 +1059,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovsd -11 * SIZE(BO), %xmm2
|
||||
vfmadd231sd %xmm0 ,%xmm1 , %xmm4
|
||||
vfmadd231sd %xmm0 ,%xmm2 , %xmm5
|
||||
addq $2*SIZE, BO
|
||||
addq $1*SIZE, AO
|
||||
addq $ 2*SIZE, BO
|
||||
addq $ 1*SIZE, AO
|
||||
|
||||
.endm
|
||||
|
||||
@@ -1082,7 +1083,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovsd %xmm4 , (CO1)
|
||||
vmovsd %xmm5 , (CO1, LDC)
|
||||
|
||||
addq $1*SIZE, CO1
|
||||
addq $ 1*SIZE, CO1
|
||||
.endm
|
||||
|
||||
|
||||
@@ -1103,8 +1104,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovups -14 * SIZE(AO), %xmm1
|
||||
vfmadd231pd %xmm0 ,%xmm2 , %xmm4
|
||||
vfmadd231pd %xmm1 ,%xmm2 , %xmm5
|
||||
addq $1*SIZE, BO
|
||||
addq $4*SIZE, AO
|
||||
addq $ 1*SIZE, BO
|
||||
addq $ 4*SIZE, AO
|
||||
|
||||
.endm
|
||||
|
||||
@@ -1127,7 +1128,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovups %xmm4 , (CO1)
|
||||
vmovups %xmm5 , 2 * SIZE(CO1)
|
||||
|
||||
addq $4*SIZE, CO1
|
||||
addq $ 4*SIZE, CO1
|
||||
.endm
|
||||
|
||||
|
||||
@@ -1145,8 +1146,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovddup -12 * SIZE(BO), %xmm2
|
||||
vmovups -16 * SIZE(AO), %xmm0
|
||||
vfmadd231pd %xmm0 ,%xmm2 , %xmm4
|
||||
addq $1*SIZE, BO
|
||||
addq $2*SIZE, AO
|
||||
addq $ 1*SIZE, BO
|
||||
addq $ 2*SIZE, AO
|
||||
|
||||
.endm
|
||||
|
||||
@@ -1166,7 +1167,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
vmovups %xmm4 , (CO1)
|
||||
|
||||
addq $2*SIZE, CO1
|
||||
addq $ 2*SIZE, CO1
|
||||
.endm
|
||||
|
||||
|
||||
@@ -1184,8 +1185,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovsd -12 * SIZE(BO), %xmm1
|
||||
vmovsd -16 * SIZE(AO), %xmm0
|
||||
vfmadd231sd %xmm0 ,%xmm1 , %xmm4
|
||||
addq $1*SIZE, BO
|
||||
addq $1*SIZE, AO
|
||||
addq $ 1*SIZE, BO
|
||||
addq $ 1*SIZE, AO
|
||||
|
||||
.endm
|
||||
|
||||
@@ -1205,7 +1206,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
vmovsd %xmm4 , (CO1)
|
||||
|
||||
addq $1*SIZE, CO1
|
||||
addq $ 1*SIZE, CO1
|
||||
.endm
|
||||
|
||||
|
||||
@@ -1262,13 +1263,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
STACK_TOUCH
|
||||
|
||||
cmpq $0, OLD_M
|
||||
cmpq $ 0, OLD_M
|
||||
je .L999
|
||||
|
||||
cmpq $0, OLD_N
|
||||
cmpq $ 0, OLD_N
|
||||
je .L999
|
||||
|
||||
cmpq $0, OLD_K
|
||||
cmpq $ 0, OLD_K
|
||||
je .L999
|
||||
|
||||
movq OLD_M, M
|
||||
@@ -1288,7 +1289,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
movq Ndiv12, J
|
||||
cmpq $0, J
|
||||
cmpq $ 0, J
|
||||
je .L4_0
|
||||
ALIGN_4
|
||||
|
||||
@@ -1330,10 +1331,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovups %ymm6, 16 * SIZE(BO)
|
||||
vmovups %ymm7, 20 * SIZE(BO)
|
||||
|
||||
addq $8 * SIZE ,BO1
|
||||
addq $8 * SIZE ,BO2
|
||||
addq $8 * SIZE ,BO3
|
||||
addq $24 *SIZE ,BO
|
||||
addq $ 8 * SIZE ,BO1
|
||||
addq $ 8 * SIZE ,BO2
|
||||
addq $ 8 * SIZE ,BO3
|
||||
addq $ 24 *SIZE ,BO
|
||||
|
||||
decq %rax
|
||||
jnz .L12_01a_1
|
||||
@@ -1356,10 +1357,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovups %ymm1, 0 * SIZE(BO)
|
||||
vmovups %ymm2, 4 * SIZE(BO)
|
||||
vmovups %ymm3, 8 * SIZE(BO)
|
||||
addq $4*SIZE,BO1
|
||||
addq $4*SIZE,BO2
|
||||
addq $4*SIZE,BO3
|
||||
addq $12*SIZE,BO
|
||||
addq $ 4*SIZE,BO1
|
||||
addq $ 4*SIZE,BO2
|
||||
addq $ 4*SIZE,BO3
|
||||
addq $ 12*SIZE,BO
|
||||
decq %rax
|
||||
jnz .L12_02b
|
||||
|
||||
@@ -1407,8 +1408,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
subq $2, %rax
|
||||
je .L12_12a
|
||||
|
||||
.align 32
|
||||
|
||||
ALIGN_5
|
||||
.L12_12:
|
||||
|
||||
KERNEL4x12_M1
|
||||
@@ -1621,7 +1621,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
.L4_0:
|
||||
|
||||
cmpq $0, Nmod12 // N % 12 == 0
|
||||
cmpq $ 0, Nmod12 // N % 12 == 0
|
||||
je .L999
|
||||
|
||||
movq Nmod12, J
|
||||
@@ -1666,7 +1666,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
subq $2, %rax
|
||||
je .L4_12a
|
||||
|
||||
.align 32
|
||||
ALIGN_5
|
||||
|
||||
.L4_12:
|
||||
|
||||
@@ -1912,7 +1912,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
je .L2_16
|
||||
|
||||
.align 32
|
||||
ALIGN_5
|
||||
|
||||
.L2_12:
|
||||
|
||||
@@ -2108,7 +2108,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
sarq $3, %rax // K / 8
|
||||
je .L1_16
|
||||
|
||||
.align 32
|
||||
ALIGN_5
|
||||
|
||||
.L1_12:
|
||||
|
||||
@@ -2362,13 +2362,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
STACK_TOUCH
|
||||
|
||||
cmpq $0, OLD_M
|
||||
cmpq $ 0, OLD_M
|
||||
je .L999
|
||||
|
||||
cmpq $0, OLD_N
|
||||
cmpq $ 0, OLD_N
|
||||
je .L999
|
||||
|
||||
cmpq $0, OLD_K
|
||||
cmpq $ 0, OLD_K
|
||||
je .L999
|
||||
|
||||
movq OLD_M, M
|
||||
@@ -2397,7 +2397,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
movq Ndiv12, J
|
||||
cmpq $0, J
|
||||
cmpq $ 0, J
|
||||
je .L2_0
|
||||
ALIGN_4
|
||||
|
||||
@@ -2471,7 +2471,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
subq $2, %rax
|
||||
je .L4_12a
|
||||
|
||||
.align 32
|
||||
ALIGN_5
|
||||
|
||||
.L4_12:
|
||||
|
||||
@@ -2848,7 +2848,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
je .L2_16
|
||||
|
||||
.align 32
|
||||
ALIGN_5
|
||||
|
||||
.L2_12:
|
||||
|
||||
@@ -3176,7 +3176,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
sarq $3, %rax // K / 8
|
||||
je .L1_16
|
||||
|
||||
.align 32
|
||||
ALIGN_5
|
||||
|
||||
.L1_12:
|
||||
|
||||
|
||||
@@ -196,7 +196,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovups -12 * SIZE(AO), %xmm0
|
||||
vmulpd %xmm1,%xmm0,%xmm10
|
||||
vmulpd %xmm2,%xmm0,%xmm11
|
||||
addq $3*SIZE, BO
|
||||
addq $ 3 * SIZE, BO
|
||||
vmulpd %xmm3,%xmm0,%xmm12
|
||||
vmovups -10 * SIZE(AO), %xmm0
|
||||
vmulpd %xmm1,%xmm0,%xmm13
|
||||
@@ -294,7 +294,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovups 14 * SIZE(AO), %xmm0
|
||||
VFMADD231PD_( %xmm1,%xmm0,%xmm13 )
|
||||
vmovddup -3 * SIZE(BO), %xmm1
|
||||
addq $32 * SIZE, AO
|
||||
addq $ 32 * SIZE, AO
|
||||
VFMADD231PD_( %xmm2,%xmm0,%xmm14 )
|
||||
vmovddup -2 * SIZE(BO), %xmm2
|
||||
VFMADD231PD_( %xmm3,%xmm0,%xmm15 )
|
||||
@@ -392,8 +392,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovddup 10 * SIZE(BO), %xmm2
|
||||
VFMADD231PD_( %xmm3,%xmm0,%xmm15 )
|
||||
vmovddup 11 * SIZE(BO), %xmm3
|
||||
addq $32 * SIZE, AO
|
||||
addq $24 * SIZE, BO
|
||||
addq $ 32 * SIZE, AO
|
||||
addq $ 24 * SIZE, BO
|
||||
.endm
|
||||
|
||||
|
||||
@@ -414,9 +414,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
VFMADD231PD_( %xmm3,%xmm0,%xmm12 )
|
||||
vmovups 14 * SIZE(AO), %xmm0
|
||||
VFMADD231PD_( %xmm1,%xmm0,%xmm13 )
|
||||
addq $32*SIZE, AO
|
||||
addq $ 32 * SIZE, AO
|
||||
VFMADD231PD_( %xmm2,%xmm0,%xmm14 )
|
||||
addq $21*SIZE, BO
|
||||
addq $ 21 * SIZE, BO
|
||||
VFMADD231PD_( %xmm3,%xmm0,%xmm15 )
|
||||
.endm
|
||||
|
||||
@@ -438,9 +438,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
VFMADD231PD_( %xmm3,%xmm0,%xmm12 )
|
||||
vmovups -10 * SIZE(AO), %xmm0
|
||||
VFMADD231PD_( %xmm1,%xmm0,%xmm13 )
|
||||
addq $3*SIZE, BO
|
||||
addq $ 3 * SIZE, BO
|
||||
VFMADD231PD_( %xmm2,%xmm0,%xmm14 )
|
||||
addq $8*SIZE, AO
|
||||
addq $ 8 * SIZE, AO
|
||||
VFMADD231PD_( %xmm3,%xmm0,%xmm15 )
|
||||
.endm
|
||||
|
||||
@@ -483,7 +483,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
prefetcht0 C_PR1(CO1,LDC)
|
||||
prefetcht0 C_PR1(CO1,LDC,2)
|
||||
|
||||
addq $8 * SIZE, CO1 # coffset += 8
|
||||
addq $ 8 * SIZE, CO1 # coffset += 8
|
||||
.endm
|
||||
|
||||
|
||||
@@ -1165,9 +1165,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovsd %xmm5, 8*SIZE(BO)
|
||||
vmovups %xmm6, 9*SIZE(BO)
|
||||
vmovsd %xmm7,11*SIZE(BO)
|
||||
addq $8*SIZE,BO1
|
||||
addq $8*SIZE,BO2
|
||||
addq $12*SIZE,BO
|
||||
addq $ 8*SIZE,BO1
|
||||
addq $ 8*SIZE,BO2
|
||||
addq $ 12*SIZE,BO
|
||||
decq %rax
|
||||
jnz .L6_02
|
||||
|
||||
@@ -1184,9 +1184,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovsd (BO2), %xmm1
|
||||
vmovups %xmm0, (BO)
|
||||
vmovsd %xmm1, 2*SIZE(BO)
|
||||
addq $2*SIZE,BO1
|
||||
addq $2*SIZE,BO2
|
||||
addq $3*SIZE,BO
|
||||
addq $ 2*SIZE,BO1
|
||||
addq $ 2*SIZE,BO2
|
||||
addq $ 3*SIZE,BO
|
||||
decq %rax
|
||||
jnz .L6_02b
|
||||
|
||||
@@ -1223,9 +1223,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovups %xmm4, 7*SIZE(BO)
|
||||
vmovsd %xmm7, 9*SIZE(BO)
|
||||
vmovups %xmm6,10*SIZE(BO)
|
||||
addq $8*SIZE,BO1
|
||||
addq $8*SIZE,BO2
|
||||
addq $12*SIZE,BO
|
||||
addq $ 8*SIZE,BO1
|
||||
addq $ 8*SIZE,BO2
|
||||
addq $ 12*SIZE,BO
|
||||
decq %rax
|
||||
jnz .L6_03
|
||||
|
||||
@@ -1243,9 +1243,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovups (BO2), %xmm1
|
||||
vmovsd %xmm0, (BO)
|
||||
vmovups %xmm1, 1*SIZE(BO)
|
||||
addq $2*SIZE,BO1
|
||||
addq $2*SIZE,BO2
|
||||
addq $3*SIZE,BO
|
||||
addq $ 2*SIZE,BO1
|
||||
addq $ 2*SIZE,BO2
|
||||
addq $ 3*SIZE,BO
|
||||
decq %rax
|
||||
jnz .L6_03b
|
||||
|
||||
|
||||
@@ -111,6 +111,9 @@
|
||||
#define MM M
|
||||
#endif
|
||||
|
||||
#define TMP_M %r15
|
||||
#define Y2 %rbx
|
||||
|
||||
PROLOGUE
|
||||
PROFCODE
|
||||
|
||||
@@ -170,8 +173,9 @@
|
||||
jge .L00t
|
||||
|
||||
movq MMM,M
|
||||
addq I,M
|
||||
addq M, I
|
||||
jle .L999x
|
||||
movq I, M
|
||||
|
||||
.L00t:
|
||||
movq XX,X
|
||||
@@ -2463,21 +2467,23 @@
|
||||
cmpq Y, BUFFER
|
||||
je .L999
|
||||
#endif
|
||||
|
||||
movq M, TMP_M
|
||||
movq Y, Y1
|
||||
|
||||
cmpq $SIZE, INCY
|
||||
jne .L950
|
||||
|
||||
testq $SIZE, Y
|
||||
testq $SIZE, Y1
|
||||
je .L910
|
||||
|
||||
movsd (Y), %xmm0
|
||||
movsd (Y1), %xmm0
|
||||
addsd (BUFFER), %xmm0
|
||||
movsd %xmm0, (Y)
|
||||
movsd %xmm0, (Y1)
|
||||
|
||||
addq $SIZE, Y
|
||||
addq $SIZE, Y1
|
||||
addq $SIZE, BUFFER
|
||||
|
||||
decq M
|
||||
decq TMP_M
|
||||
jle .L999
|
||||
ALIGN_4
|
||||
|
||||
@@ -2485,20 +2491,20 @@
|
||||
testq $SIZE, BUFFER
|
||||
jne .L920
|
||||
|
||||
movq M, %rax
|
||||
movq TMP_M, %rax
|
||||
sarq $3, %rax
|
||||
jle .L914
|
||||
ALIGN_3
|
||||
|
||||
.L912:
|
||||
#ifdef PREFETCHW
|
||||
PREFETCHW (PREFETCHSIZE) * 4 + PREOFFSET(Y)
|
||||
PREFETCHW (PREFETCHSIZE) * 4 + PREOFFSET(Y1)
|
||||
#endif
|
||||
|
||||
movapd 0 * SIZE(Y), %xmm0
|
||||
movapd 2 * SIZE(Y), %xmm1
|
||||
movapd 4 * SIZE(Y), %xmm2
|
||||
movapd 6 * SIZE(Y), %xmm3
|
||||
movapd 0 * SIZE(Y1), %xmm0
|
||||
movapd 2 * SIZE(Y1), %xmm1
|
||||
movapd 4 * SIZE(Y1), %xmm2
|
||||
movapd 6 * SIZE(Y1), %xmm3
|
||||
|
||||
movapd 0 * SIZE(BUFFER), %xmm4
|
||||
movapd 2 * SIZE(BUFFER), %xmm5
|
||||
@@ -2514,12 +2520,12 @@
|
||||
addpd %xmm6, %xmm2
|
||||
addpd %xmm7, %xmm3
|
||||
|
||||
movapd %xmm0, 0 * SIZE(Y)
|
||||
movapd %xmm1, 2 * SIZE(Y)
|
||||
movapd %xmm2, 4 * SIZE(Y)
|
||||
movapd %xmm3, 6 * SIZE(Y)
|
||||
movapd %xmm0, 0 * SIZE(Y1)
|
||||
movapd %xmm1, 2 * SIZE(Y1)
|
||||
movapd %xmm2, 4 * SIZE(Y1)
|
||||
movapd %xmm3, 6 * SIZE(Y1)
|
||||
|
||||
addq $8 * SIZE, Y
|
||||
addq $8 * SIZE, Y1
|
||||
addq $8 * SIZE, BUFFER
|
||||
|
||||
decq %rax
|
||||
@@ -2527,14 +2533,14 @@
|
||||
ALIGN_3
|
||||
|
||||
.L914:
|
||||
testq $7, M
|
||||
testq $7, TMP_M
|
||||
jle .L999
|
||||
|
||||
testq $4, M
|
||||
testq $4, TMP_M
|
||||
jle .L915
|
||||
|
||||
movapd 0 * SIZE(Y), %xmm0
|
||||
movapd 2 * SIZE(Y), %xmm1
|
||||
movapd 0 * SIZE(Y1), %xmm0
|
||||
movapd 2 * SIZE(Y1), %xmm1
|
||||
|
||||
movapd 0 * SIZE(BUFFER), %xmm4
|
||||
movapd 2 * SIZE(BUFFER), %xmm5
|
||||
@@ -2542,40 +2548,40 @@
|
||||
addpd %xmm4, %xmm0
|
||||
addpd %xmm5, %xmm1
|
||||
|
||||
movapd %xmm0, 0 * SIZE(Y)
|
||||
movapd %xmm1, 2 * SIZE(Y)
|
||||
movapd %xmm0, 0 * SIZE(Y1)
|
||||
movapd %xmm1, 2 * SIZE(Y1)
|
||||
|
||||
addq $4 * SIZE, Y
|
||||
addq $4 * SIZE, Y1
|
||||
addq $4 * SIZE, BUFFER
|
||||
ALIGN_3
|
||||
|
||||
.L915:
|
||||
testq $2, M
|
||||
testq $2, TMP_M
|
||||
jle .L916
|
||||
|
||||
movapd (Y), %xmm0
|
||||
movapd (Y1), %xmm0
|
||||
|
||||
movapd (BUFFER), %xmm4
|
||||
|
||||
addpd %xmm4, %xmm0
|
||||
|
||||
movapd %xmm0, (Y)
|
||||
movapd %xmm0, (Y1)
|
||||
|
||||
addq $2 * SIZE, Y
|
||||
addq $2 * SIZE, Y1
|
||||
addq $2 * SIZE, BUFFER
|
||||
ALIGN_3
|
||||
|
||||
.L916:
|
||||
testq $1, M
|
||||
testq $1, TMP_M
|
||||
jle .L999
|
||||
|
||||
movsd (Y), %xmm0
|
||||
movsd (Y1), %xmm0
|
||||
|
||||
movsd 0 * SIZE(BUFFER), %xmm4
|
||||
|
||||
addsd %xmm4, %xmm0
|
||||
|
||||
movlpd %xmm0, (Y)
|
||||
movlpd %xmm0, (Y1)
|
||||
ALIGN_3
|
||||
|
||||
jmp .L999
|
||||
@@ -2584,20 +2590,20 @@
|
||||
.L920:
|
||||
movapd -1 * SIZE(BUFFER), %xmm4
|
||||
|
||||
movq M, %rax
|
||||
movq TMP_M, %rax
|
||||
sarq $3, %rax
|
||||
jle .L924
|
||||
ALIGN_3
|
||||
|
||||
.L922:
|
||||
#ifdef PREFETCHW
|
||||
PREFETCHW (PREFETCHSIZE) * 4 + PREOFFSET(Y)
|
||||
PREFETCHW (PREFETCHSIZE) * 4 + PREOFFSET(Y1)
|
||||
#endif
|
||||
|
||||
movapd 0 * SIZE(Y), %xmm0
|
||||
movapd 2 * SIZE(Y), %xmm1
|
||||
movapd 4 * SIZE(Y), %xmm2
|
||||
movapd 6 * SIZE(Y), %xmm3
|
||||
movapd 0 * SIZE(Y1), %xmm0
|
||||
movapd 2 * SIZE(Y1), %xmm1
|
||||
movapd 4 * SIZE(Y1), %xmm2
|
||||
movapd 6 * SIZE(Y1), %xmm3
|
||||
|
||||
movapd 1 * SIZE(BUFFER), %xmm5
|
||||
movapd 3 * SIZE(BUFFER), %xmm6
|
||||
@@ -2618,14 +2624,14 @@
|
||||
addpd %xmm6, %xmm2
|
||||
addpd %xmm7, %xmm3
|
||||
|
||||
movapd %xmm0, 0 * SIZE(Y)
|
||||
movapd %xmm1, 2 * SIZE(Y)
|
||||
movapd %xmm2, 4 * SIZE(Y)
|
||||
movapd %xmm3, 6 * SIZE(Y)
|
||||
movapd %xmm0, 0 * SIZE(Y1)
|
||||
movapd %xmm1, 2 * SIZE(Y1)
|
||||
movapd %xmm2, 4 * SIZE(Y1)
|
||||
movapd %xmm3, 6 * SIZE(Y1)
|
||||
|
||||
movapd %xmm8, %xmm4
|
||||
|
||||
addq $8 * SIZE, Y
|
||||
addq $8 * SIZE, Y1
|
||||
addq $8 * SIZE, BUFFER
|
||||
|
||||
decq %rax
|
||||
@@ -2633,14 +2639,14 @@
|
||||
ALIGN_3
|
||||
|
||||
.L924:
|
||||
testq $7, M
|
||||
testq $7, TMP_M
|
||||
jle .L999
|
||||
|
||||
testq $4, M
|
||||
testq $4, TMP_M
|
||||
jle .L925
|
||||
|
||||
movapd 0 * SIZE(Y), %xmm0
|
||||
movapd 2 * SIZE(Y), %xmm1
|
||||
movapd 0 * SIZE(Y1), %xmm0
|
||||
movapd 2 * SIZE(Y1), %xmm1
|
||||
|
||||
movapd 1 * SIZE(BUFFER), %xmm5
|
||||
movapd 3 * SIZE(BUFFER), %xmm6
|
||||
@@ -2651,20 +2657,20 @@
|
||||
addpd %xmm4, %xmm0
|
||||
addpd %xmm5, %xmm1
|
||||
|
||||
movapd %xmm0, 0 * SIZE(Y)
|
||||
movapd %xmm1, 2 * SIZE(Y)
|
||||
movapd %xmm0, 0 * SIZE(Y1)
|
||||
movapd %xmm1, 2 * SIZE(Y1)
|
||||
|
||||
movapd %xmm6, %xmm4
|
||||
|
||||
addq $4 * SIZE, Y
|
||||
addq $4 * SIZE, Y1
|
||||
addq $4 * SIZE, BUFFER
|
||||
ALIGN_3
|
||||
|
||||
.L925:
|
||||
testq $2, M
|
||||
testq $2, TMP_M
|
||||
jle .L926
|
||||
|
||||
movapd (Y), %xmm0
|
||||
movapd (Y1), %xmm0
|
||||
|
||||
movapd 1 * SIZE(BUFFER), %xmm5
|
||||
|
||||
@@ -2672,25 +2678,25 @@
|
||||
|
||||
addpd %xmm4, %xmm0
|
||||
|
||||
movapd %xmm0, (Y)
|
||||
movapd %xmm0, (Y1)
|
||||
|
||||
movaps %xmm5, %xmm4
|
||||
|
||||
addq $2 * SIZE, Y
|
||||
addq $2 * SIZE, Y1
|
||||
addq $2 * SIZE, BUFFER
|
||||
ALIGN_3
|
||||
|
||||
.L926:
|
||||
testq $1, M
|
||||
testq $1, TMP_M
|
||||
jle .L999
|
||||
|
||||
movsd (Y), %xmm0
|
||||
movsd (Y1), %xmm0
|
||||
|
||||
shufpd $1, %xmm4, %xmm4
|
||||
|
||||
addsd %xmm4, %xmm0
|
||||
|
||||
movlpd %xmm0, (Y)
|
||||
movlpd %xmm0, (Y1)
|
||||
ALIGN_3
|
||||
|
||||
jmp .L999
|
||||
@@ -2700,53 +2706,53 @@
|
||||
testq $SIZE, BUFFER
|
||||
je .L960
|
||||
|
||||
movsd (Y), %xmm0
|
||||
movsd (Y1), %xmm0
|
||||
addsd (BUFFER), %xmm0
|
||||
movsd %xmm0, (Y)
|
||||
movsd %xmm0, (Y1)
|
||||
|
||||
addq INCY, Y
|
||||
addq INCY, Y1
|
||||
addq $SIZE, BUFFER
|
||||
|
||||
decq M
|
||||
decq TMP_M
|
||||
jle .L999
|
||||
ALIGN_4
|
||||
|
||||
.L960:
|
||||
movq Y, Y1
|
||||
movq Y1, Y2
|
||||
|
||||
movq M, %rax
|
||||
movq TMP_M, %rax
|
||||
sarq $3, %rax
|
||||
jle .L964
|
||||
ALIGN_3
|
||||
|
||||
.L962:
|
||||
movsd (Y), %xmm0
|
||||
addq INCY, Y
|
||||
movhpd (Y), %xmm0
|
||||
addq INCY, Y
|
||||
movsd (Y2), %xmm0
|
||||
addq INCY, Y2
|
||||
movhpd (Y2), %xmm0
|
||||
addq INCY, Y2
|
||||
|
||||
movapd 0 * SIZE(BUFFER), %xmm4
|
||||
|
||||
movsd (Y), %xmm1
|
||||
addq INCY, Y
|
||||
movhpd (Y), %xmm1
|
||||
addq INCY, Y
|
||||
movsd (Y2), %xmm1
|
||||
addq INCY, Y2
|
||||
movhpd (Y2), %xmm1
|
||||
addq INCY, Y2
|
||||
|
||||
movapd 2 * SIZE(BUFFER), %xmm5
|
||||
|
||||
movsd (Y), %xmm2
|
||||
addq INCY, Y
|
||||
movhpd (Y), %xmm2
|
||||
addq INCY, Y
|
||||
movsd (Y2), %xmm2
|
||||
addq INCY, Y2
|
||||
movhpd (Y2), %xmm2
|
||||
addq INCY, Y2
|
||||
|
||||
movapd 4 * SIZE(BUFFER), %xmm6
|
||||
|
||||
addpd %xmm4, %xmm0
|
||||
|
||||
movsd (Y), %xmm3
|
||||
addq INCY, Y
|
||||
movhpd (Y), %xmm3
|
||||
addq INCY, Y
|
||||
movsd (Y2), %xmm3
|
||||
addq INCY, Y2
|
||||
movhpd (Y2), %xmm3
|
||||
addq INCY, Y2
|
||||
|
||||
movapd 6 * SIZE(BUFFER), %xmm7
|
||||
|
||||
@@ -2781,23 +2787,23 @@
|
||||
ALIGN_3
|
||||
|
||||
.L964:
|
||||
testq $7, M
|
||||
testq $7, TMP_M
|
||||
jle .L999
|
||||
|
||||
testq $4, M
|
||||
testq $4, TMP_M
|
||||
jle .L965
|
||||
|
||||
movsd (Y), %xmm0
|
||||
addq INCY, Y
|
||||
movhpd (Y), %xmm0
|
||||
addq INCY, Y
|
||||
movsd (Y2), %xmm0
|
||||
addq INCY, Y2
|
||||
movhpd (Y2), %xmm0
|
||||
addq INCY, Y2
|
||||
|
||||
movapd 0 * SIZE(BUFFER), %xmm4
|
||||
|
||||
movsd (Y), %xmm1
|
||||
addq INCY, Y
|
||||
movhpd (Y), %xmm1
|
||||
addq INCY, Y
|
||||
movsd (Y2), %xmm1
|
||||
addq INCY, Y2
|
||||
movhpd (Y2), %xmm1
|
||||
addq INCY, Y2
|
||||
|
||||
movapd 2 * SIZE(BUFFER), %xmm5
|
||||
|
||||
@@ -2817,13 +2823,13 @@
|
||||
ALIGN_3
|
||||
|
||||
.L965:
|
||||
testq $2, M
|
||||
testq $2, TMP_M
|
||||
jle .L966
|
||||
|
||||
movsd (Y), %xmm0
|
||||
addq INCY, Y
|
||||
movhpd (Y), %xmm0
|
||||
addq INCY, Y
|
||||
movsd (Y2), %xmm0
|
||||
addq INCY, Y2
|
||||
movhpd (Y2), %xmm0
|
||||
addq INCY, Y2
|
||||
|
||||
movapd 0 * SIZE(BUFFER), %xmm4
|
||||
|
||||
@@ -2838,10 +2844,10 @@
|
||||
ALIGN_3
|
||||
|
||||
.L966:
|
||||
testq $1, M
|
||||
testq $1, TMP_M
|
||||
jle .L999
|
||||
|
||||
movsd (Y), %xmm0
|
||||
movsd (Y2), %xmm0
|
||||
|
||||
movsd 0 * SIZE(BUFFER), %xmm4
|
||||
|
||||
@@ -2853,6 +2859,9 @@
|
||||
.L999:
|
||||
leaq (, M, SIZE), %rax
|
||||
addq %rax,AA
|
||||
movq STACK_INCY, INCY
|
||||
imulq INCY, %rax
|
||||
addq %rax, Y
|
||||
jmp .L0t
|
||||
ALIGN_4
|
||||
|
||||
|
||||
@@ -166,8 +166,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
VFMADD231PS_( %ymm9,%ymm2,%ymm1 )
|
||||
VFMADD231PS_( %ymm10,%ymm3,%ymm0 )
|
||||
VFMADD231PS_( %ymm11,%ymm3,%ymm1 )
|
||||
addq $4 , BI
|
||||
addq $16, %rax
|
||||
addq $ 4 , BI
|
||||
addq $ 16, %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE16x4
|
||||
@@ -233,8 +233,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vbroadcastss -1 * SIZE(BO, BI, SIZE), %ymm3
|
||||
VFMADD231PS_( %ymm8,%ymm2,%ymm0 )
|
||||
VFMADD231PS_( %ymm10,%ymm3,%ymm0 )
|
||||
addq $4 , BI
|
||||
addq $8 , %rax
|
||||
addq $ 4 , BI
|
||||
addq $ 8 , %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE8x4
|
||||
@@ -277,8 +277,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vbroadcastss -1 * SIZE(BO, BI, SIZE), %xmm3
|
||||
VFMADD231PS_( %xmm8,%xmm2,%xmm0 )
|
||||
VFMADD231PS_( %xmm10,%xmm3,%xmm0 )
|
||||
addq $4 , BI
|
||||
addq $4 , %rax
|
||||
addq $ 4 , BI
|
||||
addq $ 4 , %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE4x4
|
||||
@@ -325,8 +325,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
VFMADD231SS_( %xmm9,%xmm2,%xmm1 )
|
||||
VFMADD231SS_( %xmm10,%xmm3,%xmm0 )
|
||||
VFMADD231SS_( %xmm11,%xmm3,%xmm1 )
|
||||
addq $4 , BI
|
||||
addq $2, %rax
|
||||
addq $ 4 , BI
|
||||
addq $ 2, %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE2x4
|
||||
@@ -386,8 +386,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovss -1 * SIZE(BO, BI, SIZE), %xmm3
|
||||
VFMADD231SS_( %xmm8,%xmm2,%xmm0 )
|
||||
VFMADD231SS_( %xmm10,%xmm3,%xmm0 )
|
||||
addq $4 , BI
|
||||
addq $1, %rax
|
||||
addq $ 4 , BI
|
||||
addq $ 1, %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE1x4
|
||||
@@ -432,8 +432,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
VFMADD231PS_( %ymm5,%ymm2,%ymm1 )
|
||||
VFMADD231PS_( %ymm6,%ymm3,%ymm0 )
|
||||
VFMADD231PS_( %ymm7,%ymm3,%ymm1 )
|
||||
addq $2 , BI
|
||||
addq $16, %rax
|
||||
addq $ 2 , BI
|
||||
addq $ 16, %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE16x2
|
||||
@@ -474,8 +474,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vbroadcastss -3 * SIZE(BO, BI, SIZE), %ymm3
|
||||
VFMADD231PS_( %ymm4,%ymm2,%ymm0 )
|
||||
VFMADD231PS_( %ymm6,%ymm3,%ymm0 )
|
||||
addq $2 , BI
|
||||
addq $8 , %rax
|
||||
addq $ 2 , BI
|
||||
addq $ 8 , %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE8x2
|
||||
@@ -507,8 +507,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vbroadcastss -3 * SIZE(BO, BI, SIZE), %xmm3
|
||||
VFMADD231PS_( %xmm4,%xmm2,%xmm0 )
|
||||
VFMADD231PS_( %xmm6,%xmm3,%xmm0 )
|
||||
addq $2 , BI
|
||||
addq $4 , %rax
|
||||
addq $ 2 , BI
|
||||
addq $ 4 , %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE4x2
|
||||
@@ -542,8 +542,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
VFMADD231SS_( %xmm5,%xmm2,%xmm1 )
|
||||
VFMADD231SS_( %xmm6,%xmm3,%xmm0 )
|
||||
VFMADD231SS_( %xmm7,%xmm3,%xmm1 )
|
||||
addq $2 , BI
|
||||
addq $2, %rax
|
||||
addq $ 2 , BI
|
||||
addq $ 2, %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE2x2
|
||||
@@ -583,8 +583,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovss -3 * SIZE(BO, BI, SIZE), %xmm3
|
||||
VFMADD231SS_( %xmm4,%xmm2,%xmm0 )
|
||||
VFMADD231SS_( %xmm6,%xmm3,%xmm0 )
|
||||
addq $2 , BI
|
||||
addq $1, %rax
|
||||
addq $ 2 , BI
|
||||
addq $ 1, %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE1x2
|
||||
@@ -619,8 +619,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vbroadcastss -4 * SIZE(BO, BI, SIZE), %ymm2
|
||||
VFMADD231PS_( %ymm4,%ymm2,%ymm0 )
|
||||
VFMADD231PS_( %ymm5,%ymm2,%ymm1 )
|
||||
addq $1 , BI
|
||||
addq $16, %rax
|
||||
addq $ 1 , BI
|
||||
addq $ 16, %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE16x1
|
||||
@@ -649,8 +649,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovups -16 * SIZE(AO, %rax, SIZE), %ymm0
|
||||
vbroadcastss -4 * SIZE(BO, BI, SIZE), %ymm2
|
||||
VFMADD231PS_( %ymm4,%ymm2,%ymm0 )
|
||||
addq $1 , BI
|
||||
addq $8 , %rax
|
||||
addq $ 1 , BI
|
||||
addq $ 8 , %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE8x1
|
||||
@@ -677,8 +677,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovups -16 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vbroadcastss -4 * SIZE(BO, BI, SIZE), %xmm2
|
||||
VFMADD231PS_( %xmm4,%xmm2,%xmm0 )
|
||||
addq $1 , BI
|
||||
addq $4 , %rax
|
||||
addq $ 1 , BI
|
||||
addq $ 4 , %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE4x1
|
||||
@@ -706,8 +706,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovss -4 * SIZE(BO, BI, SIZE), %xmm2
|
||||
VFMADD231SS_( %xmm4,%xmm2,%xmm0 )
|
||||
VFMADD231SS_( %xmm5,%xmm2,%xmm1 )
|
||||
addq $1 , BI
|
||||
addq $2, %rax
|
||||
addq $ 1 , BI
|
||||
addq $ 2 , %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE2x1
|
||||
@@ -736,8 +736,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovss -16 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vmovss -4 * SIZE(BO, BI, SIZE), %xmm2
|
||||
VFMADD231SS_( %xmm4,%xmm2,%xmm0 )
|
||||
addq $1 , BI
|
||||
addq $1, %rax
|
||||
addq $ 1 , BI
|
||||
addq $ 1 , %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE1x1
|
||||
@@ -882,8 +882,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovups %xmm2, 8*SIZE(BO)
|
||||
vmovups %xmm3,12*SIZE(BO)
|
||||
|
||||
addq $16*SIZE,BO1
|
||||
addq $16*SIZE,BO
|
||||
addq $ 16*SIZE,BO1
|
||||
addq $ 16*SIZE,BO
|
||||
decq %rax
|
||||
jnz .L4_01a
|
||||
|
||||
@@ -899,8 +899,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
vmovups (BO1), %xmm0
|
||||
vmovups %xmm0, (BO)
|
||||
addq $4*SIZE,BO1
|
||||
addq $4*SIZE,BO
|
||||
addq $ 4*SIZE,BO1
|
||||
addq $ 4*SIZE,BO
|
||||
decq %rax
|
||||
jnz .L4_02c
|
||||
|
||||
@@ -919,7 +919,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#endif
|
||||
|
||||
movq A, AO // aoffset = a
|
||||
addq $16 * SIZE, AO
|
||||
addq $ 16 * SIZE, AO
|
||||
|
||||
movq M, I
|
||||
sarq $4, I // i = (m >> 4)
|
||||
|
||||
@@ -109,22 +109,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#if defined(OS_WINDOWS)
|
||||
#if L_BUFFER_SIZE > 16384
|
||||
#define STACK_TOUCH \
|
||||
movl $0, 4096 * 4(%rsp);\
|
||||
movl $0, 4096 * 3(%rsp);\
|
||||
movl $0, 4096 * 2(%rsp);\
|
||||
movl $0, 4096 * 1(%rsp);
|
||||
movl $ 0, 4096 * 4(%rsp);\
|
||||
movl $ 0, 4096 * 3(%rsp);\
|
||||
movl $ 0, 4096 * 2(%rsp);\
|
||||
movl $ 0, 4096 * 1(%rsp);
|
||||
#elif L_BUFFER_SIZE > 12288
|
||||
#define STACK_TOUCH \
|
||||
movl $0, 4096 * 3(%rsp);\
|
||||
movl $0, 4096 * 2(%rsp);\
|
||||
movl $0, 4096 * 1(%rsp);
|
||||
movl $ 0, 4096 * 3(%rsp);\
|
||||
movl $ 0, 4096 * 2(%rsp);\
|
||||
movl $ 0, 4096 * 1(%rsp);
|
||||
#elif L_BUFFER_SIZE > 8192
|
||||
#define STACK_TOUCH \
|
||||
movl $0, 4096 * 2(%rsp);\
|
||||
movl $0, 4096 * 1(%rsp);
|
||||
movl $ 0, 4096 * 2(%rsp);\
|
||||
movl $ 0, 4096 * 1(%rsp);
|
||||
#elif L_BUFFER_SIZE > 4096
|
||||
#define STACK_TOUCH \
|
||||
movl $0, 4096 * 1(%rsp);
|
||||
movl $ 0, 4096 * 1(%rsp);
|
||||
#else
|
||||
#define STACK_TOUCH
|
||||
#endif
|
||||
@@ -212,8 +212,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
VFMADDPD_I( %ymm11,%ymm7,%ymm0 )
|
||||
VFMADDPD_I( %ymm15,%ymm7,%ymm1 )
|
||||
|
||||
addq $4, BI
|
||||
addq $8, %rax
|
||||
addq $ 4, BI
|
||||
addq $ 8, %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE4x2
|
||||
@@ -222,10 +222,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vbroadcastsd ALPHA_I, %ymm1
|
||||
|
||||
// swap high and low 8 bytes
|
||||
vshufpd $0x05, %ymm9 , %ymm9, %ymm9
|
||||
vshufpd $0x05, %ymm11, %ymm11, %ymm11
|
||||
vshufpd $0x05, %ymm13, %ymm13, %ymm13
|
||||
vshufpd $0x05, %ymm15, %ymm15, %ymm15
|
||||
vshufpd $ 0x05, %ymm9 , %ymm9, %ymm9
|
||||
vshufpd $ 0x05, %ymm11, %ymm11, %ymm11
|
||||
vshufpd $ 0x05, %ymm13, %ymm13, %ymm13
|
||||
vshufpd $ 0x05, %ymm15, %ymm15, %ymm15
|
||||
|
||||
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
|
||||
defined(NR) || defined(NC) || defined(TR) || defined(TC)
|
||||
@@ -235,10 +235,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vaddsubpd %ymm13,%ymm12, %ymm12
|
||||
vaddsubpd %ymm15,%ymm14, %ymm14
|
||||
|
||||
vshufpd $0x05, %ymm8 , %ymm8, %ymm9
|
||||
vshufpd $0x05, %ymm10, %ymm10, %ymm11
|
||||
vshufpd $0x05, %ymm12, %ymm12, %ymm13
|
||||
vshufpd $0x05, %ymm14, %ymm14, %ymm15
|
||||
vshufpd $ 0x05, %ymm8 , %ymm8, %ymm9
|
||||
vshufpd $ 0x05, %ymm10, %ymm10, %ymm11
|
||||
vshufpd $ 0x05, %ymm12, %ymm12, %ymm13
|
||||
vshufpd $ 0x05, %ymm14, %ymm14, %ymm15
|
||||
|
||||
#else
|
||||
vaddsubpd %ymm8, %ymm9 ,%ymm9
|
||||
@@ -252,10 +252,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovapd %ymm15, %ymm14
|
||||
|
||||
// swap high and low 8 bytes
|
||||
vshufpd $0x05, %ymm9 , %ymm9, %ymm9
|
||||
vshufpd $0x05, %ymm11, %ymm11, %ymm11
|
||||
vshufpd $0x05, %ymm13, %ymm13, %ymm13
|
||||
vshufpd $0x05, %ymm15, %ymm15, %ymm15
|
||||
vshufpd $ 0x05, %ymm9 , %ymm9, %ymm9
|
||||
vshufpd $ 0x05, %ymm11, %ymm11, %ymm11
|
||||
vshufpd $ 0x05, %ymm13, %ymm13, %ymm13
|
||||
vshufpd $ 0x05, %ymm15, %ymm15, %ymm15
|
||||
|
||||
#endif
|
||||
|
||||
@@ -316,8 +316,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovddup -5 * SIZE(BO, BI, SIZE), %xmm7
|
||||
VFMADDPD_I( %xmm11,%xmm7,%xmm0 )
|
||||
VFMADDPD_I( %xmm15,%xmm7,%xmm1 )
|
||||
addq $4, BI
|
||||
addq $4, %rax
|
||||
addq $ 4, BI
|
||||
addq $ 4, %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE2x2
|
||||
@@ -326,10 +326,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovddup ALPHA_I, %xmm1
|
||||
|
||||
// swap high and low 64 bytes
|
||||
vshufpd $0x01, %xmm9 , %xmm9, %xmm9
|
||||
vshufpd $0x01, %xmm11, %xmm11, %xmm11
|
||||
vshufpd $0x01, %xmm13, %xmm13, %xmm13
|
||||
vshufpd $0x01, %xmm15, %xmm15, %xmm15
|
||||
vshufpd $ 0x01, %xmm9 , %xmm9, %xmm9
|
||||
vshufpd $ 0x01, %xmm11, %xmm11, %xmm11
|
||||
vshufpd $ 0x01, %xmm13, %xmm13, %xmm13
|
||||
vshufpd $ 0x01, %xmm15, %xmm15, %xmm15
|
||||
|
||||
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
|
||||
defined(NR) || defined(NC) || defined(TR) || defined(TC)
|
||||
@@ -339,10 +339,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vaddsubpd %xmm13,%xmm12, %xmm12
|
||||
vaddsubpd %xmm15,%xmm14, %xmm14
|
||||
|
||||
vshufpd $0x01, %xmm8 , %xmm8, %xmm9
|
||||
vshufpd $0x01, %xmm10, %xmm10, %xmm11
|
||||
vshufpd $0x01, %xmm12, %xmm12, %xmm13
|
||||
vshufpd $0x01, %xmm14, %xmm14, %xmm15
|
||||
vshufpd $ 0x01, %xmm8 , %xmm8, %xmm9
|
||||
vshufpd $ 0x01, %xmm10, %xmm10, %xmm11
|
||||
vshufpd $ 0x01, %xmm12, %xmm12, %xmm13
|
||||
vshufpd $ 0x01, %xmm14, %xmm14, %xmm15
|
||||
|
||||
#else
|
||||
vaddsubpd %xmm8, %xmm9 ,%xmm9
|
||||
@@ -356,10 +356,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovapd %xmm15, %xmm14
|
||||
|
||||
// swap high and low 64 bytes
|
||||
vshufpd $0x01, %xmm9 , %xmm9, %xmm9
|
||||
vshufpd $0x01, %xmm11, %xmm11, %xmm11
|
||||
vshufpd $0x01, %xmm13, %xmm13, %xmm13
|
||||
vshufpd $0x01, %xmm15, %xmm15, %xmm15
|
||||
vshufpd $ 0x01, %xmm9 , %xmm9, %xmm9
|
||||
vshufpd $ 0x01, %xmm11, %xmm11, %xmm11
|
||||
vshufpd $ 0x01, %xmm13, %xmm13, %xmm13
|
||||
vshufpd $ 0x01, %xmm15, %xmm15, %xmm15
|
||||
|
||||
#endif
|
||||
|
||||
@@ -415,8 +415,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovddup -5 * SIZE(BO, BI, SIZE), %xmm7
|
||||
VFMADDPD_R( %xmm10,%xmm6,%xmm0 )
|
||||
VFMADDPD_I( %xmm11,%xmm7,%xmm0 )
|
||||
addq $4, BI
|
||||
addq $2, %rax
|
||||
addq $ 4, BI
|
||||
addq $ 2, %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE1x2
|
||||
@@ -425,8 +425,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovddup ALPHA_I, %xmm1
|
||||
|
||||
// swap high and low 64 bytes
|
||||
vshufpd $0x01, %xmm9 , %xmm9, %xmm9
|
||||
vshufpd $0x01, %xmm11, %xmm11, %xmm11
|
||||
vshufpd $ 0x01, %xmm9 , %xmm9, %xmm9
|
||||
vshufpd $ 0x01, %xmm11, %xmm11, %xmm11
|
||||
|
||||
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
|
||||
defined(NR) || defined(NC) || defined(TR) || defined(TC)
|
||||
@@ -434,8 +434,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vaddsubpd %xmm9, %xmm8 , %xmm8
|
||||
vaddsubpd %xmm11,%xmm10, %xmm10
|
||||
|
||||
vshufpd $0x01, %xmm8 , %xmm8, %xmm9
|
||||
vshufpd $0x01, %xmm10, %xmm10, %xmm11
|
||||
vshufpd $ 0x01, %xmm8 , %xmm8, %xmm9
|
||||
vshufpd $ 0x01, %xmm10, %xmm10, %xmm11
|
||||
|
||||
#else
|
||||
vaddsubpd %xmm8, %xmm9, %xmm9
|
||||
@@ -445,8 +445,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovapd %xmm11, %xmm10
|
||||
|
||||
// swap high and low 64 bytes
|
||||
vshufpd $0x01, %xmm9 , %xmm9, %xmm9
|
||||
vshufpd $0x01, %xmm11, %xmm11, %xmm11
|
||||
vshufpd $ 0x01, %xmm9 , %xmm9, %xmm9
|
||||
vshufpd $ 0x01, %xmm11, %xmm11, %xmm11
|
||||
|
||||
#endif
|
||||
|
||||
@@ -486,8 +486,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
VFMADDPD_I( %ymm9 ,%ymm5,%ymm0 )
|
||||
VFMADDPD_I( %ymm13,%ymm5,%ymm1 )
|
||||
|
||||
addq $2, BI
|
||||
addq $8, %rax
|
||||
addq $ 2, BI
|
||||
addq $ 8, %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE4x1
|
||||
@@ -496,8 +496,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vbroadcastsd ALPHA_I, %ymm1
|
||||
|
||||
// swap high and low 8 bytes
|
||||
vshufpd $0x05, %ymm9 , %ymm9, %ymm9
|
||||
vshufpd $0x05, %ymm13, %ymm13, %ymm13
|
||||
vshufpd $ 0x05, %ymm9 , %ymm9, %ymm9
|
||||
vshufpd $ 0x05, %ymm13, %ymm13, %ymm13
|
||||
|
||||
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
|
||||
defined(NR) || defined(NC) || defined(TR) || defined(TC)
|
||||
@@ -505,8 +505,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vaddsubpd %ymm9, %ymm8 , %ymm8
|
||||
vaddsubpd %ymm13,%ymm12 , %ymm12
|
||||
|
||||
vshufpd $0x05, %ymm8 , %ymm8, %ymm9
|
||||
vshufpd $0x05, %ymm12, %ymm12, %ymm13
|
||||
vshufpd $ 0x05, %ymm8 , %ymm8, %ymm9
|
||||
vshufpd $ 0x05, %ymm12, %ymm12, %ymm13
|
||||
|
||||
#else
|
||||
vaddsubpd %ymm8, %ymm9 , %ymm9
|
||||
@@ -516,8 +516,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovapd %ymm13, %ymm12
|
||||
|
||||
// swap high and low 8 bytes
|
||||
vshufpd $0x05, %ymm9 , %ymm9, %ymm9
|
||||
vshufpd $0x05, %ymm13, %ymm13, %ymm13
|
||||
vshufpd $ 0x05, %ymm9 , %ymm9, %ymm9
|
||||
vshufpd $ 0x05, %ymm13, %ymm13, %ymm13
|
||||
|
||||
#endif
|
||||
|
||||
@@ -559,8 +559,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovddup -3 * SIZE(BO, BI, SIZE), %xmm5
|
||||
VFMADDPD_I( %xmm9,%xmm5,%xmm0 )
|
||||
VFMADDPD_I( %xmm13,%xmm5,%xmm1 )
|
||||
addq $2, BI
|
||||
addq $4, %rax
|
||||
addq $ 2, BI
|
||||
addq $ 4, %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE2x1
|
||||
@@ -569,8 +569,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovddup ALPHA_I, %xmm1
|
||||
|
||||
// swap high and low 64 bytes
|
||||
vshufpd $0x01, %xmm9 , %xmm9, %xmm9
|
||||
vshufpd $0x01, %xmm13, %xmm13, %xmm13
|
||||
vshufpd $ 0x01, %xmm9 , %xmm9, %xmm9
|
||||
vshufpd $ 0x01, %xmm13, %xmm13, %xmm13
|
||||
|
||||
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
|
||||
defined(NR) || defined(NC) || defined(TR) || defined(TC)
|
||||
@@ -578,8 +578,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vaddsubpd %xmm9, %xmm8 , %xmm8
|
||||
vaddsubpd %xmm13,%xmm12 , %xmm12
|
||||
|
||||
vshufpd $0x01, %xmm8 , %xmm8, %xmm9
|
||||
vshufpd $0x01, %xmm12, %xmm12, %xmm13
|
||||
vshufpd $ 0x01, %xmm8 , %xmm8, %xmm9
|
||||
vshufpd $ 0x01, %xmm12, %xmm12, %xmm13
|
||||
|
||||
#else
|
||||
vaddsubpd %xmm8, %xmm9 , %xmm9
|
||||
@@ -589,8 +589,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovapd %xmm13, %xmm12
|
||||
|
||||
// swap high and low 64 bytes
|
||||
vshufpd $0x01, %xmm9 , %xmm9, %xmm9
|
||||
vshufpd $0x01, %xmm13, %xmm13, %xmm13
|
||||
vshufpd $ 0x01, %xmm9 , %xmm9, %xmm9
|
||||
vshufpd $ 0x01, %xmm13, %xmm13, %xmm13
|
||||
|
||||
#endif
|
||||
|
||||
@@ -626,8 +626,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
VFMADDPD_R( %xmm8,%xmm4,%xmm0 )
|
||||
vmovddup -3 * SIZE(BO, BI, SIZE), %xmm5
|
||||
VFMADDPD_I( %xmm9,%xmm5,%xmm0 )
|
||||
addq $2, BI
|
||||
addq $2, %rax
|
||||
addq $ 2, BI
|
||||
addq $ 2, %rax
|
||||
.endm
|
||||
|
||||
.macro SAVE1x1
|
||||
@@ -636,14 +636,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovddup ALPHA_I, %xmm1
|
||||
|
||||
// swap high and low 64 bytes
|
||||
vshufpd $0x01, %xmm9 , %xmm9, %xmm9
|
||||
vshufpd $ 0x01, %xmm9 , %xmm9, %xmm9
|
||||
|
||||
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
|
||||
defined(NR) || defined(NC) || defined(TR) || defined(TC)
|
||||
|
||||
vaddsubpd %xmm9, %xmm8, %xmm8
|
||||
|
||||
vshufpd $0x01, %xmm8 , %xmm8, %xmm9
|
||||
vshufpd $ 0x01, %xmm8 , %xmm8, %xmm9
|
||||
|
||||
#else
|
||||
vaddsubpd %xmm8, %xmm9, %xmm9
|
||||
@@ -651,7 +651,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovapd %xmm9, %xmm8
|
||||
|
||||
// swap high and low 64 bytes
|
||||
vshufpd $0x01, %xmm9 , %xmm9, %xmm9
|
||||
vshufpd $ 0x01, %xmm9 , %xmm9, %xmm9
|
||||
|
||||
#endif
|
||||
|
||||
@@ -682,7 +682,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
PROLOGUE
|
||||
PROFCODE
|
||||
|
||||
subq $STACKSIZE, %rsp
|
||||
subq $ STACKSIZE, %rsp
|
||||
movq %rbx, (%rsp)
|
||||
movq %rbp, 8(%rsp)
|
||||
movq %r12, 16(%rsp)
|
||||
@@ -727,18 +727,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#endif
|
||||
|
||||
movq %rsp, SP # save old stack
|
||||
subq $128 + L_BUFFER_SIZE, %rsp
|
||||
andq $-4096, %rsp # align stack
|
||||
subq $ 128 + L_BUFFER_SIZE, %rsp
|
||||
andq $ -4096, %rsp # align stack
|
||||
|
||||
STACK_TOUCH
|
||||
|
||||
cmpq $0, OLD_M
|
||||
cmpq $ 0, OLD_M
|
||||
je .L999
|
||||
|
||||
cmpq $0, OLD_N
|
||||
cmpq $ 0, OLD_N
|
||||
je .L999
|
||||
|
||||
cmpq $0, OLD_K
|
||||
cmpq $ 0, OLD_K
|
||||
je .L999
|
||||
|
||||
movq OLD_M, M
|
||||
@@ -748,11 +748,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovsd %xmm0, ALPHA_R
|
||||
vmovsd %xmm1, ALPHA_I
|
||||
|
||||
salq $ZBASE_SHIFT, LDC
|
||||
salq $ ZBASE_SHIFT, LDC
|
||||
|
||||
movq N, %rax
|
||||
xorq %rdx, %rdx
|
||||
movq $2, %rdi
|
||||
movq $ 2, %rdi
|
||||
divq %rdi // N / 2
|
||||
movq %rax, Ndiv6 // N / 2
|
||||
movq %rdx, Nmod6 // N % 2
|
||||
@@ -770,7 +770,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
.L2_00_0:
|
||||
|
||||
movq Ndiv6, J
|
||||
cmpq $0, J
|
||||
cmpq $ 0, J
|
||||
je .L1_2_0
|
||||
ALIGN_4
|
||||
|
||||
@@ -789,8 +789,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vmovups 2 * SIZE(BO1), %xmm1
|
||||
vmovups %xmm0, (BO)
|
||||
vmovups %xmm1, 2 * SIZE(BO)
|
||||
addq $4*SIZE,BO1
|
||||
addq $4*SIZE,BO
|
||||
addq $ 4*SIZE,BO1
|
||||
addq $ 4*SIZE,BO
|
||||
decq %rax
|
||||
jnz .L2_00_02b
|
||||
|
||||
@@ -809,10 +809,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#endif
|
||||
|
||||
movq A, AO // aoffset = a
|
||||
addq $8 * SIZE, AO
|
||||
addq $ 8 * SIZE, AO
|
||||
|
||||
movq M, I
|
||||
sarq $2, I // i = (m >> 2)
|
||||
sarq $ 2, I // i = (m >> 2)
|
||||
je .L2_2_10
|
||||
|
||||
ALIGN_4
|
||||
@@ -825,15 +825,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
(defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
|
||||
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
|
||||
leaq BUFFER1, BO // first buffer to BO
|
||||
addq $8 * SIZE, BO
|
||||
addq $ 8 * SIZE, BO
|
||||
#else
|
||||
movq KK, %rax
|
||||
leaq BUFFER1, BO // first buffer to BO
|
||||
addq $8 * SIZE, BO
|
||||
addq $ 8 * SIZE, BO
|
||||
movq %rax, BI // Index for BO
|
||||
leaq (,BI,4), BI // BI = BI * 4 ; number of values
|
||||
leaq (BO, BI, SIZE), BO
|
||||
salq $3, %rax // rax = rax * 8 ; number of values
|
||||
salq $ 3, %rax // rax = rax * 8 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
#endif
|
||||
|
||||
@@ -848,20 +848,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#else
|
||||
movq KK, %rax
|
||||
#ifdef LEFT
|
||||
addq $4, %rax // number of values in AO
|
||||
addq $ 4, %rax // number of values in AO
|
||||
#else
|
||||
addq $2, %rax // number of values in BO
|
||||
addq $ 2, %rax // number of values in BO
|
||||
#endif
|
||||
movq %rax, KKK
|
||||
#endif
|
||||
|
||||
|
||||
andq $-8, %rax // K = K - ( K % 8 )
|
||||
andq $ -8, %rax // K = K - ( K % 8 )
|
||||
je .L2_4_16
|
||||
movq %rax, BI // Index for BO
|
||||
leaq ( ,BI,4), BI // BI = BI * 4 ; number of values
|
||||
|
||||
salq $3, %rax // rax = rax * 8 ; number of values
|
||||
salq $ 3, %rax // rax = rax * 8 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
leaq (BO, BI, SIZE), BO
|
||||
negq BI
|
||||
@@ -928,13 +928,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
movq KKK, %rax
|
||||
#endif
|
||||
|
||||
andq $7, %rax # if (k & 1)
|
||||
andq $ 7, %rax # if (k & 1)
|
||||
je .L2_4_19
|
||||
|
||||
movq %rax, BI // Index for BO
|
||||
leaq ( ,BI,4), BI // BI = BI * 4 ; number of values
|
||||
|
||||
salq $3, %rax // rax = rax * 8 ; number of values
|
||||
salq $ 3, %rax // rax = rax * 8 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
leaq (BO, BI, SIZE), BO
|
||||
negq BI
|
||||
@@ -960,16 +960,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
movq %rax, BI // Index for BO
|
||||
leaq ( ,BI,4), BI // BI = BI * 4 ; number of values
|
||||
leaq (BO, BI, SIZE), BO
|
||||
salq $3, %rax // rax = rax * 8 ; number of values
|
||||
salq $ 3, %rax // rax = rax * 8 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(TRMMKERNEL) && defined(LEFT)
|
||||
addq $4, KK
|
||||
addq $ 4, KK
|
||||
#endif
|
||||
|
||||
addq $8 * SIZE, CO1 # coffset += 8
|
||||
addq $ 8 * SIZE, CO1 # coffset += 8
|
||||
decq I # i --
|
||||
jg .L2_4_11
|
||||
ALIGN_4
|
||||
@@ -982,7 +982,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
/******************************************************************************************************************/
|
||||
.L2_2_10:
|
||||
testq $2, M
|
||||
testq $ 2, M
|
||||
jz .L2_2_40 // to next 2 lines of N
|
||||
|
||||
.L2_2_11:
|
||||
@@ -991,15 +991,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
(defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
|
||||
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
|
||||
leaq BUFFER1, BO // first buffer to BO
|
||||
addq $8 * SIZE, BO
|
||||
addq $ 8 * SIZE, BO
|
||||
#else
|
||||
movq KK, %rax
|
||||
leaq BUFFER1, BO // first buffer to BO
|
||||
addq $8 * SIZE, BO
|
||||
addq $ 8 * SIZE, BO
|
||||
movq %rax, BI // Index for BO
|
||||
leaq (,BI,4), BI // BI = BI * 4 ; number of values
|
||||
leaq (BO, BI, SIZE), BO
|
||||
salq $2, %rax // rax = rax * 4 ; number of values
|
||||
salq $ 2, %rax // rax = rax * 4 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
#endif
|
||||
|
||||
@@ -1014,20 +1014,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#else
|
||||
movq KK, %rax
|
||||
#ifdef LEFT
|
||||
addq $2, %rax // number of values in AO
|
||||
addq $ 2, %rax // number of values in AO
|
||||
#else
|
||||
addq $2, %rax // number of values in BO
|
||||
addq $ 2, %rax // number of values in BO
|
||||
#endif
|
||||
movq %rax, KKK
|
||||
#endif
|
||||
|
||||
|
||||
andq $-8, %rax // K = K - ( K % 8 )
|
||||
andq $ -8, %rax // K = K - ( K % 8 )
|
||||
je .L2_2_16
|
||||
movq %rax, BI // Index for BO
|
||||
leaq ( ,BI,4), BI // BI = BI * 4 ; number of values
|
||||
|
||||
salq $2, %rax // rax = rax * 4 ; number of values
|
||||
salq $ 2, %rax // rax = rax * 4 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
leaq (BO, BI, SIZE), BO
|
||||
negq BI
|
||||
@@ -1086,13 +1086,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
movq KKK, %rax
|
||||
#endif
|
||||
|
||||
andq $7, %rax # if (k & 1)
|
||||
andq $ 7, %rax # if (k & 1)
|
||||
je .L2_2_19
|
||||
|
||||
movq %rax, BI // Index for BO
|
||||
leaq ( ,BI,4), BI // BI = BI * 4 ; number of values
|
||||
|
||||
salq $2, %rax // rax = rax * 4 ; number of values
|
||||
salq $ 2, %rax // rax = rax * 4 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
leaq (BO, BI, SIZE), BO
|
||||
negq BI
|
||||
@@ -1118,16 +1118,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
movq %rax, BI // Index for BO
|
||||
leaq ( ,BI,4), BI // BI = BI * 4 ; number of values
|
||||
leaq (BO, BI, SIZE), BO
|
||||
salq $2, %rax // rax = rax * 4 ; number of values
|
||||
salq $ 2, %rax // rax = rax * 4 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(TRMMKERNEL) && defined(LEFT)
|
||||
addq $2, KK
|
||||
addq $ 2, KK
|
||||
#endif
|
||||
|
||||
addq $4 * SIZE, CO1 # coffset += 4
|
||||
addq $ 4 * SIZE, CO1 # coffset += 4
|
||||
ALIGN_4
|
||||
|
||||
|
||||
@@ -1135,7 +1135,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* Rest of M
|
||||
***************************************************************************/
|
||||
.L2_2_40:
|
||||
testq $1, M
|
||||
testq $ 1, M
|
||||
jz .L2_2_60 // to next 2 lines of N
|
||||
|
||||
ALIGN_4
|
||||
@@ -1146,15 +1146,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
(defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
|
||||
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
|
||||
leaq BUFFER1, BO // first buffer to BO
|
||||
addq $8 * SIZE, BO
|
||||
addq $ 8 * SIZE, BO
|
||||
#else
|
||||
movq KK, %rax
|
||||
leaq BUFFER1, BO // first buffer to BO
|
||||
addq $8 * SIZE, BO
|
||||
addq $ 8 * SIZE, BO
|
||||
movq %rax, BI // Index for BO
|
||||
leaq (,BI,4), BI // BI = BI * 4 ; number of values
|
||||
leaq (BO, BI, SIZE), BO
|
||||
salq $1, %rax // rax = rax * 2 ; number of values
|
||||
salq $ 1, %rax // rax = rax * 2 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
#endif
|
||||
|
||||
@@ -1169,20 +1169,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#else
|
||||
movq KK, %rax
|
||||
#ifdef LEFT
|
||||
addq $1, %rax // number of values in AO
|
||||
addq $ 1, %rax // number of values in AO
|
||||
#else
|
||||
addq $2, %rax // number of values in BO
|
||||
addq $ 2, %rax // number of values in BO
|
||||
#endif
|
||||
movq %rax, KKK
|
||||
#endif
|
||||
|
||||
|
||||
andq $-8, %rax // K = K - ( K % 8 )
|
||||
andq $ -8, %rax // K = K - ( K % 8 )
|
||||
je .L2_2_46
|
||||
movq %rax, BI // Index for BO
|
||||
leaq ( ,BI,4), BI // BI = BI * 4 ; number of values
|
||||
|
||||
salq $1, %rax // rax = rax * 2 ; number of values
|
||||
salq $ 1, %rax // rax = rax * 2 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
leaq (BO, BI, SIZE), BO
|
||||
negq BI
|
||||
@@ -1237,13 +1237,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
movq KKK, %rax
|
||||
#endif
|
||||
|
||||
andq $7, %rax # if (k & 1)
|
||||
andq $ 7, %rax # if (k & 1)
|
||||
je .L2_2_49
|
||||
|
||||
movq %rax, BI // Index for BO
|
||||
leaq ( ,BI,4), BI // BI = BI * 4 ; number of values
|
||||
|
||||
salq $1, %rax // rax = rax * 2 ; number of values
|
||||
salq $ 1, %rax // rax = rax * 2 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
leaq (BO, BI, SIZE), BO
|
||||
negq BI
|
||||
@@ -1269,16 +1269,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
movq %rax, BI // Index for BO
|
||||
leaq ( ,BI,4), BI // BI = BI * 4 ; number of values
|
||||
leaq (BO, BI, SIZE), BO
|
||||
salq $1, %rax // rax = rax * 2 ; number of values
|
||||
salq $ 1, %rax // rax = rax * 2 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(TRMMKERNEL) && defined(LEFT)
|
||||
addq $1, KK
|
||||
addq $ 1, KK
|
||||
#endif
|
||||
|
||||
addq $2 * SIZE, CO1 # coffset += 2
|
||||
addq $ 2 * SIZE, CO1 # coffset += 2
|
||||
decq I # i --
|
||||
jg .L2_2_41
|
||||
ALIGN_4
|
||||
@@ -1288,7 +1288,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
.L2_2_60:
|
||||
#if defined(TRMMKERNEL) && !defined(LEFT)
|
||||
addq $2, KK
|
||||
addq $ 2, KK
|
||||
#endif
|
||||
|
||||
decq J // j --
|
||||
@@ -1303,7 +1303,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*************************************************************************************************/
|
||||
|
||||
movq Nmod6, J
|
||||
andq $1, J // j % 2
|
||||
andq $ 1, J // j % 2
|
||||
je .L999
|
||||
ALIGN_4
|
||||
|
||||
@@ -1318,8 +1318,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
vmovups (BO1), %xmm0
|
||||
vmovups %xmm0, (BO)
|
||||
addq $2*SIZE,BO1
|
||||
addq $2*SIZE,BO
|
||||
addq $ 2*SIZE,BO1
|
||||
addq $ 2*SIZE,BO
|
||||
decq %rax
|
||||
jnz .L1_00_02b
|
||||
|
||||
@@ -1337,10 +1337,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#endif
|
||||
|
||||
movq A, AO // aoffset = a
|
||||
addq $8 * SIZE, AO
|
||||
addq $ 8 * SIZE, AO
|
||||
|
||||
movq M, I
|
||||
sarq $2, I // i = (m >> 2)
|
||||
sarq $ 2, I // i = (m >> 2)
|
||||
je .L1_2_10
|
||||
|
||||
ALIGN_4
|
||||
@@ -1354,15 +1354,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
(defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
|
||||
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
|
||||
leaq BUFFER1, BO // first buffer to BO
|
||||
addq $4 * SIZE, BO
|
||||
addq $ 4 * SIZE, BO
|
||||
#else
|
||||
movq KK, %rax
|
||||
leaq BUFFER1, BO // first buffer to BO
|
||||
addq $4 * SIZE, BO
|
||||
addq $ 4 * SIZE, BO
|
||||
movq %rax, BI // Index for BO
|
||||
leaq (,BI,2), BI // BI = BI * 2 ; number of values
|
||||
leaq (BO, BI, SIZE), BO
|
||||
salq $3, %rax // rax = rax * 8 ; number of values
|
||||
salq $ 3, %rax // rax = rax * 8 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
#endif
|
||||
|
||||
@@ -1377,20 +1377,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#else
|
||||
movq KK, %rax
|
||||
#ifdef LEFT
|
||||
addq $4, %rax // number of values in AO
|
||||
addq $ 4, %rax // number of values in AO
|
||||
#else
|
||||
addq $1, %rax // number of values in BO
|
||||
addq $ 1, %rax // number of values in BO
|
||||
#endif
|
||||
movq %rax, KKK
|
||||
#endif
|
||||
|
||||
|
||||
andq $-8, %rax // K = K - ( K % 8 )
|
||||
andq $ -8, %rax // K = K - ( K % 8 )
|
||||
je .L1_4_16
|
||||
movq %rax, BI // Index for BO
|
||||
leaq ( ,BI,2), BI // BI = BI * 2 ; number of values
|
||||
|
||||
salq $3, %rax // rax = rax * 8 ; number of values
|
||||
salq $ 3, %rax // rax = rax * 8 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
leaq (BO, BI, SIZE), BO
|
||||
negq BI
|
||||
@@ -1433,13 +1433,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
movq KKK, %rax
|
||||
#endif
|
||||
|
||||
andq $7, %rax # if (k & 1)
|
||||
andq $ 7, %rax # if (k & 1)
|
||||
je .L1_4_19
|
||||
|
||||
movq %rax, BI // Index for BO
|
||||
leaq ( ,BI,2), BI // BI = BI * 2 ; number of values
|
||||
|
||||
salq $3, %rax // rax = rax * 8 ; number of values
|
||||
salq $ 3, %rax // rax = rax * 8 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
leaq (BO, BI, SIZE), BO
|
||||
negq BI
|
||||
@@ -1466,16 +1466,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
movq %rax, BI // Index for BO
|
||||
leaq ( ,BI,2), BI // BI = BI * 2 ; number of values
|
||||
leaq (BO, BI, SIZE), BO
|
||||
salq $3, %rax // rax = rax * 8 ; number of values
|
||||
salq $ 3, %rax // rax = rax * 8 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(TRMMKERNEL) && defined(LEFT)
|
||||
addq $4, KK
|
||||
addq $ 4, KK
|
||||
#endif
|
||||
|
||||
addq $8 * SIZE, CO1 # coffset += 8
|
||||
addq $ 8 * SIZE, CO1 # coffset += 8
|
||||
decq I # i --
|
||||
jg .L1_4_11
|
||||
ALIGN_4
|
||||
@@ -1485,7 +1485,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
/*******************************************************************************************************/
|
||||
.L1_2_10:
|
||||
testq $2, M
|
||||
testq $ 2, M
|
||||
jz .L1_2_40
|
||||
|
||||
|
||||
@@ -1495,15 +1495,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
(defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
|
||||
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
|
||||
leaq BUFFER1, BO // first buffer to BO
|
||||
addq $4 * SIZE, BO
|
||||
addq $ 4 * SIZE, BO
|
||||
#else
|
||||
movq KK, %rax
|
||||
leaq BUFFER1, BO // first buffer to BO
|
||||
addq $4 * SIZE, BO
|
||||
addq $ 4 * SIZE, BO
|
||||
movq %rax, BI // Index for BO
|
||||
leaq (,BI,2), BI // BI = BI * 2 ; number of values
|
||||
leaq (BO, BI, SIZE), BO
|
||||
salq $2, %rax // rax = rax * 4 ; number of values
|
||||
salq $ 2, %rax // rax = rax * 4 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
#endif
|
||||
|
||||
@@ -1518,20 +1518,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#else
|
||||
movq KK, %rax
|
||||
#ifdef LEFT
|
||||
addq $2, %rax // number of values in AO
|
||||
addq $ 2, %rax // number of values in AO
|
||||
#else
|
||||
addq $1, %rax // number of values in BO
|
||||
addq $ 1, %rax // number of values in BO
|
||||
#endif
|
||||
movq %rax, KKK
|
||||
#endif
|
||||
|
||||
|
||||
andq $-8, %rax // K = K - ( K % 8 )
|
||||
andq $ -8, %rax // K = K - ( K % 8 )
|
||||
je .L1_2_16
|
||||
movq %rax, BI // Index for BO
|
||||
leaq ( ,BI,2), BI // BI = BI * 2 ; number of values
|
||||
|
||||
salq $2, %rax // rax = rax * 4 ; number of values
|
||||
salq $ 2, %rax // rax = rax * 4 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
leaq (BO, BI, SIZE), BO
|
||||
negq BI
|
||||
@@ -1583,13 +1583,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
movq KKK, %rax
|
||||
#endif
|
||||
|
||||
andq $7, %rax # if (k & 1)
|
||||
andq $ 7, %rax # if (k & 1)
|
||||
je .L1_2_19
|
||||
|
||||
movq %rax, BI // Index for BO
|
||||
leaq ( ,BI,2), BI // BI = BI * 2 ; number of values
|
||||
|
||||
salq $2, %rax // rax = rax * 4 ; number of values
|
||||
salq $ 2, %rax // rax = rax * 4 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
leaq (BO, BI, SIZE), BO
|
||||
negq BI
|
||||
@@ -1615,16 +1615,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
movq %rax, BI // Index for BO
|
||||
leaq ( ,BI,2), BI // BI = BI * 2 ; number of values
|
||||
leaq (BO, BI, SIZE), BO
|
||||
salq $2, %rax // rax = rax * 4 ; number of values
|
||||
salq $ 2, %rax // rax = rax * 4 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(TRMMKERNEL) && defined(LEFT)
|
||||
addq $2, KK
|
||||
addq $ 2, KK
|
||||
#endif
|
||||
|
||||
addq $4 * SIZE, CO1 # coffset += 4
|
||||
addq $ 4 * SIZE, CO1 # coffset += 4
|
||||
|
||||
ALIGN_4
|
||||
|
||||
@@ -1633,7 +1633,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* Rest of M
|
||||
***************************************************************************/
|
||||
.L1_2_40:
|
||||
testq $1, M
|
||||
testq $ 1, M
|
||||
jz .L999
|
||||
|
||||
ALIGN_4
|
||||
@@ -1644,15 +1644,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
(defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
|
||||
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
|
||||
leaq BUFFER1, BO // first buffer to BO
|
||||
addq $4 * SIZE, BO
|
||||
addq $ 4 * SIZE, BO
|
||||
#else
|
||||
movq KK, %rax
|
||||
leaq BUFFER1, BO // first buffer to BO
|
||||
addq $4 * SIZE, BO
|
||||
addq $ 4 * SIZE, BO
|
||||
movq %rax, BI // Index for BO
|
||||
leaq (,BI,2), BI // BI = BI * 2 ; number of values
|
||||
leaq (BO, BI, SIZE), BO
|
||||
salq $1, %rax // rax = rax * 2 ; number of values
|
||||
salq $ 1, %rax // rax = rax * 2 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
#endif
|
||||
|
||||
@@ -1667,20 +1667,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#else
|
||||
movq KK, %rax
|
||||
#ifdef LEFT
|
||||
addq $1, %rax // number of values in AO
|
||||
addq $ 1, %rax // number of values in AO
|
||||
#else
|
||||
addq $1, %rax // number of values in BO
|
||||
addq $ 1, %rax // number of values in BO
|
||||
#endif
|
||||
movq %rax, KKK
|
||||
#endif
|
||||
|
||||
|
||||
andq $-8, %rax // K = K - ( K % 8 )
|
||||
andq $ -8, %rax // K = K - ( K % 8 )
|
||||
je .L1_2_46
|
||||
movq %rax, BI // Index for BO
|
||||
leaq ( ,BI,2), BI // BI = BI * 2 ; number of values
|
||||
|
||||
salq $1, %rax // rax = rax * 2 ; number of values
|
||||
salq $ 1, %rax // rax = rax * 2 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
leaq (BO, BI, SIZE), BO
|
||||
negq BI
|
||||
@@ -1731,13 +1731,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
movq KKK, %rax
|
||||
#endif
|
||||
|
||||
andq $7, %rax # if (k & 1)
|
||||
andq $ 7, %rax # if (k & 1)
|
||||
je .L1_2_49
|
||||
|
||||
movq %rax, BI // Index for BO
|
||||
leaq ( ,BI,2), BI // BI = BI * 2 ; number of values
|
||||
|
||||
salq $1, %rax // rax = rax * 2 ; number of values
|
||||
salq $ 1, %rax // rax = rax * 2 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
leaq (BO, BI, SIZE), BO
|
||||
negq BI
|
||||
@@ -1763,16 +1763,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
movq %rax, BI // Index for BO
|
||||
leaq ( ,BI,2), BI // BI = BI * 2 ; number of values
|
||||
leaq (BO, BI, SIZE), BO
|
||||
salq $1, %rax // rax = rax * 2 ; number of values
|
||||
salq $ 1, %rax // rax = rax * 2 ; number of values
|
||||
leaq (AO, %rax, SIZE), AO
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(TRMMKERNEL) && defined(LEFT)
|
||||
addq $1, KK
|
||||
addq $ 1, KK
|
||||
#endif
|
||||
|
||||
addq $2 * SIZE, CO1 # coffset += 2
|
||||
addq $ 2 * SIZE, CO1 # coffset += 2
|
||||
decq I # i --
|
||||
jg .L1_2_41
|
||||
ALIGN_4
|
||||
@@ -1806,7 +1806,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
movups 208(%rsp), %xmm15
|
||||
#endif
|
||||
|
||||
addq $STACKSIZE, %rsp
|
||||
addq $ STACKSIZE, %rsp
|
||||
ret
|
||||
|
||||
EPILOGUE
|
||||
|
||||
19
lapack-devel.log
Normal file
19
lapack-devel.log
Normal file
@@ -0,0 +1,19 @@
|
||||
========================================================================================
|
||||
2014/05/07 Saar
|
||||
|
||||
Platform: BULLDOZER single thread
|
||||
|
||||
|
||||
--> LAPACK TESTING SUMMARY <--
|
||||
Processing LAPACK Testing output found in the TESTING direcory
|
||||
SUMMARY nb test run numerical error other error
|
||||
================ =========== ================= ================
|
||||
REAL 1079349 0 (0.000%) 0 (0.000%)
|
||||
DOUBLE PRECISION 1080161 0 (0.000%) 0 (0.000%)
|
||||
COMPLEX 556022 0 (0.000%) 0 (0.000%)
|
||||
COMPLEX16 556834 0 (0.000%) 0 (0.000%)
|
||||
|
||||
--> ALL PRECISIONS 3272366 0 (0.000%) 0 (0.000%)
|
||||
|
||||
========================================================================================
|
||||
|
||||
@@ -56,7 +56,7 @@ include ../make.inc
|
||||
|
||||
ALLAUX = ilaenv.o ieeeck.o lsamen.o xerbla_array.o iparmq.o \
|
||||
ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \
|
||||
../INSTALL/ilaver.o
|
||||
../INSTALL/ilaver.o ../INSTALL/slamch.o
|
||||
|
||||
SCLAUX = \
|
||||
sbdsdc.o \
|
||||
@@ -92,7 +92,7 @@ DZLAUX = \
|
||||
dlasr.o dlasrt.o dlassq.o dlasv2.o dpttrf.o dstebz.o dstedc.o \
|
||||
dsteqr.o dsterf.o dlaisnan.o disnan.o \
|
||||
dlartgp.o dlartgs.o \
|
||||
../INSTALL/dsecnd_$(TIMER).o
|
||||
../INSTALL/dlamch.o ../INSTALL/dsecnd_$(TIMER).o
|
||||
|
||||
SLASRC = \
|
||||
sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \
|
||||
@@ -101,7 +101,7 @@ SLASRC = \
|
||||
sgegs.o sgegv.o sgehd2.o sgehrd.o sgelq2.o sgelqf.o \
|
||||
sgels.o sgelsd.o sgelss.o sgelsx.o sgelsy.o sgeql2.o sgeqlf.o \
|
||||
sgeqp3.o sgeqpf.o sgeqr2.o sgeqr2p.o sgeqrf.o sgeqrfp.o sgerfs.o \
|
||||
sgerq2.o sgerqf.o sgesc2.o sgesdd.o sgesv.o sgesvd.o sgesvx.o \
|
||||
sgerq2.o sgerqf.o sgesc2.o sgesdd.o sgesvd.o sgesvx.o \
|
||||
sgetc2.o sgetri.o \
|
||||
sggbak.o sggbal.o sgges.o sggesx.o sggev.o sggevx.o \
|
||||
sggglm.o sgghrd.o sgglse.o sggqrf.o \
|
||||
@@ -127,7 +127,7 @@ SLASRC = \
|
||||
sormr3.o sormrq.o sormrz.o sormtr.o spbcon.o spbequ.o spbrfs.o \
|
||||
spbstf.o spbsv.o spbsvx.o \
|
||||
spbtf2.o spbtrf.o spbtrs.o spocon.o spoequ.o sporfs.o sposv.o \
|
||||
sposvx.o spotri.o spstrf.o spstf2.o \
|
||||
sposvx.o spstrf.o spstf2.o \
|
||||
sppcon.o sppequ.o \
|
||||
spprfs.o sppsv.o sppsvx.o spptrf.o spptri.o spptrs.o sptcon.o \
|
||||
spteqr.o sptrfs.o sptsv.o sptsvx.o spttrs.o sptts2.o srscl.o \
|
||||
@@ -157,7 +157,7 @@ SLASRC = \
|
||||
sgeqrt.o sgeqrt2.o sgeqrt3.o sgemqrt.o \
|
||||
stpqrt.o stpqrt2.o stpmqrt.o stprfb.o
|
||||
|
||||
DSLASRC = spotrs.o
|
||||
DSLASRC = spotrs.o
|
||||
|
||||
ifdef USEXBLAS
|
||||
SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \
|
||||
@@ -176,7 +176,7 @@ CLASRC = \
|
||||
cgegs.o cgegv.o cgehd2.o cgehrd.o cgelq2.o cgelqf.o \
|
||||
cgels.o cgelsd.o cgelss.o cgelsx.o cgelsy.o cgeql2.o cgeqlf.o cgeqp3.o \
|
||||
cgeqpf.o cgeqr2.o cgeqr2p.o cgeqrf.o cgeqrfp.o cgerfs.o \
|
||||
cgerq2.o cgerqf.o cgesc2.o cgesdd.o cgesv.o cgesvd.o \
|
||||
cgerq2.o cgerqf.o cgesc2.o cgesdd.o cgesvd.o \
|
||||
cgesvx.o cgetc2.o cgetri.o \
|
||||
cggbak.o cggbal.o cgges.o cggesx.o cggev.o cggevx.o cggglm.o \
|
||||
cgghrd.o cgglse.o cggqrf.o cggrqf.o \
|
||||
@@ -210,14 +210,14 @@ CLASRC = \
|
||||
clasyf.o clasyf_rook.o clatbs.o clatdf.o clatps.o clatrd.o clatrs.o clatrz.o \
|
||||
clatzm.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \
|
||||
cpbsvx.o cpbtf2.o cpbtrf.o cpbtrs.o cpocon.o cpoequ.o cporfs.o \
|
||||
cposv.o cposvx.o cpotri.o cpstrf.o cpstf2.o \
|
||||
cposv.o cposvx.o cpstrf.o cpstf2.o \
|
||||
cppcon.o cppequ.o cpprfs.o cppsv.o cppsvx.o cpptrf.o cpptri.o cpptrs.o \
|
||||
cptcon.o cpteqr.o cptrfs.o cptsv.o cptsvx.o cpttrf.o cpttrs.o cptts2.o \
|
||||
crot.o cspcon.o cspmv.o cspr.o csprfs.o cspsv.o \
|
||||
crot.o cspcon.o csprfs.o cspsv.o \
|
||||
cspsvx.o csptrf.o csptri.o csptrs.o csrscl.o cstedc.o \
|
||||
cstegr.o cstein.o csteqr.o \
|
||||
csycon.o csymv.o \
|
||||
csyr.o csyrfs.o csysv.o csysvx.o csytf2.o csytrf.o csytri.o csytri2.o csytri2x.o \
|
||||
csycon.o \
|
||||
csyrfs.o csysv.o csysvx.o csytf2.o csytrf.o csytri.o csytri2.o csytri2x.o \
|
||||
csyswapr.o csytrs.o csytrs2.o csyconv.o \
|
||||
csytf2_rook.o csytrf_rook.o csytrs_rook.o \
|
||||
csytri_rook.o csycon_rook.o csysv_rook.o \
|
||||
@@ -252,7 +252,7 @@ CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \
|
||||
cla_lin_berr.o clarscl2.o clascl2.o cla_wwaddw.o
|
||||
endif
|
||||
|
||||
ZCLASRC = cpotrs.o
|
||||
ZCLASRC = cpotrs.o
|
||||
|
||||
DLASRC = \
|
||||
dgbbrd.o dgbcon.o dgbequ.o dgbrfs.o dgbsv.o \
|
||||
@@ -261,7 +261,7 @@ DLASRC = \
|
||||
dgegs.o dgegv.o dgehd2.o dgehrd.o dgelq2.o dgelqf.o \
|
||||
dgels.o dgelsd.o dgelss.o dgelsx.o dgelsy.o dgeql2.o dgeqlf.o \
|
||||
dgeqp3.o dgeqpf.o dgeqr2.o dgeqr2p.o dgeqrf.o dgeqrfp.o dgerfs.o \
|
||||
dgerq2.o dgerqf.o dgesc2.o dgesdd.o dgesv.o dgesvd.o dgesvx.o \
|
||||
dgerq2.o dgerqf.o dgesc2.o dgesdd.o dgesvd.o dgesvx.o \
|
||||
dgetc2.o dgetri.o \
|
||||
dggbak.o dggbal.o dgges.o dggesx.o dggev.o dggevx.o \
|
||||
dggglm.o dgghrd.o dgglse.o dggqrf.o \
|
||||
@@ -287,7 +287,7 @@ DLASRC = \
|
||||
dormr3.o dormrq.o dormrz.o dormtr.o dpbcon.o dpbequ.o dpbrfs.o \
|
||||
dpbstf.o dpbsv.o dpbsvx.o \
|
||||
dpbtf2.o dpbtrf.o dpbtrs.o dpocon.o dpoequ.o dporfs.o dposv.o \
|
||||
dposvx.o dpotri.o dpotrs.o dpstrf.o dpstf2.o \
|
||||
dposvx.o dpotrs.o dpstrf.o dpstf2.o \
|
||||
dppcon.o dppequ.o \
|
||||
dpprfs.o dppsv.o dppsvx.o dpptrf.o dpptri.o dpptrs.o dptcon.o \
|
||||
dpteqr.o dptrfs.o dptsv.o dptsvx.o dpttrs.o dptts2.o drscl.o \
|
||||
@@ -335,8 +335,8 @@ ZLASRC = \
|
||||
zgegs.o zgegv.o zgehd2.o zgehrd.o zgelq2.o zgelqf.o \
|
||||
zgels.o zgelsd.o zgelss.o zgelsx.o zgelsy.o zgeql2.o zgeqlf.o zgeqp3.o \
|
||||
zgeqpf.o zgeqr2.o zgeqr2p.o zgeqrf.o zgeqrfp.o zgerfs.o zgerq2.o zgerqf.o \
|
||||
zgesc2.o zgesdd.o zgesv.o zgesvd.o zgesvx.o zgetc2.o \
|
||||
zgetri.o \
|
||||
zgesc2.o zgesdd.o zgesvd.o zgesvx.o zgetc2.o \
|
||||
zgetri.o \
|
||||
zggbak.o zggbal.o zgges.o zggesx.o zggev.o zggevx.o zggglm.o \
|
||||
zgghrd.o zgglse.o zggqrf.o zggrqf.o \
|
||||
zggsvd.o zggsvp.o \
|
||||
@@ -369,17 +369,17 @@ ZLASRC = \
|
||||
zlarfx.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \
|
||||
zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \
|
||||
zlassq.o zlasyf.o zlasyf_rook.o \
|
||||
zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlatzm.o zlauu2.o \
|
||||
zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlatzm.o \
|
||||
zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \
|
||||
zpbsvx.o zpbtf2.o zpbtrf.o zpbtrs.o zpocon.o zpoequ.o zporfs.o \
|
||||
zposv.o zposvx.o zpotri.o zpotrs.o zpstrf.o zpstf2.o \
|
||||
zposv.o zposvx.o zpotrs.o zpstrf.o zpstf2.o \
|
||||
zppcon.o zppequ.o zpprfs.o zppsv.o zppsvx.o zpptrf.o zpptri.o zpptrs.o \
|
||||
zptcon.o zpteqr.o zptrfs.o zptsv.o zptsvx.o zpttrf.o zpttrs.o zptts2.o \
|
||||
zrot.o zspcon.o zspmv.o zspr.o zsprfs.o zspsv.o \
|
||||
zrot.o zspcon.o zsprfs.o zspsv.o \
|
||||
zspsvx.o zsptrf.o zsptri.o zsptrs.o zdrscl.o zstedc.o \
|
||||
zstegr.o zstein.o zsteqr.o \
|
||||
zsycon.o zsymv.o \
|
||||
zsyr.o zsyrfs.o zsysv.o zsysvx.o zsytf2.o zsytrf.o zsytri.o zsytri2.o zsytri2x.o \
|
||||
zsycon.o \
|
||||
zsyrfs.o zsysv.o zsysvx.o zsytf2.o zsytrf.o zsytri.o zsytri2.o zsytri2x.o \
|
||||
zsyswapr.o zsytrs.o zsytrs2.o zsyconv.o \
|
||||
zsytf2_rook.o zsytrf_rook.o zsytrs_rook.o \
|
||||
zsytri_rook.o zsycon_rook.o zsysv_rook.o \
|
||||
@@ -417,8 +417,6 @@ endif
|
||||
ALLOBJ = $(SLASRC) $(DLASRC) $(DSLASRC) $(CLASRC) $(ZLASRC) $(ZCLASRC) \
|
||||
$(SCLAUX) $(DZLAUX) $(ALLAUX)
|
||||
|
||||
ALLOBJ_P = $(ALLOBJ:.o=.$(PSUFFIX))
|
||||
|
||||
ifdef USEXBLAS
|
||||
ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
|
||||
endif
|
||||
@@ -435,6 +433,7 @@ lapacklib: $(ALLOBJ) $(ALLXOBJ)
|
||||
$(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ_P)
|
||||
$(RANLIB) $@
|
||||
|
||||
|
||||
single: $(SLASRC) $(DSLASRC) $(SXLASRC) $(SCLAUX) $(ALLAUX)
|
||||
$(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \
|
||||
$(SXLASRC) $(SCLAUX) $(ALLAUX) $(ALLXAUX)
|
||||
@@ -483,16 +482,11 @@ clean:
|
||||
%.$(PSUFFIX): %.f
|
||||
$(FORTRAN) $(POPTS) -c $< -o $@
|
||||
|
||||
slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
sla_wwaddw.o: sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
dla_wwaddw.o: dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
cla_wwaddw.o: cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
zla_wwaddw.o: zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
|
||||
slaruv.$(PSUFFIX): slaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
dlaruv.$(PSUFFIX): dlaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
sla_wwaddw.$(PSUFFIX): sla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
dla_wwaddw.$(PSUFFIX): dla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
cla_wwaddw.$(PSUFFIX): cla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
zla_wwaddw.$(PSUFFIX): zla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
sla_wwaddw.o: sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
dla_wwaddw.o: dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
cla_wwaddw.o: cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
zla_wwaddw.o: zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
|
||||
|
||||
@@ -5,5 +5,5 @@ Data file for testing COMPLEX LAPACK linear equation routines RFP format
|
||||
1 2 15 Values of NRHS (number of right hand sides)
|
||||
9 Number of matrix types (list types on next line if 0 < NTYPES < 9)
|
||||
1 2 3 4 5 6 7 8 9 Matrix Types
|
||||
30.0 Threshold value of test ratio
|
||||
50.0 Threshold value of test ratio
|
||||
T Put T to test the error exits
|
||||
|
||||
@@ -5,7 +5,7 @@ SEP: Data file for testing Symmetric Eigenvalue Problem routines
|
||||
1 3 3 3 10 Values of NB (blocksize)
|
||||
2 2 2 2 2 Values of NBMIN (minimum blocksize)
|
||||
1 0 5 9 1 Values of NX (crossover point)
|
||||
50.0 Threshold value
|
||||
60.0 Threshold value
|
||||
T Put T to test the LAPACK routines
|
||||
T Put T to test the driver routines
|
||||
T Put T to test the error exits
|
||||
|
||||
@@ -7,7 +7,7 @@ SVD: Data file for testing Singular Value Decomposition routines
|
||||
2 2 2 2 2 Values of NBMIN (minimum blocksize)
|
||||
1 0 5 9 1 Values of NX (crossover point)
|
||||
2 0 2 2 2 Values of NRHS
|
||||
50.0 Threshold value
|
||||
54.0 Threshold value
|
||||
T Put T to test the LAPACK routines
|
||||
T Put T to test the driver routines
|
||||
T Put T to test the error exits
|
||||
|
||||
@@ -2072,9 +2072,9 @@ SOBJ_FILES := $(SSRC_OBJ)
|
||||
DOBJ_FILES := $(DSRC_OBJ)
|
||||
ZOBJ_FILES := $(ZSRC_OBJ)
|
||||
|
||||
ifdef LAPACKE_TESTING
|
||||
# ifdef LAPACKE_TESTING
|
||||
ZOBJ_FILES += $(MATGEN_OBJ)
|
||||
endif
|
||||
#endif
|
||||
|
||||
ALLOBJ = $(COBJ_FILES) $(DOBJ_FILES) $(SOBJ_FILES) $(ZOBJ_FILES) $(OBJ_FILES)
|
||||
|
||||
@@ -2093,7 +2093,9 @@ all: ../../$(LAPACKELIB)
|
||||
$(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(DOBJ_FILES)
|
||||
$(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(SOBJ_FILES)
|
||||
$(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ZOBJ_FILES)
|
||||
ifdef USEXBLAS
|
||||
$(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ALLXOBJ)
|
||||
endif
|
||||
$(RANLIB) ../../$(LAPACKELIB)
|
||||
|
||||
.c.o:
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
TOPDIR = ..
|
||||
include ../Makefile.system
|
||||
|
||||
SUBDIRS = laswp getf2 getrf potf2 potrf lauu2 lauum trti2 trtri getrs
|
||||
#SUBDIRS = laswp getf2 getrf potf2 potrf lauu2 lauum trti2 trtri getrs
|
||||
SUBDIRS = getrf getf2 laswp getrs potrf potf2 lauu2 lauum trti2 trtri
|
||||
|
||||
FLAMEDIRS = laswp getf2 potf2 lauu2 trti2
|
||||
|
||||
|
||||
@@ -1,194 +0,0 @@
|
||||
SUBROUTINE CGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
|
||||
*
|
||||
* -- LAPACK routine (version 3.0) --
|
||||
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
|
||||
* Courant Institute, Argonne National Lab, and Rice University
|
||||
* June 30, 1999
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
INTEGER INFO, LDA, LWORK, N
|
||||
* ..
|
||||
* .. Array Arguments ..
|
||||
INTEGER IPIV( * )
|
||||
COMPLEX A( LDA, * ), WORK( * )
|
||||
* ..
|
||||
*
|
||||
* Purpose
|
||||
* =======
|
||||
*
|
||||
* CGETRI computes the inverse of a matrix using the LU factorization
|
||||
* computed by CGETRF.
|
||||
*
|
||||
* This method inverts U and then computes inv(A) by solving the system
|
||||
* inv(A)*L = inv(U) for inv(A).
|
||||
*
|
||||
* Arguments
|
||||
* =========
|
||||
*
|
||||
* N (input) INTEGER
|
||||
* The order of the matrix A. N >= 0.
|
||||
*
|
||||
* A (input/output) COMPLEX array, dimension (LDA,N)
|
||||
* On entry, the factors L and U from the factorization
|
||||
* A = P*L*U as computed by CGETRF.
|
||||
* On exit, if INFO = 0, the inverse of the original matrix A.
|
||||
*
|
||||
* LDA (input) INTEGER
|
||||
* The leading dimension of the array A. LDA >= max(1,N).
|
||||
*
|
||||
* IPIV (input) INTEGER array, dimension (N)
|
||||
* The pivot indices from CGETRF; for 1<=i<=N, row i of the
|
||||
* matrix was interchanged with row IPIV(i).
|
||||
*
|
||||
* WORK (workspace/output) COMPLEX array, dimension (LWORK)
|
||||
* On exit, if INFO=0, then WORK(1) returns the optimal LWORK.
|
||||
*
|
||||
* LWORK (input) INTEGER
|
||||
* The dimension of the array WORK. LWORK >= max(1,N).
|
||||
* For optimal performance LWORK >= N*NB, where NB is
|
||||
* the optimal blocksize returned by ILAENV.
|
||||
*
|
||||
* If LWORK = -1, then a workspace query is assumed; the routine
|
||||
* only calculates the optimal size of the WORK array, returns
|
||||
* this value as the first entry of the WORK array, and no error
|
||||
* message related to LWORK is issued by XERBLA.
|
||||
*
|
||||
* INFO (output) INTEGER
|
||||
* = 0: successful exit
|
||||
* < 0: if INFO = -i, the i-th argument had an illegal value
|
||||
* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is
|
||||
* singular and its inverse could not be computed.
|
||||
*
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
COMPLEX ZERO, ONE
|
||||
PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ),
|
||||
$ ONE = ( 1.0E+0, 0.0E+0 ) )
|
||||
* ..
|
||||
* .. Local Scalars ..
|
||||
LOGICAL LQUERY
|
||||
INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
|
||||
$ NBMIN, NN
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
INTEGER ILAENV
|
||||
EXTERNAL ILAENV
|
||||
* ..
|
||||
* .. External Subroutines ..
|
||||
EXTERNAL CGEMM, CGEMV, CSWAP, CTRSM, CTRTRI, XERBLA
|
||||
* ..
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC MAX, MIN
|
||||
* ..
|
||||
* .. Executable Statements ..
|
||||
*
|
||||
* Test the input parameters.
|
||||
*
|
||||
INFO = 0
|
||||
NB = ILAENV( 1, 'CGETRI', ' ', N, -1, -1, -1 )
|
||||
LWKOPT = N*NB
|
||||
WORK( 1 ) = LWKOPT
|
||||
LQUERY = ( LWORK.EQ.-1 )
|
||||
IF( N.LT.0 ) THEN
|
||||
INFO = -1
|
||||
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
|
||||
INFO = -3
|
||||
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
|
||||
INFO = -6
|
||||
END IF
|
||||
IF( INFO.NE.0 ) THEN
|
||||
CALL XERBLA( 'CGETRI', -INFO )
|
||||
RETURN
|
||||
ELSE IF( LQUERY ) THEN
|
||||
RETURN
|
||||
END IF
|
||||
*
|
||||
* Quick return if possible
|
||||
*
|
||||
IF( N.EQ.0 )
|
||||
$ RETURN
|
||||
*
|
||||
* Form inv(U). If INFO > 0 from CTRTRI, then U is singular,
|
||||
* and the inverse is not computed.
|
||||
*
|
||||
CALL CTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
|
||||
IF( INFO.GT.0 )
|
||||
$ RETURN
|
||||
*
|
||||
NBMIN = 2
|
||||
LDWORK = N
|
||||
IF( NB.GT.1 .AND. NB.LT.N ) THEN
|
||||
IWS = MAX( LDWORK*NB, 1 )
|
||||
IF( LWORK.LT.IWS ) THEN
|
||||
NB = LWORK / LDWORK
|
||||
NBMIN = MAX( 2, ILAENV( 2, 'CGETRI', ' ', N, -1, -1, -1 ) )
|
||||
END IF
|
||||
ELSE
|
||||
IWS = N
|
||||
END IF
|
||||
*
|
||||
* Solve the equation inv(A)*L = inv(U) for inv(A).
|
||||
*
|
||||
IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
|
||||
*
|
||||
* Use unblocked code.
|
||||
*
|
||||
DO 20 J = N, 1, -1
|
||||
*
|
||||
* Copy current column of L to WORK and replace with zeros.
|
||||
*
|
||||
DO 10 I = J + 1, N
|
||||
WORK( I ) = A( I, J )
|
||||
A( I, J ) = ZERO
|
||||
10 CONTINUE
|
||||
*
|
||||
* Compute current column of inv(A).
|
||||
*
|
||||
IF( J.LT.N )
|
||||
$ CALL CGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
|
||||
$ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
|
||||
20 CONTINUE
|
||||
ELSE
|
||||
*
|
||||
* Use blocked code.
|
||||
*
|
||||
NN = ( ( N-1 ) / NB )*NB + 1
|
||||
DO 50 J = NN, 1, -NB
|
||||
JB = MIN( NB, N-J+1 )
|
||||
*
|
||||
* Copy current block column of L to WORK and replace with
|
||||
* zeros.
|
||||
*
|
||||
DO 40 JJ = J, J + JB - 1
|
||||
DO 30 I = JJ + 1, N
|
||||
WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
|
||||
A( I, JJ ) = ZERO
|
||||
30 CONTINUE
|
||||
40 CONTINUE
|
||||
*
|
||||
* Compute current block column of inv(A).
|
||||
*
|
||||
IF( J+JB.LE.N )
|
||||
$ CALL CGEMM( 'No transpose', 'No transpose', N, JB,
|
||||
$ N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
|
||||
$ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
|
||||
CALL CTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
|
||||
$ ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
|
||||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
* Apply column interchanges.
|
||||
*
|
||||
DO 60 J = N - 1, 1, -1
|
||||
JP = IPIV( J )
|
||||
IF( JP.NE.J )
|
||||
$ CALL CSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
|
||||
60 CONTINUE
|
||||
*
|
||||
WORK( 1 ) = IWS
|
||||
RETURN
|
||||
*
|
||||
* End of CGETRI
|
||||
*
|
||||
END
|
||||
@@ -1,193 +0,0 @@
|
||||
SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
|
||||
*
|
||||
* -- LAPACK routine (version 3.0) --
|
||||
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
|
||||
* Courant Institute, Argonne National Lab, and Rice University
|
||||
* June 30, 1999
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
INTEGER INFO, LDA, LWORK, N
|
||||
* ..
|
||||
* .. Array Arguments ..
|
||||
INTEGER IPIV( * )
|
||||
DOUBLE PRECISION A( LDA, * ), WORK( * )
|
||||
* ..
|
||||
*
|
||||
* Purpose
|
||||
* =======
|
||||
*
|
||||
* DGETRI computes the inverse of a matrix using the LU factorization
|
||||
* computed by DGETRF.
|
||||
*
|
||||
* This method inverts U and then computes inv(A) by solving the system
|
||||
* inv(A)*L = inv(U) for inv(A).
|
||||
*
|
||||
* Arguments
|
||||
* =========
|
||||
*
|
||||
* N (input) INTEGER
|
||||
* The order of the matrix A. N >= 0.
|
||||
*
|
||||
* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
|
||||
* On entry, the factors L and U from the factorization
|
||||
* A = P*L*U as computed by DGETRF.
|
||||
* On exit, if INFO = 0, the inverse of the original matrix A.
|
||||
*
|
||||
* LDA (input) INTEGER
|
||||
* The leading dimension of the array A. LDA >= max(1,N).
|
||||
*
|
||||
* IPIV (input) INTEGER array, dimension (N)
|
||||
* The pivot indices from DGETRF; for 1<=i<=N, row i of the
|
||||
* matrix was interchanged with row IPIV(i).
|
||||
*
|
||||
* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
|
||||
* On exit, if INFO=0, then WORK(1) returns the optimal LWORK.
|
||||
*
|
||||
* LWORK (input) INTEGER
|
||||
* The dimension of the array WORK. LWORK >= max(1,N).
|
||||
* For optimal performance LWORK >= N*NB, where NB is
|
||||
* the optimal blocksize returned by ILAENV.
|
||||
*
|
||||
* If LWORK = -1, then a workspace query is assumed; the routine
|
||||
* only calculates the optimal size of the WORK array, returns
|
||||
* this value as the first entry of the WORK array, and no error
|
||||
* message related to LWORK is issued by XERBLA.
|
||||
*
|
||||
* INFO (output) INTEGER
|
||||
* = 0: successful exit
|
||||
* < 0: if INFO = -i, the i-th argument had an illegal value
|
||||
* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is
|
||||
* singular and its inverse could not be computed.
|
||||
*
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
DOUBLE PRECISION ZERO, ONE
|
||||
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
|
||||
* ..
|
||||
* .. Local Scalars ..
|
||||
LOGICAL LQUERY
|
||||
INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
|
||||
$ NBMIN, NN
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
INTEGER ILAENV
|
||||
EXTERNAL ILAENV
|
||||
* ..
|
||||
* .. External Subroutines ..
|
||||
EXTERNAL DGEMM, DGEMV, DSWAP, DTRSM, DTRTRI, XERBLA
|
||||
* ..
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC MAX, MIN
|
||||
* ..
|
||||
* .. Executable Statements ..
|
||||
*
|
||||
* Test the input parameters.
|
||||
*
|
||||
INFO = 0
|
||||
NB = ILAENV( 1, 'DGETRI', ' ', N, -1, -1, -1 )
|
||||
LWKOPT = N*NB
|
||||
WORK( 1 ) = LWKOPT
|
||||
LQUERY = ( LWORK.EQ.-1 )
|
||||
IF( N.LT.0 ) THEN
|
||||
INFO = -1
|
||||
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
|
||||
INFO = -3
|
||||
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
|
||||
INFO = -6
|
||||
END IF
|
||||
IF( INFO.NE.0 ) THEN
|
||||
CALL XERBLA( 'DGETRI', -INFO )
|
||||
RETURN
|
||||
ELSE IF( LQUERY ) THEN
|
||||
RETURN
|
||||
END IF
|
||||
*
|
||||
* Quick return if possible
|
||||
*
|
||||
IF( N.EQ.0 )
|
||||
$ RETURN
|
||||
*
|
||||
* Form inv(U). If INFO > 0 from DTRTRI, then U is singular,
|
||||
* and the inverse is not computed.
|
||||
*
|
||||
CALL DTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
|
||||
IF( INFO.GT.0 )
|
||||
$ RETURN
|
||||
*
|
||||
NBMIN = 2
|
||||
LDWORK = N
|
||||
IF( NB.GT.1 .AND. NB.LT.N ) THEN
|
||||
IWS = MAX( LDWORK*NB, 1 )
|
||||
IF( LWORK.LT.IWS ) THEN
|
||||
NB = LWORK / LDWORK
|
||||
NBMIN = MAX( 2, ILAENV( 2, 'DGETRI', ' ', N, -1, -1, -1 ) )
|
||||
END IF
|
||||
ELSE
|
||||
IWS = N
|
||||
END IF
|
||||
*
|
||||
* Solve the equation inv(A)*L = inv(U) for inv(A).
|
||||
*
|
||||
IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
|
||||
*
|
||||
* Use unblocked code.
|
||||
*
|
||||
DO 20 J = N, 1, -1
|
||||
*
|
||||
* Copy current column of L to WORK and replace with zeros.
|
||||
*
|
||||
DO 10 I = J + 1, N
|
||||
WORK( I ) = A( I, J )
|
||||
A( I, J ) = ZERO
|
||||
10 CONTINUE
|
||||
*
|
||||
* Compute current column of inv(A).
|
||||
*
|
||||
IF( J.LT.N )
|
||||
$ CALL DGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
|
||||
$ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
|
||||
20 CONTINUE
|
||||
ELSE
|
||||
*
|
||||
* Use blocked code.
|
||||
*
|
||||
NN = ( ( N-1 ) / NB )*NB + 1
|
||||
DO 50 J = NN, 1, -NB
|
||||
JB = MIN( NB, N-J+1 )
|
||||
*
|
||||
* Copy current block column of L to WORK and replace with
|
||||
* zeros.
|
||||
*
|
||||
DO 40 JJ = J, J + JB - 1
|
||||
DO 30 I = JJ + 1, N
|
||||
WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
|
||||
A( I, JJ ) = ZERO
|
||||
30 CONTINUE
|
||||
40 CONTINUE
|
||||
*
|
||||
* Compute current block column of inv(A).
|
||||
*
|
||||
IF( J+JB.LE.N )
|
||||
$ CALL DGEMM( 'No transpose', 'No transpose', N, JB,
|
||||
$ N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
|
||||
$ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
|
||||
CALL DTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
|
||||
$ ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
|
||||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
* Apply column interchanges.
|
||||
*
|
||||
DO 60 J = N - 1, 1, -1
|
||||
JP = IPIV( J )
|
||||
IF( JP.NE.J )
|
||||
$ CALL DSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
|
||||
60 CONTINUE
|
||||
*
|
||||
WORK( 1 ) = IWS
|
||||
RETURN
|
||||
*
|
||||
* End of DGETRI
|
||||
*
|
||||
END
|
||||
@@ -1,193 +0,0 @@
|
||||
SUBROUTINE SGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
|
||||
*
|
||||
* -- LAPACK routine (version 3.0) --
|
||||
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
|
||||
* Courant Institute, Argonne National Lab, and Rice University
|
||||
* June 30, 1999
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
INTEGER INFO, LDA, LWORK, N
|
||||
* ..
|
||||
* .. Array Arguments ..
|
||||
INTEGER IPIV( * )
|
||||
REAL A( LDA, * ), WORK( * )
|
||||
* ..
|
||||
*
|
||||
* Purpose
|
||||
* =======
|
||||
*
|
||||
* SGETRI computes the inverse of a matrix using the LU factorization
|
||||
* computed by SGETRF.
|
||||
*
|
||||
* This method inverts U and then computes inv(A) by solving the system
|
||||
* inv(A)*L = inv(U) for inv(A).
|
||||
*
|
||||
* Arguments
|
||||
* =========
|
||||
*
|
||||
* N (input) INTEGER
|
||||
* The order of the matrix A. N >= 0.
|
||||
*
|
||||
* A (input/output) REAL array, dimension (LDA,N)
|
||||
* On entry, the factors L and U from the factorization
|
||||
* A = P*L*U as computed by SGETRF.
|
||||
* On exit, if INFO = 0, the inverse of the original matrix A.
|
||||
*
|
||||
* LDA (input) INTEGER
|
||||
* The leading dimension of the array A. LDA >= max(1,N).
|
||||
*
|
||||
* IPIV (input) INTEGER array, dimension (N)
|
||||
* The pivot indices from SGETRF; for 1<=i<=N, row i of the
|
||||
* matrix was interchanged with row IPIV(i).
|
||||
*
|
||||
* WORK (workspace/output) REAL array, dimension (LWORK)
|
||||
* On exit, if INFO=0, then WORK(1) returns the optimal LWORK.
|
||||
*
|
||||
* LWORK (input) INTEGER
|
||||
* The dimension of the array WORK. LWORK >= max(1,N).
|
||||
* For optimal performance LWORK >= N*NB, where NB is
|
||||
* the optimal blocksize returned by ILAENV.
|
||||
*
|
||||
* If LWORK = -1, then a workspace query is assumed; the routine
|
||||
* only calculates the optimal size of the WORK array, returns
|
||||
* this value as the first entry of the WORK array, and no error
|
||||
* message related to LWORK is issued by XERBLA.
|
||||
*
|
||||
* INFO (output) INTEGER
|
||||
* = 0: successful exit
|
||||
* < 0: if INFO = -i, the i-th argument had an illegal value
|
||||
* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is
|
||||
* singular and its inverse could not be computed.
|
||||
*
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
REAL ZERO, ONE
|
||||
PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 )
|
||||
* ..
|
||||
* .. Local Scalars ..
|
||||
LOGICAL LQUERY
|
||||
INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
|
||||
$ NBMIN, NN
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
INTEGER ILAENV
|
||||
EXTERNAL ILAENV
|
||||
* ..
|
||||
* .. External Subroutines ..
|
||||
EXTERNAL SGEMM, SGEMV, SSWAP, STRSM, STRTRI, XERBLA
|
||||
* ..
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC MAX, MIN
|
||||
* ..
|
||||
* .. Executable Statements ..
|
||||
*
|
||||
* Test the input parameters.
|
||||
*
|
||||
INFO = 0
|
||||
NB = ILAENV( 1, 'SGETRI', ' ', N, -1, -1, -1 )
|
||||
LWKOPT = N*NB
|
||||
WORK( 1 ) = LWKOPT
|
||||
LQUERY = ( LWORK.EQ.-1 )
|
||||
IF( N.LT.0 ) THEN
|
||||
INFO = -1
|
||||
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
|
||||
INFO = -3
|
||||
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
|
||||
INFO = -6
|
||||
END IF
|
||||
IF( INFO.NE.0 ) THEN
|
||||
CALL XERBLA( 'SGETRI', -INFO )
|
||||
RETURN
|
||||
ELSE IF( LQUERY ) THEN
|
||||
RETURN
|
||||
END IF
|
||||
*
|
||||
* Quick return if possible
|
||||
*
|
||||
IF( N.EQ.0 )
|
||||
$ RETURN
|
||||
*
|
||||
* Form inv(U). If INFO > 0 from STRTRI, then U is singular,
|
||||
* and the inverse is not computed.
|
||||
*
|
||||
CALL STRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
|
||||
IF( INFO.GT.0 )
|
||||
$ RETURN
|
||||
*
|
||||
NBMIN = 2
|
||||
LDWORK = N
|
||||
IF( NB.GT.1 .AND. NB.LT.N ) THEN
|
||||
IWS = MAX( LDWORK*NB, 1 )
|
||||
IF( LWORK.LT.IWS ) THEN
|
||||
NB = LWORK / LDWORK
|
||||
NBMIN = MAX( 2, ILAENV( 2, 'SGETRI', ' ', N, -1, -1, -1 ) )
|
||||
END IF
|
||||
ELSE
|
||||
IWS = N
|
||||
END IF
|
||||
*
|
||||
* Solve the equation inv(A)*L = inv(U) for inv(A).
|
||||
*
|
||||
IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
|
||||
*
|
||||
* Use unblocked code.
|
||||
*
|
||||
DO 20 J = N, 1, -1
|
||||
*
|
||||
* Copy current column of L to WORK and replace with zeros.
|
||||
*
|
||||
DO 10 I = J + 1, N
|
||||
WORK( I ) = A( I, J )
|
||||
A( I, J ) = ZERO
|
||||
10 CONTINUE
|
||||
*
|
||||
* Compute current column of inv(A).
|
||||
*
|
||||
IF( J.LT.N )
|
||||
$ CALL SGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
|
||||
$ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
|
||||
20 CONTINUE
|
||||
ELSE
|
||||
*
|
||||
* Use blocked code.
|
||||
*
|
||||
NN = ( ( N-1 ) / NB )*NB + 1
|
||||
DO 50 J = NN, 1, -NB
|
||||
JB = MIN( NB, N-J+1 )
|
||||
*
|
||||
* Copy current block column of L to WORK and replace with
|
||||
* zeros.
|
||||
*
|
||||
DO 40 JJ = J, J + JB - 1
|
||||
DO 30 I = JJ + 1, N
|
||||
WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
|
||||
A( I, JJ ) = ZERO
|
||||
30 CONTINUE
|
||||
40 CONTINUE
|
||||
*
|
||||
* Compute current block column of inv(A).
|
||||
*
|
||||
IF( J+JB.LE.N )
|
||||
$ CALL SGEMM( 'No transpose', 'No transpose', N, JB,
|
||||
$ N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
|
||||
$ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
|
||||
CALL STRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
|
||||
$ ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
|
||||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
* Apply column interchanges.
|
||||
*
|
||||
DO 60 J = N - 1, 1, -1
|
||||
JP = IPIV( J )
|
||||
IF( JP.NE.J )
|
||||
$ CALL SSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
|
||||
60 CONTINUE
|
||||
*
|
||||
WORK( 1 ) = IWS
|
||||
RETURN
|
||||
*
|
||||
* End of SGETRI
|
||||
*
|
||||
END
|
||||
@@ -1,194 +0,0 @@
|
||||
SUBROUTINE ZGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
|
||||
*
|
||||
* -- LAPACK routine (version 3.0) --
|
||||
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
|
||||
* Courant Institute, Argonne National Lab, and Rice University
|
||||
* June 30, 1999
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
INTEGER INFO, LDA, LWORK, N
|
||||
* ..
|
||||
* .. Array Arguments ..
|
||||
INTEGER IPIV( * )
|
||||
COMPLEX*16 A( LDA, * ), WORK( * )
|
||||
* ..
|
||||
*
|
||||
* Purpose
|
||||
* =======
|
||||
*
|
||||
* ZGETRI computes the inverse of a matrix using the LU factorization
|
||||
* computed by ZGETRF.
|
||||
*
|
||||
* This method inverts U and then computes inv(A) by solving the system
|
||||
* inv(A)*L = inv(U) for inv(A).
|
||||
*
|
||||
* Arguments
|
||||
* =========
|
||||
*
|
||||
* N (input) INTEGER
|
||||
* The order of the matrix A. N >= 0.
|
||||
*
|
||||
* A (input/output) COMPLEX*16 array, dimension (LDA,N)
|
||||
* On entry, the factors L and U from the factorization
|
||||
* A = P*L*U as computed by ZGETRF.
|
||||
* On exit, if INFO = 0, the inverse of the original matrix A.
|
||||
*
|
||||
* LDA (input) INTEGER
|
||||
* The leading dimension of the array A. LDA >= max(1,N).
|
||||
*
|
||||
* IPIV (input) INTEGER array, dimension (N)
|
||||
* The pivot indices from ZGETRF; for 1<=i<=N, row i of the
|
||||
* matrix was interchanged with row IPIV(i).
|
||||
*
|
||||
* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK)
|
||||
* On exit, if INFO=0, then WORK(1) returns the optimal LWORK.
|
||||
*
|
||||
* LWORK (input) INTEGER
|
||||
* The dimension of the array WORK. LWORK >= max(1,N).
|
||||
* For optimal performance LWORK >= N*NB, where NB is
|
||||
* the optimal blocksize returned by ILAENV.
|
||||
*
|
||||
* If LWORK = -1, then a workspace query is assumed; the routine
|
||||
* only calculates the optimal size of the WORK array, returns
|
||||
* this value as the first entry of the WORK array, and no error
|
||||
* message related to LWORK is issued by XERBLA.
|
||||
*
|
||||
* INFO (output) INTEGER
|
||||
* = 0: successful exit
|
||||
* < 0: if INFO = -i, the i-th argument had an illegal value
|
||||
* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is
|
||||
* singular and its inverse could not be computed.
|
||||
*
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
COMPLEX*16 ZERO, ONE
|
||||
PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ),
|
||||
$ ONE = ( 1.0D+0, 0.0D+0 ) )
|
||||
* ..
|
||||
* .. Local Scalars ..
|
||||
LOGICAL LQUERY
|
||||
INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
|
||||
$ NBMIN, NN
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
INTEGER ILAENV
|
||||
EXTERNAL ILAENV
|
||||
* ..
|
||||
* .. External Subroutines ..
|
||||
EXTERNAL XERBLA, ZGEMM, ZGEMV, ZSWAP, ZTRSM, ZTRTRI
|
||||
* ..
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC MAX, MIN
|
||||
* ..
|
||||
* .. Executable Statements ..
|
||||
*
|
||||
* Test the input parameters.
|
||||
*
|
||||
INFO = 0
|
||||
NB = ILAENV( 1, 'ZGETRI', ' ', N, -1, -1, -1 )
|
||||
LWKOPT = N*NB
|
||||
WORK( 1 ) = LWKOPT
|
||||
LQUERY = ( LWORK.EQ.-1 )
|
||||
IF( N.LT.0 ) THEN
|
||||
INFO = -1
|
||||
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
|
||||
INFO = -3
|
||||
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
|
||||
INFO = -6
|
||||
END IF
|
||||
IF( INFO.NE.0 ) THEN
|
||||
CALL XERBLA( 'ZGETRI', -INFO )
|
||||
RETURN
|
||||
ELSE IF( LQUERY ) THEN
|
||||
RETURN
|
||||
END IF
|
||||
*
|
||||
* Quick return if possible
|
||||
*
|
||||
IF( N.EQ.0 )
|
||||
$ RETURN
|
||||
*
|
||||
* Form inv(U). If INFO > 0 from ZTRTRI, then U is singular,
|
||||
* and the inverse is not computed.
|
||||
*
|
||||
CALL ZTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
|
||||
IF( INFO.GT.0 )
|
||||
$ RETURN
|
||||
*
|
||||
NBMIN = 2
|
||||
LDWORK = N
|
||||
IF( NB.GT.1 .AND. NB.LT.N ) THEN
|
||||
IWS = MAX( LDWORK*NB, 1 )
|
||||
IF( LWORK.LT.IWS ) THEN
|
||||
NB = LWORK / LDWORK
|
||||
NBMIN = MAX( 2, ILAENV( 2, 'ZGETRI', ' ', N, -1, -1, -1 ) )
|
||||
END IF
|
||||
ELSE
|
||||
IWS = N
|
||||
END IF
|
||||
*
|
||||
* Solve the equation inv(A)*L = inv(U) for inv(A).
|
||||
*
|
||||
IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
|
||||
*
|
||||
* Use unblocked code.
|
||||
*
|
||||
DO 20 J = N, 1, -1
|
||||
*
|
||||
* Copy current column of L to WORK and replace with zeros.
|
||||
*
|
||||
DO 10 I = J + 1, N
|
||||
WORK( I ) = A( I, J )
|
||||
A( I, J ) = ZERO
|
||||
10 CONTINUE
|
||||
*
|
||||
* Compute current column of inv(A).
|
||||
*
|
||||
IF( J.LT.N )
|
||||
$ CALL ZGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
|
||||
$ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
|
||||
20 CONTINUE
|
||||
ELSE
|
||||
*
|
||||
* Use blocked code.
|
||||
*
|
||||
NN = ( ( N-1 ) / NB )*NB + 1
|
||||
DO 50 J = NN, 1, -NB
|
||||
JB = MIN( NB, N-J+1 )
|
||||
*
|
||||
* Copy current block column of L to WORK and replace with
|
||||
* zeros.
|
||||
*
|
||||
DO 40 JJ = J, J + JB - 1
|
||||
DO 30 I = JJ + 1, N
|
||||
WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
|
||||
A( I, JJ ) = ZERO
|
||||
30 CONTINUE
|
||||
40 CONTINUE
|
||||
*
|
||||
* Compute current block column of inv(A).
|
||||
*
|
||||
IF( J+JB.LE.N )
|
||||
$ CALL ZGEMM( 'No transpose', 'No transpose', N, JB,
|
||||
$ N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
|
||||
$ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
|
||||
CALL ZTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
|
||||
$ ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
|
||||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
* Apply column interchanges.
|
||||
*
|
||||
DO 60 J = N - 1, 1, -1
|
||||
JP = IPIV( J )
|
||||
IF( JP.NE.J )
|
||||
$ CALL ZSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
|
||||
60 CONTINUE
|
||||
*
|
||||
WORK( 1 ) = IWS
|
||||
RETURN
|
||||
*
|
||||
* End of ZGETRI
|
||||
*
|
||||
END
|
||||
@@ -1,190 +1,113 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
/***************************************************************************
|
||||
* Copyright (c) 2013, The OpenBLAS Project
|
||||
* All rights reserved.
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* 3. Neither the name of the OpenBLAS project nor the names of
|
||||
* its contributors may be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* *****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2014/05/22 Saar
|
||||
* TEST double precision unblocked : OK
|
||||
* 2014/05/23 Saar
|
||||
* TEST double precision blocked: OK
|
||||
* TEST single precision blocked: OK
|
||||
**************************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "common.h"
|
||||
|
||||
static FLOAT dp1 = 1.;
|
||||
static FLOAT dm1 = -1.;
|
||||
// static FLOAT dp1 = 1.;
|
||||
// static FLOAT dm1 = -1.;
|
||||
|
||||
|
||||
#ifdef UNIT
|
||||
#define TRTI2 TRTI2_LU
|
||||
#define TRTI2 TRTI2_LU
|
||||
#define TRMM TRMM_LNLU
|
||||
#define TRSM TRSM_RNLU
|
||||
#else
|
||||
#define TRTI2 TRTI2_LN
|
||||
#define TRTI2 TRTI2_LN
|
||||
#define TRMM TRMM_LNLN
|
||||
#define TRSM TRSM_RNLN
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
#undef GEMM_P
|
||||
#undef GEMM_Q
|
||||
#undef GEMM_R
|
||||
|
||||
#define GEMM_P 8
|
||||
#define GEMM_Q 20
|
||||
#define GEMM_R 64
|
||||
#endif
|
||||
|
||||
#define GEMM_PQ MAX(GEMM_P, GEMM_Q)
|
||||
#define REAL_GEMM_R (GEMM_R - 2 * GEMM_PQ)
|
||||
|
||||
blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) {
|
||||
|
||||
BLASLONG n, lda;
|
||||
BLASLONG j, n, lda;
|
||||
FLOAT *a;
|
||||
|
||||
BLASLONG i, is, min_i, start_i;
|
||||
BLASLONG ls, min_l;
|
||||
BLASLONG bk;
|
||||
BLASLONG blocking;
|
||||
BLASLONG range_N[2];
|
||||
// BLASLONG info=0;
|
||||
BLASLONG jb;
|
||||
BLASLONG NB;
|
||||
BLASLONG start_j;
|
||||
|
||||
FLOAT *sa_trsm = (FLOAT *)((BLASLONG)sb);
|
||||
FLOAT *sa_trmm = (FLOAT *)((((BLASLONG)sb
|
||||
+ GEMM_PQ * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)
|
||||
+ GEMM_OFFSET_A);
|
||||
FLOAT *sb_gemm = (FLOAT *)((((BLASLONG)sa_trmm
|
||||
+ GEMM_PQ * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)
|
||||
+ GEMM_OFFSET_B);
|
||||
FLOAT beta_plus[2] = { ONE, ZERO};
|
||||
FLOAT beta_minus[2] = {-ONE, ZERO};
|
||||
|
||||
n = args -> n;
|
||||
a = (FLOAT *)args -> a;
|
||||
lda = args -> lda;
|
||||
|
||||
if (range_n) {
|
||||
n = range_n[1] - range_n[0];
|
||||
a += range_n[0] * (lda + 1) * COMPSIZE;
|
||||
}
|
||||
NB = GEMM_Q;
|
||||
|
||||
if (n <= DTB_ENTRIES) {
|
||||
if (n < NB) {
|
||||
TRTI2(args, NULL, range_n, sa, sb, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
blocking = GEMM_Q;
|
||||
if (n <= 4 * GEMM_Q) blocking = (n + 3) / 4;
|
||||
|
||||
start_i = 0;
|
||||
while (start_i < n) start_i += blocking;
|
||||
start_i -= blocking;
|
||||
lda = args -> lda;
|
||||
a = (FLOAT *) args -> a;
|
||||
args -> ldb = lda;
|
||||
args -> ldc = lda;
|
||||
args -> alpha = NULL;
|
||||
|
||||
for (i = start_i; i >= 0; i -= blocking) {
|
||||
bk = MIN(blocking, n - i);
|
||||
|
||||
if (n - bk - i > 0) TRSM_OLNCOPY(bk, bk, a + (i + i * lda) * COMPSIZE, lda, 0, sa_trsm);
|
||||
|
||||
if (!range_n) {
|
||||
range_N[0] = i;
|
||||
range_N[1] = i + bk;
|
||||
} else {
|
||||
range_N[0] = range_n[0] + i;
|
||||
range_N[1] = range_n[0] + i + bk;
|
||||
}
|
||||
start_j = 0;
|
||||
while (start_j < n) start_j += NB;
|
||||
start_j -= NB;
|
||||
|
||||
CNAME(args, NULL, range_N, sa, sa_trmm, 0);
|
||||
|
||||
if (i > 0) {
|
||||
TRMM_ILTCOPY(bk, bk, a + (i + i * lda) * COMPSIZE, lda, 0, 0, sa_trmm);
|
||||
for (j = start_j ; j >=0 ; j-= NB)
|
||||
{
|
||||
jb = n - j;
|
||||
if ( jb > NB ) jb = NB;
|
||||
|
||||
for (ls = 0; ls < i; ls += REAL_GEMM_R) {
|
||||
min_l = i - ls;
|
||||
if (min_l > REAL_GEMM_R) min_l = REAL_GEMM_R;
|
||||
|
||||
GEMM_ONCOPY (bk, min_l, a + (i + ls * lda) * COMPSIZE, lda, sb_gemm);
|
||||
|
||||
if (n - bk - i > 0) {
|
||||
for (is = i + bk; is < n; is += GEMM_P) {
|
||||
min_i = n - is;
|
||||
if (min_i > GEMM_P) min_i = GEMM_P;
|
||||
|
||||
if (ls == 0) {
|
||||
NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
|
||||
args -> n = jb;
|
||||
args -> m = n-j-jb;
|
||||
|
||||
TRSM_KERNEL_RT(min_i, bk, bk, dm1,
|
||||
#ifdef COMPLEX
|
||||
ZERO,
|
||||
#endif
|
||||
sa, sa_trsm,
|
||||
a + (is + i * lda) * COMPSIZE, lda, 0);
|
||||
} else {
|
||||
GEMM_ITCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
|
||||
}
|
||||
args -> a = &a[(j+jb+(j+jb)*lda) * COMPSIZE];
|
||||
args -> b = &a[(j+jb+j*lda) * COMPSIZE];
|
||||
args -> beta = beta_plus;
|
||||
|
||||
GEMM_KERNEL_N(min_i, min_l, bk, dp1,
|
||||
#ifdef COMPLEX
|
||||
ZERO,
|
||||
#endif
|
||||
sa, sb_gemm,
|
||||
a + (is + ls * lda) * COMPSIZE, lda);
|
||||
}
|
||||
}
|
||||
|
||||
for (is = 0; is < bk; is += GEMM_P) {
|
||||
min_i = bk - is;
|
||||
if (min_i > GEMM_P) min_i = GEMM_P;
|
||||
|
||||
TRMM_KERNEL_LT(min_i, min_l, bk, dp1,
|
||||
#ifdef COMPLEX
|
||||
ZERO,
|
||||
#endif
|
||||
sa_trmm + is * bk * COMPSIZE, sb_gemm,
|
||||
a + (i + is + ls * lda) * COMPSIZE, lda, is);
|
||||
}
|
||||
}
|
||||
TRMM(args, NULL, NULL, sa, sb, 0);
|
||||
|
||||
} else {
|
||||
args -> a = &a[(j+j*lda) * COMPSIZE];
|
||||
args -> beta = beta_minus;
|
||||
|
||||
TRSM(args, NULL, NULL, sa, sb, 0);
|
||||
|
||||
args -> a = &a[(j+j*lda) * COMPSIZE];
|
||||
|
||||
TRTI2(args, NULL, range_n, sa, sb, 0);
|
||||
|
||||
if (n - bk - i > 0) {
|
||||
for (is = 0; is < n - bk - i; is += GEMM_P) {
|
||||
min_i = n - bk - i - is;
|
||||
if (min_i > GEMM_P) min_i = GEMM_P;
|
||||
|
||||
NEG_TCOPY (bk, min_i, a + (i + bk + is + i * lda) * COMPSIZE, lda, sa);
|
||||
|
||||
TRSM_KERNEL_RT(min_i, bk, bk, dm1,
|
||||
#ifdef COMPLEX
|
||||
ZERO,
|
||||
#endif
|
||||
sa, sa_trsm,
|
||||
a + (i + bk + is + i * lda) * COMPSIZE, lda, 0);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1,46 +1,44 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
/***************************************************************************
|
||||
* Copyright (c) 2013, The OpenBLAS Project
|
||||
* All rights reserved.
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* 3. Neither the name of the OpenBLAS project nor the names of
|
||||
* its contributors may be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* *****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2014/05/22 Saar
|
||||
* TEST double precision unblocked : OK
|
||||
* TEST double precision blocked : OK
|
||||
* 2014/05/23
|
||||
* TEST single precision blocked : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "common.h"
|
||||
|
||||
static FLOAT dp1 = 1.;
|
||||
static FLOAT dm1 = -1.;
|
||||
// static FLOAT dp1 = 1.;
|
||||
// static FLOAT dm1 = -1.;
|
||||
|
||||
#ifdef UNIT
|
||||
#define TRTI2 TRTI2_UU
|
||||
@@ -48,152 +46,66 @@ static FLOAT dm1 = -1.;
|
||||
#define TRTI2 TRTI2_UN
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
#undef GEMM_P
|
||||
#undef GEMM_Q
|
||||
#undef GEMM_R
|
||||
|
||||
#define GEMM_P 8
|
||||
#define GEMM_Q 20
|
||||
#define GEMM_R 64
|
||||
#ifdef UNIT
|
||||
#define TRMM TRMM_LNUU
|
||||
#define TRSM TRSM_RNUU
|
||||
#else
|
||||
#define TRMM TRMM_LNUN
|
||||
#define TRSM TRSM_RNUN
|
||||
#endif
|
||||
|
||||
#define GEMM_PQ MAX(GEMM_P, GEMM_Q)
|
||||
#define REAL_GEMM_R (GEMM_R - 2 * GEMM_PQ)
|
||||
|
||||
blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) {
|
||||
|
||||
BLASLONG n, lda;
|
||||
BLASLONG j, n, lda;
|
||||
FLOAT *a;
|
||||
|
||||
BLASLONG i, is, min_i, start_is;
|
||||
BLASLONG ls, min_l;
|
||||
BLASLONG bk;
|
||||
BLASLONG blocking;
|
||||
BLASLONG range_N[2];
|
||||
// BLASLONG info=0;
|
||||
BLASLONG jb;
|
||||
BLASLONG NB;
|
||||
|
||||
FLOAT *sa_trsm = (FLOAT *)((BLASLONG)sb);
|
||||
FLOAT *sa_trmm = (FLOAT *)((((BLASLONG)sb
|
||||
+ GEMM_PQ * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)
|
||||
+ GEMM_OFFSET_A);
|
||||
FLOAT *sb_gemm = (FLOAT *)((((BLASLONG)sa_trmm
|
||||
+ GEMM_PQ * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)
|
||||
+ GEMM_OFFSET_B);
|
||||
FLOAT beta_plus[2] = { ONE, ZERO};
|
||||
FLOAT beta_minus[2] = {-ONE, ZERO};
|
||||
|
||||
n = args -> n;
|
||||
a = (FLOAT *)args -> a;
|
||||
lda = args -> lda;
|
||||
|
||||
if (range_n) {
|
||||
n = range_n[1] - range_n[0];
|
||||
a += range_n[0] * (lda + 1) * COMPSIZE;
|
||||
}
|
||||
NB = GEMM_Q;
|
||||
|
||||
if (n <= DTB_ENTRIES) {
|
||||
if (n <= NB) {
|
||||
TRTI2(args, NULL, range_n, sa, sb, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
blocking = GEMM_Q;
|
||||
if (n <= 4 * GEMM_Q) blocking = (n + 3) / 4;
|
||||
|
||||
for (i = 0; i < n; i += blocking) {
|
||||
bk = MIN(blocking, n - i);
|
||||
|
||||
if (i > 0) TRSM_OUNCOPY(bk, bk, a + (i + i * lda) * COMPSIZE, lda, 0, sa_trsm);
|
||||
lda = args -> lda;
|
||||
a = (FLOAT *) args -> a;
|
||||
args -> ldb = lda;
|
||||
args -> ldc = lda;
|
||||
args -> alpha = NULL;
|
||||
|
||||
if (!range_n) {
|
||||
range_N[0] = i;
|
||||
range_N[1] = i + bk;
|
||||
} else {
|
||||
range_N[0] = range_n[0] + i;
|
||||
range_N[1] = range_n[0] + i + bk;
|
||||
}
|
||||
for (j = 0; j < n; j += NB)
|
||||
{
|
||||
jb = n - j;
|
||||
if ( jb > NB ) jb = NB;
|
||||
|
||||
CNAME(args, NULL, range_N, sa, sa_trmm, 0);
|
||||
args -> n = jb;
|
||||
args -> m = j;
|
||||
|
||||
if (n -bk - i > 0) {
|
||||
TRMM_IUTCOPY(bk, bk, a + (i + i * lda) * COMPSIZE, lda, 0, 0, sa_trmm);
|
||||
args -> a = &a[0];
|
||||
args -> b = &a[(j*lda) * COMPSIZE];
|
||||
args -> beta = beta_plus;
|
||||
|
||||
for (ls = i + bk; ls < n; ls += REAL_GEMM_R) {
|
||||
min_l = n - ls;
|
||||
if (min_l > REAL_GEMM_R) min_l = REAL_GEMM_R;
|
||||
|
||||
GEMM_ONCOPY (bk, min_l, a + (i + ls * lda) * COMPSIZE, lda, sb_gemm);
|
||||
|
||||
if (i > 0) {
|
||||
for (is = 0; is < i; is += GEMM_P) {
|
||||
min_i = i - is;
|
||||
if (min_i > GEMM_P) min_i = GEMM_P;
|
||||
|
||||
if (ls == i + bk) {
|
||||
//NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
|
||||
TRMM(args, NULL, NULL, sa, sb, 0);
|
||||
|
||||
GEMM_BETA(min_i, bk, 0, dm1,
|
||||
#ifdef COMPLEX
|
||||
ZERO,
|
||||
#endif
|
||||
NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda);
|
||||
args -> a = &a[(j+j*lda) * COMPSIZE];
|
||||
args -> beta = beta_minus;
|
||||
|
||||
TRSM_KERNEL_RN(min_i, bk, bk, dm1,
|
||||
#ifdef COMPLEX
|
||||
ZERO,
|
||||
#endif
|
||||
sa, sa_trsm,
|
||||
a + (is + i * lda) * COMPSIZE, lda, 0);
|
||||
} else {
|
||||
GEMM_ITCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
|
||||
}
|
||||
|
||||
GEMM_KERNEL_N(min_i, min_l, bk, dp1,
|
||||
#ifdef COMPLEX
|
||||
ZERO,
|
||||
#endif
|
||||
sa, sb_gemm,
|
||||
a + (is + ls * lda) * COMPSIZE, lda);
|
||||
}
|
||||
}
|
||||
|
||||
start_is = 0;
|
||||
while (start_is < bk) start_is += GEMM_P;
|
||||
start_is -= GEMM_P;
|
||||
TRSM(args, NULL, NULL, sa, sb, 0);
|
||||
|
||||
for (is = 0; is < bk; is += GEMM_P) {
|
||||
min_i = bk - is;
|
||||
if (min_i > GEMM_P) min_i = GEMM_P;
|
||||
|
||||
TRMM_KERNEL_LN(min_i, min_l, bk, dp1,
|
||||
#ifdef COMPLEX
|
||||
ZERO,
|
||||
#endif
|
||||
sa_trmm + is * bk * COMPSIZE, sb_gemm,
|
||||
a + (i + is + ls * lda) * COMPSIZE, lda, is);
|
||||
}
|
||||
}
|
||||
args -> a = &a[(j+j*lda) * COMPSIZE];
|
||||
|
||||
} else {
|
||||
if (i > 0) {
|
||||
for (is = 0; is < i; is += GEMM_P) {
|
||||
min_i = i - is;
|
||||
if (min_i > GEMM_P) min_i = GEMM_P;
|
||||
|
||||
//NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
|
||||
GEMM_BETA(min_i, bk, 0, dm1,
|
||||
#ifdef COMPLEX
|
||||
ZERO,
|
||||
#endif
|
||||
NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda);
|
||||
TRTI2(args, NULL, range_n, sa, sb, 0);
|
||||
|
||||
TRSM_KERNEL_RN(min_i, bk, bk, dm1,
|
||||
#ifdef COMPLEX
|
||||
ZERO,
|
||||
#endif
|
||||
sa, sa_trsm,
|
||||
a + (is + i * lda) * COMPSIZE, lda, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
8
make.inc
8
make.inc
@@ -1,11 +1,7 @@
|
||||
SHELL = /bin/sh
|
||||
PLAT = _LINUX
|
||||
DRVOPTS = $(OPTS)
|
||||
LOADER = $(FORTRAN)
|
||||
TIMER = NONE
|
||||
LOADER = $(FORTRAN) -pthread
|
||||
ARCHFLAGS= -ru
|
||||
#RANLIB = ranlib
|
||||
BLASLIB =
|
||||
TMGLIB = tmglib.a
|
||||
EIGSRCLIB = eigsrc.a
|
||||
LINSRCLIB = linsrc.a
|
||||
|
||||
|
||||
52
param.h
52
param.h
@@ -1032,14 +1032,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define XGEMM_DEFAULT_UNROLL_N 1
|
||||
#else
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 1
|
||||
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_N 8
|
||||
#define DGEMM_DEFAULT_UNROLL_N 8
|
||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||
#define QGEMM_DEFAULT_UNROLL_N 2
|
||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
||||
@@ -1104,10 +1104,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||
#define XGEMM_DEFAULT_UNROLL_N 1
|
||||
#else
|
||||
#define SGEMM_DEFAULT_UNROLL_M 8
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_M 8
|
||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_M 8
|
||||
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 4
|
||||
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||
|
||||
@@ -1228,7 +1228,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ZGEMM_DEFAULT_P 256
|
||||
|
||||
#define SGEMM_DEFAULT_Q 384
|
||||
#ifdef WINDOWS_ABI
|
||||
#define DGEMM_DEFAULT_Q 128
|
||||
#else
|
||||
#define DGEMM_DEFAULT_Q 256
|
||||
#endif
|
||||
#define CGEMM_DEFAULT_Q 192
|
||||
#define ZGEMM_DEFAULT_Q 128
|
||||
|
||||
@@ -2017,6 +2021,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(ARMV5)
|
||||
#define SNUMOPT 2
|
||||
#define DNUMOPT 2
|
||||
|
||||
#define GEMM_DEFAULT_OFFSET_A 0
|
||||
#define GEMM_DEFAULT_OFFSET_B 0
|
||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 2
|
||||
#define SGEMM_DEFAULT_UNROLL_N 2
|
||||
|
||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||
#define DGEMM_DEFAULT_UNROLL_N 2
|
||||
|
||||
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||
|
||||
#define SGEMM_DEFAULT_P 128
|
||||
#define DGEMM_DEFAULT_P 128
|
||||
#define CGEMM_DEFAULT_P 96
|
||||
#define ZGEMM_DEFAULT_P 64
|
||||
|
||||
#define SGEMM_DEFAULT_Q 240
|
||||
#define DGEMM_DEFAULT_Q 120
|
||||
#define CGEMM_DEFAULT_Q 120
|
||||
#define ZGEMM_DEFAULT_Q 120
|
||||
|
||||
#define SGEMM_DEFAULT_R 12288
|
||||
#define DGEMM_DEFAULT_R 8192
|
||||
#define CGEMM_DEFAULT_R 4096
|
||||
#define ZGEMM_DEFAULT_R 4096
|
||||
|
||||
|
||||
#define SYMV_P 16
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
#ifdef GENERIC
|
||||
|
||||
@@ -1,684 +0,0 @@
|
||||
diff -ruN lapack-3.1.1.old/INSTALL/Makefile lapack-3.1.1/INSTALL/Makefile
|
||||
--- lapack-3.1.1.old/INSTALL/Makefile 2007-02-23 14:07:35.000000000 -0600
|
||||
+++ lapack-3.1.1/INSTALL/Makefile 2009-12-16 14:40:35.000000000 -0600
|
||||
@@ -27,7 +27,7 @@
|
||||
$(LOADER) $(LOADOPTS) -o testversion ilaver.o LAPACK_version.o
|
||||
|
||||
clean:
|
||||
- rm -f *.o
|
||||
+ rm -f *.o test*
|
||||
|
||||
slamch.o: slamch.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
dlamch.o: dlamch.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
diff -ruN lapack-3.1.1.old/Makefile lapack-3.1.1/Makefile
|
||||
--- lapack-3.1.1.old/Makefile 2007-02-22 15:55:00.000000000 -0600
|
||||
+++ lapack-3.1.1/Makefile 2009-12-16 14:40:35.000000000 -0600
|
||||
@@ -20,9 +20,12 @@
|
||||
blaslib:
|
||||
( cd BLAS/SRC; $(MAKE) )
|
||||
|
||||
-lapacklib: lapack_install
|
||||
+lapacklib:
|
||||
( cd SRC; $(MAKE) )
|
||||
|
||||
+lapack_prof:
|
||||
+ ( cd SRC; $(MAKE) lapack_prof)
|
||||
+
|
||||
tmglib:
|
||||
( cd TESTING/MATGEN; $(MAKE) )
|
||||
|
||||
diff -ruN lapack-3.1.1.old/SRC/Makefile lapack-3.1.1/SRC/Makefile
|
||||
--- lapack-3.1.1.old/SRC/Makefile 2007-02-23 15:33:05.000000000 -0600
|
||||
+++ lapack-3.1.1/SRC/Makefile 2009-12-16 14:41:09.000000000 -0600
|
||||
@@ -38,265 +38,273 @@
|
||||
#
|
||||
#######################################################################
|
||||
|
||||
-ALLAUX = ilaenv.o ieeeck.o lsamen.o xerbla.o iparmq.o \
|
||||
- ../INSTALL/ilaver.o ../INSTALL/lsame.o
|
||||
+ALLAUX = ilaenv.$(SUFFIX) ieeeck.$(SUFFIX) lsamen.$(SUFFIX) iparmq.$(SUFFIX) \
|
||||
+ ../INSTALL/ilaver.$(SUFFIX)
|
||||
|
||||
SCLAUX = \
|
||||
- sbdsdc.o \
|
||||
- sbdsqr.o sdisna.o slabad.o slacpy.o sladiv.o slae2.o slaebz.o \
|
||||
- slaed0.o slaed1.o slaed2.o slaed3.o slaed4.o slaed5.o slaed6.o \
|
||||
- slaed7.o slaed8.o slaed9.o slaeda.o slaev2.o slagtf.o \
|
||||
- slagts.o slamrg.o slanst.o \
|
||||
- slapy2.o slapy3.o slarnv.o \
|
||||
- slarra.o slarrb.o slarrc.o slarrd.o slarre.o slarrf.o slarrj.o \
|
||||
- slarrk.o slarrr.o slaneg.o \
|
||||
- slartg.o slaruv.o slas2.o slascl.o \
|
||||
- slasd0.o slasd1.o slasd2.o slasd3.o slasd4.o slasd5.o slasd6.o \
|
||||
- slasd7.o slasd8.o slasda.o slasdq.o slasdt.o \
|
||||
- slaset.o slasq1.o slasq2.o slasq3.o slazq3.o slasq4.o slazq4.o slasq5.o slasq6.o \
|
||||
- slasr.o slasrt.o slassq.o slasv2.o spttrf.o sstebz.o sstedc.o \
|
||||
- ssteqr.o ssterf.o slaisnan.o sisnan.o \
|
||||
- ../INSTALL/slamch.o ../INSTALL/second_$(TIMER).o
|
||||
+ sbdsdc.$(SUFFIX) \
|
||||
+ sbdsqr.$(SUFFIX) sdisna.$(SUFFIX) slabad.$(SUFFIX) slacpy.$(SUFFIX) sladiv.$(SUFFIX) slae2.$(SUFFIX) slaebz.$(SUFFIX) \
|
||||
+ slaed0.$(SUFFIX) slaed1.$(SUFFIX) slaed2.$(SUFFIX) slaed3.$(SUFFIX) slaed4.$(SUFFIX) slaed5.$(SUFFIX) slaed6.$(SUFFIX) \
|
||||
+ slaed7.$(SUFFIX) slaed8.$(SUFFIX) slaed9.$(SUFFIX) slaeda.$(SUFFIX) slaev2.$(SUFFIX) slagtf.$(SUFFIX) \
|
||||
+ slagts.$(SUFFIX) slamrg.$(SUFFIX) slanst.$(SUFFIX) \
|
||||
+ slapy2.$(SUFFIX) slapy3.$(SUFFIX) slarnv.$(SUFFIX) \
|
||||
+ slarra.$(SUFFIX) slarrb.$(SUFFIX) slarrc.$(SUFFIX) slarrd.$(SUFFIX) slarre.$(SUFFIX) slarrf.$(SUFFIX) slarrj.$(SUFFIX) \
|
||||
+ slarrk.$(SUFFIX) slarrr.$(SUFFIX) slaneg.$(SUFFIX) \
|
||||
+ slartg.$(SUFFIX) slaruv.$(SUFFIX) slas2.$(SUFFIX) slascl.$(SUFFIX) \
|
||||
+ slasd0.$(SUFFIX) slasd1.$(SUFFIX) slasd2.$(SUFFIX) slasd3.$(SUFFIX) slasd4.$(SUFFIX) slasd5.$(SUFFIX) slasd6.$(SUFFIX) \
|
||||
+ slasd7.$(SUFFIX) slasd8.$(SUFFIX) slasda.$(SUFFIX) slasdq.$(SUFFIX) slasdt.$(SUFFIX) \
|
||||
+ slaset.$(SUFFIX) slasq1.$(SUFFIX) slasq2.$(SUFFIX) slasq3.$(SUFFIX) slazq3.$(SUFFIX) slasq4.$(SUFFIX) slazq4.$(SUFFIX) slasq5.$(SUFFIX) slasq6.$(SUFFIX) \
|
||||
+ slasr.$(SUFFIX) slasrt.$(SUFFIX) slassq.$(SUFFIX) slasv2.$(SUFFIX) spttrf.$(SUFFIX) sstebz.$(SUFFIX) sstedc.$(SUFFIX) \
|
||||
+ ssteqr.$(SUFFIX) ssterf.$(SUFFIX) slaisnan.$(SUFFIX) sisnan.$(SUFFIX) \
|
||||
+ ../INSTALL/second_$(TIMER).$(SUFFIX)
|
||||
|
||||
DZLAUX = \
|
||||
- dbdsdc.o \
|
||||
- dbdsqr.o ddisna.o dlabad.o dlacpy.o dladiv.o dlae2.o dlaebz.o \
|
||||
- dlaed0.o dlaed1.o dlaed2.o dlaed3.o dlaed4.o dlaed5.o dlaed6.o \
|
||||
- dlaed7.o dlaed8.o dlaed9.o dlaeda.o dlaev2.o dlagtf.o \
|
||||
- dlagts.o dlamrg.o dlanst.o \
|
||||
- dlapy2.o dlapy3.o dlarnv.o \
|
||||
- dlarra.o dlarrb.o dlarrc.o dlarrd.o dlarre.o dlarrf.o dlarrj.o \
|
||||
- dlarrk.o dlarrr.o dlaneg.o \
|
||||
- dlartg.o dlaruv.o dlas2.o dlascl.o \
|
||||
- dlasd0.o dlasd1.o dlasd2.o dlasd3.o dlasd4.o dlasd5.o dlasd6.o \
|
||||
- dlasd7.o dlasd8.o dlasda.o dlasdq.o dlasdt.o \
|
||||
- dlaset.o dlasq1.o dlasq2.o dlasq3.o dlazq3.o dlasq4.o dlazq4.o dlasq5.o dlasq6.o \
|
||||
- dlasr.o dlasrt.o dlassq.o dlasv2.o dpttrf.o dstebz.o dstedc.o \
|
||||
- dsteqr.o dsterf.o dlaisnan.o disnan.o \
|
||||
- ../INSTALL/dlamch.o ../INSTALL/dsecnd_$(TIMER).o
|
||||
+ dbdsdc.$(SUFFIX) \
|
||||
+ dbdsqr.$(SUFFIX) ddisna.$(SUFFIX) dlabad.$(SUFFIX) dlacpy.$(SUFFIX) dladiv.$(SUFFIX) dlae2.$(SUFFIX) dlaebz.$(SUFFIX) \
|
||||
+ dlaed0.$(SUFFIX) dlaed1.$(SUFFIX) dlaed2.$(SUFFIX) dlaed3.$(SUFFIX) dlaed4.$(SUFFIX) dlaed5.$(SUFFIX) dlaed6.$(SUFFIX) \
|
||||
+ dlaed7.$(SUFFIX) dlaed8.$(SUFFIX) dlaed9.$(SUFFIX) dlaeda.$(SUFFIX) dlaev2.$(SUFFIX) dlagtf.$(SUFFIX) \
|
||||
+ dlagts.$(SUFFIX) dlamrg.$(SUFFIX) dlanst.$(SUFFIX) \
|
||||
+ dlapy2.$(SUFFIX) dlapy3.$(SUFFIX) dlarnv.$(SUFFIX) \
|
||||
+ dlarra.$(SUFFIX) dlarrb.$(SUFFIX) dlarrc.$(SUFFIX) dlarrd.$(SUFFIX) dlarre.$(SUFFIX) dlarrf.$(SUFFIX) dlarrj.$(SUFFIX) \
|
||||
+ dlarrk.$(SUFFIX) dlarrr.$(SUFFIX) dlaneg.$(SUFFIX) \
|
||||
+ dlartg.$(SUFFIX) dlaruv.$(SUFFIX) dlas2.$(SUFFIX) dlascl.$(SUFFIX) \
|
||||
+ dlasd0.$(SUFFIX) dlasd1.$(SUFFIX) dlasd2.$(SUFFIX) dlasd3.$(SUFFIX) dlasd4.$(SUFFIX) dlasd5.$(SUFFIX) dlasd6.$(SUFFIX) \
|
||||
+ dlasd7.$(SUFFIX) dlasd8.$(SUFFIX) dlasda.$(SUFFIX) dlasdq.$(SUFFIX) dlasdt.$(SUFFIX) \
|
||||
+ dlaset.$(SUFFIX) dlasq1.$(SUFFIX) dlasq2.$(SUFFIX) dlasq3.$(SUFFIX) dlazq3.$(SUFFIX) dlasq4.$(SUFFIX) dlazq4.$(SUFFIX) dlasq5.$(SUFFIX) dlasq6.$(SUFFIX) \
|
||||
+ dlasr.$(SUFFIX) dlasrt.$(SUFFIX) dlassq.$(SUFFIX) dlasv2.$(SUFFIX) dpttrf.$(SUFFIX) dstebz.$(SUFFIX) dstedc.$(SUFFIX) \
|
||||
+ dsteqr.$(SUFFIX) dsterf.$(SUFFIX) dlaisnan.$(SUFFIX) disnan.$(SUFFIX) \
|
||||
+ ../INSTALL/dsecnd_$(TIMER).$(SUFFIX)
|
||||
|
||||
SLASRC = \
|
||||
- sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \
|
||||
- sgbsvx.o sgbtf2.o sgbtrf.o sgbtrs.o sgebak.o sgebal.o sgebd2.o \
|
||||
- sgebrd.o sgecon.o sgeequ.o sgees.o sgeesx.o sgeev.o sgeevx.o \
|
||||
- sgegs.o sgegv.o sgehd2.o sgehrd.o sgelq2.o sgelqf.o \
|
||||
- sgels.o sgelsd.o sgelss.o sgelsx.o sgelsy.o sgeql2.o sgeqlf.o \
|
||||
- sgeqp3.o sgeqpf.o sgeqr2.o sgeqrf.o sgerfs.o sgerq2.o sgerqf.o \
|
||||
- sgesc2.o sgesdd.o sgesv.o sgesvd.o sgesvx.o sgetc2.o sgetf2.o \
|
||||
- sgetrf.o sgetri.o \
|
||||
- sgetrs.o sggbak.o sggbal.o sgges.o sggesx.o sggev.o sggevx.o \
|
||||
- sggglm.o sgghrd.o sgglse.o sggqrf.o \
|
||||
- sggrqf.o sggsvd.o sggsvp.o sgtcon.o sgtrfs.o sgtsv.o \
|
||||
- sgtsvx.o sgttrf.o sgttrs.o sgtts2.o shgeqz.o \
|
||||
- shsein.o shseqr.o slabrd.o slacon.o slacn2.o \
|
||||
- slaein.o slaexc.o slag2.o slags2.o slagtm.o slagv2.o slahqr.o \
|
||||
- slahrd.o slahr2.o slaic1.o slaln2.o slals0.o slalsa.o slalsd.o \
|
||||
- slangb.o slange.o slangt.o slanhs.o slansb.o slansp.o \
|
||||
- slansy.o slantb.o slantp.o slantr.o slanv2.o \
|
||||
- slapll.o slapmt.o \
|
||||
- slaqgb.o slaqge.o slaqp2.o slaqps.o slaqsb.o slaqsp.o slaqsy.o \
|
||||
- slaqr0.o slaqr1.o slaqr2.o slaqr3.o slaqr4.o slaqr5.o \
|
||||
- slaqtr.o slar1v.o slar2v.o \
|
||||
- slarf.o slarfb.o slarfg.o slarft.o slarfx.o slargv.o \
|
||||
- slarrv.o slartv.o \
|
||||
- slarz.o slarzb.o slarzt.o slaswp.o slasy2.o slasyf.o \
|
||||
- slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o slatzm.o \
|
||||
- slauu2.o slauum.o sopgtr.o sopmtr.o sorg2l.o sorg2r.o \
|
||||
- sorgbr.o sorghr.o sorgl2.o sorglq.o sorgql.o sorgqr.o sorgr2.o \
|
||||
- sorgrq.o sorgtr.o sorm2l.o sorm2r.o \
|
||||
- sormbr.o sormhr.o sorml2.o sormlq.o sormql.o sormqr.o sormr2.o \
|
||||
- sormr3.o sormrq.o sormrz.o sormtr.o spbcon.o spbequ.o spbrfs.o \
|
||||
- spbstf.o spbsv.o spbsvx.o \
|
||||
- spbtf2.o spbtrf.o spbtrs.o spocon.o spoequ.o sporfs.o sposv.o \
|
||||
- sposvx.o spotf2.o spotrf.o spotri.o spotrs.o sppcon.o sppequ.o \
|
||||
- spprfs.o sppsv.o sppsvx.o spptrf.o spptri.o spptrs.o sptcon.o \
|
||||
- spteqr.o sptrfs.o sptsv.o sptsvx.o spttrs.o sptts2.o srscl.o \
|
||||
- ssbev.o ssbevd.o ssbevx.o ssbgst.o ssbgv.o ssbgvd.o ssbgvx.o \
|
||||
- ssbtrd.o sspcon.o sspev.o sspevd.o sspevx.o sspgst.o \
|
||||
- sspgv.o sspgvd.o sspgvx.o ssprfs.o sspsv.o sspsvx.o ssptrd.o \
|
||||
- ssptrf.o ssptri.o ssptrs.o sstegr.o sstein.o sstev.o sstevd.o sstevr.o \
|
||||
- sstevx.o ssycon.o ssyev.o ssyevd.o ssyevr.o ssyevx.o ssygs2.o \
|
||||
- ssygst.o ssygv.o ssygvd.o ssygvx.o ssyrfs.o ssysv.o ssysvx.o \
|
||||
- ssytd2.o ssytf2.o ssytrd.o ssytrf.o ssytri.o ssytrs.o stbcon.o \
|
||||
- stbrfs.o stbtrs.o stgevc.o stgex2.o stgexc.o stgsen.o \
|
||||
- stgsja.o stgsna.o stgsy2.o stgsyl.o stpcon.o stprfs.o stptri.o \
|
||||
- stptrs.o \
|
||||
- strcon.o strevc.o strexc.o strrfs.o strsen.o strsna.o strsyl.o \
|
||||
- strti2.o strtri.o strtrs.o stzrqf.o stzrzf.o sstemr.o
|
||||
+ sgbbrd.$(SUFFIX) sgbcon.$(SUFFIX) sgbequ.$(SUFFIX) sgbrfs.$(SUFFIX) sgbsv.$(SUFFIX) \
|
||||
+ sgbsvx.$(SUFFIX) sgbtf2.$(SUFFIX) sgbtrf.$(SUFFIX) sgbtrs.$(SUFFIX) sgebak.$(SUFFIX) sgebal.$(SUFFIX) sgebd2.$(SUFFIX) \
|
||||
+ sgebrd.$(SUFFIX) sgecon.$(SUFFIX) sgeequ.$(SUFFIX) sgees.$(SUFFIX) sgeesx.$(SUFFIX) sgeev.$(SUFFIX) sgeevx.$(SUFFIX) \
|
||||
+ sgegs.$(SUFFIX) sgegv.$(SUFFIX) sgehd2.$(SUFFIX) sgehrd.$(SUFFIX) sgelq2.$(SUFFIX) sgelqf.$(SUFFIX) \
|
||||
+ sgels.$(SUFFIX) sgelsd.$(SUFFIX) sgelss.$(SUFFIX) sgelsx.$(SUFFIX) sgelsy.$(SUFFIX) sgeql2.$(SUFFIX) sgeqlf.$(SUFFIX) \
|
||||
+ sgeqp3.$(SUFFIX) sgeqpf.$(SUFFIX) sgeqr2.$(SUFFIX) sgeqrf.$(SUFFIX) sgerfs.$(SUFFIX) sgerq2.$(SUFFIX) sgerqf.$(SUFFIX) \
|
||||
+ sgesc2.$(SUFFIX) sgesdd.$(SUFFIX) sgesvd.$(SUFFIX) sgesvx.$(SUFFIX) sgetc2.$(SUFFIX) \
|
||||
+ sgetri.$(SUFFIX) \
|
||||
+ sggbak.$(SUFFIX) sggbal.$(SUFFIX) sgges.$(SUFFIX) sggesx.$(SUFFIX) sggev.$(SUFFIX) sggevx.$(SUFFIX) \
|
||||
+ sggglm.$(SUFFIX) sgghrd.$(SUFFIX) sgglse.$(SUFFIX) sggqrf.$(SUFFIX) \
|
||||
+ sggrqf.$(SUFFIX) sggsvd.$(SUFFIX) sggsvp.$(SUFFIX) sgtcon.$(SUFFIX) sgtrfs.$(SUFFIX) sgtsv.$(SUFFIX) \
|
||||
+ sgtsvx.$(SUFFIX) sgttrf.$(SUFFIX) sgttrs.$(SUFFIX) sgtts2.$(SUFFIX) shgeqz.$(SUFFIX) \
|
||||
+ shsein.$(SUFFIX) shseqr.$(SUFFIX) slabrd.$(SUFFIX) slacon.$(SUFFIX) slacn2.$(SUFFIX) \
|
||||
+ slaein.$(SUFFIX) slaexc.$(SUFFIX) slag2.$(SUFFIX) slags2.$(SUFFIX) slagtm.$(SUFFIX) slagv2.$(SUFFIX) slahqr.$(SUFFIX) \
|
||||
+ slahrd.$(SUFFIX) slahr2.$(SUFFIX) slaic1.$(SUFFIX) slaln2.$(SUFFIX) slals0.$(SUFFIX) slalsa.$(SUFFIX) slalsd.$(SUFFIX) \
|
||||
+ slangb.$(SUFFIX) slange.$(SUFFIX) slangt.$(SUFFIX) slanhs.$(SUFFIX) slansb.$(SUFFIX) slansp.$(SUFFIX) \
|
||||
+ slansy.$(SUFFIX) slantb.$(SUFFIX) slantp.$(SUFFIX) slantr.$(SUFFIX) slanv2.$(SUFFIX) \
|
||||
+ slapll.$(SUFFIX) slapmt.$(SUFFIX) \
|
||||
+ slaqgb.$(SUFFIX) slaqge.$(SUFFIX) slaqp2.$(SUFFIX) slaqps.$(SUFFIX) slaqsb.$(SUFFIX) slaqsp.$(SUFFIX) slaqsy.$(SUFFIX) \
|
||||
+ slaqr0.$(SUFFIX) slaqr1.$(SUFFIX) slaqr2.$(SUFFIX) slaqr3.$(SUFFIX) slaqr4.$(SUFFIX) slaqr5.$(SUFFIX) \
|
||||
+ slaqtr.$(SUFFIX) slar1v.$(SUFFIX) slar2v.$(SUFFIX) \
|
||||
+ slarf.$(SUFFIX) slarfb.$(SUFFIX) slarfg.$(SUFFIX) slarft.$(SUFFIX) slarfx.$(SUFFIX) slargv.$(SUFFIX) \
|
||||
+ slarrv.$(SUFFIX) slartv.$(SUFFIX) \
|
||||
+ slarz.$(SUFFIX) slarzb.$(SUFFIX) slarzt.$(SUFFIX) slasy2.$(SUFFIX) slasyf.$(SUFFIX) \
|
||||
+ slatbs.$(SUFFIX) slatdf.$(SUFFIX) slatps.$(SUFFIX) slatrd.$(SUFFIX) slatrs.$(SUFFIX) slatrz.$(SUFFIX) slatzm.$(SUFFIX) \
|
||||
+ sopgtr.$(SUFFIX) sopmtr.$(SUFFIX) sorg2l.$(SUFFIX) sorg2r.$(SUFFIX) \
|
||||
+ sorgbr.$(SUFFIX) sorghr.$(SUFFIX) sorgl2.$(SUFFIX) sorglq.$(SUFFIX) sorgql.$(SUFFIX) sorgqr.$(SUFFIX) sorgr2.$(SUFFIX) \
|
||||
+ sorgrq.$(SUFFIX) sorgtr.$(SUFFIX) sorm2l.$(SUFFIX) sorm2r.$(SUFFIX) \
|
||||
+ sormbr.$(SUFFIX) sormhr.$(SUFFIX) sorml2.$(SUFFIX) sormlq.$(SUFFIX) sormql.$(SUFFIX) sormqr.$(SUFFIX) sormr2.$(SUFFIX) \
|
||||
+ sormr3.$(SUFFIX) sormrq.$(SUFFIX) sormrz.$(SUFFIX) sormtr.$(SUFFIX) spbcon.$(SUFFIX) spbequ.$(SUFFIX) spbrfs.$(SUFFIX) \
|
||||
+ spbstf.$(SUFFIX) spbsv.$(SUFFIX) spbsvx.$(SUFFIX) \
|
||||
+ spbtf2.$(SUFFIX) spbtrf.$(SUFFIX) spbtrs.$(SUFFIX) spocon.$(SUFFIX) spoequ.$(SUFFIX) sporfs.$(SUFFIX) sposv.$(SUFFIX) \
|
||||
+ sposvx.$(SUFFIX) spotrs.$(SUFFIX) sppcon.$(SUFFIX) sppequ.$(SUFFIX) \
|
||||
+ spprfs.$(SUFFIX) sppsv.$(SUFFIX) sppsvx.$(SUFFIX) spptrf.$(SUFFIX) spptri.$(SUFFIX) spptrs.$(SUFFIX) sptcon.$(SUFFIX) \
|
||||
+ spteqr.$(SUFFIX) sptrfs.$(SUFFIX) sptsv.$(SUFFIX) sptsvx.$(SUFFIX) spttrs.$(SUFFIX) sptts2.$(SUFFIX) srscl.$(SUFFIX) \
|
||||
+ ssbev.$(SUFFIX) ssbevd.$(SUFFIX) ssbevx.$(SUFFIX) ssbgst.$(SUFFIX) ssbgv.$(SUFFIX) ssbgvd.$(SUFFIX) ssbgvx.$(SUFFIX) \
|
||||
+ ssbtrd.$(SUFFIX) sspcon.$(SUFFIX) sspev.$(SUFFIX) sspevd.$(SUFFIX) sspevx.$(SUFFIX) sspgst.$(SUFFIX) \
|
||||
+ sspgv.$(SUFFIX) sspgvd.$(SUFFIX) sspgvx.$(SUFFIX) ssprfs.$(SUFFIX) sspsv.$(SUFFIX) sspsvx.$(SUFFIX) ssptrd.$(SUFFIX) \
|
||||
+ ssptrf.$(SUFFIX) ssptri.$(SUFFIX) ssptrs.$(SUFFIX) sstegr.$(SUFFIX) sstein.$(SUFFIX) sstev.$(SUFFIX) sstevd.$(SUFFIX) sstevr.$(SUFFIX) \
|
||||
+ sstevx.$(SUFFIX) ssycon.$(SUFFIX) ssyev.$(SUFFIX) ssyevd.$(SUFFIX) ssyevr.$(SUFFIX) ssyevx.$(SUFFIX) ssygs2.$(SUFFIX) \
|
||||
+ ssygst.$(SUFFIX) ssygv.$(SUFFIX) ssygvd.$(SUFFIX) ssygvx.$(SUFFIX) ssyrfs.$(SUFFIX) ssysv.$(SUFFIX) ssysvx.$(SUFFIX) \
|
||||
+ ssytd2.$(SUFFIX) ssytf2.$(SUFFIX) ssytrd.$(SUFFIX) ssytrf.$(SUFFIX) ssytri.$(SUFFIX) ssytrs.$(SUFFIX) stbcon.$(SUFFIX) \
|
||||
+ stbrfs.$(SUFFIX) stbtrs.$(SUFFIX) stgevc.$(SUFFIX) stgex2.$(SUFFIX) stgexc.$(SUFFIX) stgsen.$(SUFFIX) \
|
||||
+ stgsja.$(SUFFIX) stgsna.$(SUFFIX) stgsy2.$(SUFFIX) stgsyl.$(SUFFIX) stpcon.$(SUFFIX) stprfs.$(SUFFIX) stptri.$(SUFFIX) \
|
||||
+ stptrs.$(SUFFIX) \
|
||||
+ strcon.$(SUFFIX) strevc.$(SUFFIX) strexc.$(SUFFIX) strrfs.$(SUFFIX) strsen.$(SUFFIX) strsna.$(SUFFIX) strsyl.$(SUFFIX) \
|
||||
+ strtrs.$(SUFFIX) stzrqf.$(SUFFIX) stzrzf.$(SUFFIX) sstemr.$(SUFFIX)
|
||||
|
||||
CLASRC = \
|
||||
- cbdsqr.o cgbbrd.o cgbcon.o cgbequ.o cgbrfs.o cgbsv.o cgbsvx.o \
|
||||
- cgbtf2.o cgbtrf.o cgbtrs.o cgebak.o cgebal.o cgebd2.o cgebrd.o \
|
||||
- cgecon.o cgeequ.o cgees.o cgeesx.o cgeev.o cgeevx.o \
|
||||
- cgegs.o cgegv.o cgehd2.o cgehrd.o cgelq2.o cgelqf.o \
|
||||
- cgels.o cgelsd.o cgelss.o cgelsx.o cgelsy.o cgeql2.o cgeqlf.o cgeqp3.o \
|
||||
- cgeqpf.o cgeqr2.o cgeqrf.o cgerfs.o cgerq2.o cgerqf.o \
|
||||
- cgesc2.o cgesdd.o cgesv.o cgesvd.o cgesvx.o cgetc2.o cgetf2.o cgetrf.o \
|
||||
- cgetri.o cgetrs.o \
|
||||
- cggbak.o cggbal.o cgges.o cggesx.o cggev.o cggevx.o cggglm.o \
|
||||
- cgghrd.o cgglse.o cggqrf.o cggrqf.o \
|
||||
- cggsvd.o cggsvp.o \
|
||||
- cgtcon.o cgtrfs.o cgtsv.o cgtsvx.o cgttrf.o cgttrs.o cgtts2.o chbev.o \
|
||||
- chbevd.o chbevx.o chbgst.o chbgv.o chbgvd.o chbgvx.o chbtrd.o \
|
||||
- checon.o cheev.o cheevd.o cheevr.o cheevx.o chegs2.o chegst.o \
|
||||
- chegv.o chegvd.o chegvx.o cherfs.o chesv.o chesvx.o chetd2.o \
|
||||
- chetf2.o chetrd.o \
|
||||
- chetrf.o chetri.o chetrs.o chgeqz.o chpcon.o chpev.o chpevd.o \
|
||||
- chpevx.o chpgst.o chpgv.o chpgvd.o chpgvx.o chprfs.o chpsv.o \
|
||||
- chpsvx.o \
|
||||
- chptrd.o chptrf.o chptri.o chptrs.o chsein.o chseqr.o clabrd.o \
|
||||
- clacgv.o clacon.o clacn2.o clacp2.o clacpy.o clacrm.o clacrt.o cladiv.o \
|
||||
- claed0.o claed7.o claed8.o \
|
||||
- claein.o claesy.o claev2.o clags2.o clagtm.o \
|
||||
- clahef.o clahqr.o \
|
||||
- clahrd.o clahr2.o claic1.o clals0.o clalsa.o clalsd.o clangb.o clange.o clangt.o \
|
||||
- clanhb.o clanhe.o \
|
||||
- clanhp.o clanhs.o clanht.o clansb.o clansp.o clansy.o clantb.o \
|
||||
- clantp.o clantr.o clapll.o clapmt.o clarcm.o claqgb.o claqge.o \
|
||||
- claqhb.o claqhe.o claqhp.o claqp2.o claqps.o claqsb.o \
|
||||
- claqr0.o claqr1.o claqr2.o claqr3.o claqr4.o claqr5.o \
|
||||
- claqsp.o claqsy.o clar1v.o clar2v.o clarf.o clarfb.o clarfg.o clarft.o \
|
||||
- clarfx.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \
|
||||
- clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \
|
||||
- claswp.o clasyf.o clatbs.o clatdf.o clatps.o clatrd.o clatrs.o clatrz.o \
|
||||
- clatzm.o clauu2.o clauum.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \
|
||||
- cpbsvx.o cpbtf2.o cpbtrf.o cpbtrs.o cpocon.o cpoequ.o cporfs.o \
|
||||
- cposv.o cposvx.o cpotf2.o cpotrf.o cpotri.o cpotrs.o cppcon.o \
|
||||
- cppequ.o cpprfs.o cppsv.o cppsvx.o cpptrf.o cpptri.o cpptrs.o \
|
||||
- cptcon.o cpteqr.o cptrfs.o cptsv.o cptsvx.o cpttrf.o cpttrs.o cptts2.o \
|
||||
- crot.o cspcon.o cspmv.o cspr.o csprfs.o cspsv.o \
|
||||
- cspsvx.o csptrf.o csptri.o csptrs.o csrscl.o cstedc.o \
|
||||
- cstegr.o cstein.o csteqr.o csycon.o csymv.o \
|
||||
- csyr.o csyrfs.o csysv.o csysvx.o csytf2.o csytrf.o csytri.o \
|
||||
- csytrs.o ctbcon.o ctbrfs.o ctbtrs.o ctgevc.o ctgex2.o \
|
||||
- ctgexc.o ctgsen.o ctgsja.o ctgsna.o ctgsy2.o ctgsyl.o ctpcon.o \
|
||||
- ctprfs.o ctptri.o \
|
||||
- ctptrs.o ctrcon.o ctrevc.o ctrexc.o ctrrfs.o ctrsen.o ctrsna.o \
|
||||
- ctrsyl.o ctrti2.o ctrtri.o ctrtrs.o ctzrqf.o ctzrzf.o cung2l.o cung2r.o \
|
||||
- cungbr.o cunghr.o cungl2.o cunglq.o cungql.o cungqr.o cungr2.o \
|
||||
- cungrq.o cungtr.o cunm2l.o cunm2r.o cunmbr.o cunmhr.o cunml2.o \
|
||||
- cunmlq.o cunmql.o cunmqr.o cunmr2.o cunmr3.o cunmrq.o cunmrz.o \
|
||||
- cunmtr.o cupgtr.o cupmtr.o icmax1.o scsum1.o cstemr.o
|
||||
+ cbdsqr.$(SUFFIX) cgbbrd.$(SUFFIX) cgbcon.$(SUFFIX) cgbequ.$(SUFFIX) cgbrfs.$(SUFFIX) cgbsv.$(SUFFIX) cgbsvx.$(SUFFIX) \
|
||||
+ cgbtf2.$(SUFFIX) cgbtrf.$(SUFFIX) cgbtrs.$(SUFFIX) cgebak.$(SUFFIX) cgebal.$(SUFFIX) cgebd2.$(SUFFIX) cgebrd.$(SUFFIX) \
|
||||
+ cgecon.$(SUFFIX) cgeequ.$(SUFFIX) cgees.$(SUFFIX) cgeesx.$(SUFFIX) cgeev.$(SUFFIX) cgeevx.$(SUFFIX) \
|
||||
+ cgegs.$(SUFFIX) cgegv.$(SUFFIX) cgehd2.$(SUFFIX) cgehrd.$(SUFFIX) cgelq2.$(SUFFIX) cgelqf.$(SUFFIX) \
|
||||
+ cgels.$(SUFFIX) cgelsd.$(SUFFIX) cgelss.$(SUFFIX) cgelsx.$(SUFFIX) cgelsy.$(SUFFIX) cgeql2.$(SUFFIX) cgeqlf.$(SUFFIX) cgeqp3.$(SUFFIX) \
|
||||
+ cgeqpf.$(SUFFIX) cgeqr2.$(SUFFIX) cgeqrf.$(SUFFIX) cgerfs.$(SUFFIX) cgerq2.$(SUFFIX) cgerqf.$(SUFFIX) \
|
||||
+ cgesc2.$(SUFFIX) cgesdd.$(SUFFIX) cgesvd.$(SUFFIX) cgesvx.$(SUFFIX) cgetc2.$(SUFFIX) \
|
||||
+ cgetri.$(SUFFIX) \
|
||||
+ cggbak.$(SUFFIX) cggbal.$(SUFFIX) cgges.$(SUFFIX) cggesx.$(SUFFIX) cggev.$(SUFFIX) cggevx.$(SUFFIX) cggglm.$(SUFFIX) \
|
||||
+ cgghrd.$(SUFFIX) cgglse.$(SUFFIX) cggqrf.$(SUFFIX) cggrqf.$(SUFFIX) \
|
||||
+ cggsvd.$(SUFFIX) cggsvp.$(SUFFIX) \
|
||||
+ cgtcon.$(SUFFIX) cgtrfs.$(SUFFIX) cgtsv.$(SUFFIX) cgtsvx.$(SUFFIX) cgttrf.$(SUFFIX) cgttrs.$(SUFFIX) cgtts2.$(SUFFIX) chbev.$(SUFFIX) \
|
||||
+ chbevd.$(SUFFIX) chbevx.$(SUFFIX) chbgst.$(SUFFIX) chbgv.$(SUFFIX) chbgvd.$(SUFFIX) chbgvx.$(SUFFIX) chbtrd.$(SUFFIX) \
|
||||
+ checon.$(SUFFIX) cheev.$(SUFFIX) cheevd.$(SUFFIX) cheevr.$(SUFFIX) cheevx.$(SUFFIX) chegs2.$(SUFFIX) chegst.$(SUFFIX) \
|
||||
+ chegv.$(SUFFIX) chegvd.$(SUFFIX) chegvx.$(SUFFIX) cherfs.$(SUFFIX) chesv.$(SUFFIX) chesvx.$(SUFFIX) chetd2.$(SUFFIX) \
|
||||
+ chetf2.$(SUFFIX) chetrd.$(SUFFIX) \
|
||||
+ chetrf.$(SUFFIX) chetri.$(SUFFIX) chetrs.$(SUFFIX) chgeqz.$(SUFFIX) chpcon.$(SUFFIX) chpev.$(SUFFIX) chpevd.$(SUFFIX) \
|
||||
+ chpevx.$(SUFFIX) chpgst.$(SUFFIX) chpgv.$(SUFFIX) chpgvd.$(SUFFIX) chpgvx.$(SUFFIX) chprfs.$(SUFFIX) chpsv.$(SUFFIX) \
|
||||
+ chpsvx.$(SUFFIX) \
|
||||
+ chptrd.$(SUFFIX) chptrf.$(SUFFIX) chptri.$(SUFFIX) chptrs.$(SUFFIX) chsein.$(SUFFIX) chseqr.$(SUFFIX) clabrd.$(SUFFIX) \
|
||||
+ clacgv.$(SUFFIX) clacon.$(SUFFIX) clacn2.$(SUFFIX) clacp2.$(SUFFIX) clacpy.$(SUFFIX) clacrm.$(SUFFIX) clacrt.$(SUFFIX) cladiv.$(SUFFIX) \
|
||||
+ claed0.$(SUFFIX) claed7.$(SUFFIX) claed8.$(SUFFIX) \
|
||||
+ claein.$(SUFFIX) claesy.$(SUFFIX) claev2.$(SUFFIX) clags2.$(SUFFIX) clagtm.$(SUFFIX) \
|
||||
+ clahef.$(SUFFIX) clahqr.$(SUFFIX) \
|
||||
+ clahrd.$(SUFFIX) clahr2.$(SUFFIX) claic1.$(SUFFIX) clals0.$(SUFFIX) clalsa.$(SUFFIX) clalsd.$(SUFFIX) clangb.$(SUFFIX) clange.$(SUFFIX) clangt.$(SUFFIX) \
|
||||
+ clanhb.$(SUFFIX) clanhe.$(SUFFIX) \
|
||||
+ clanhp.$(SUFFIX) clanhs.$(SUFFIX) clanht.$(SUFFIX) clansb.$(SUFFIX) clansp.$(SUFFIX) clansy.$(SUFFIX) clantb.$(SUFFIX) \
|
||||
+ clantp.$(SUFFIX) clantr.$(SUFFIX) clapll.$(SUFFIX) clapmt.$(SUFFIX) clarcm.$(SUFFIX) claqgb.$(SUFFIX) claqge.$(SUFFIX) \
|
||||
+ claqhb.$(SUFFIX) claqhe.$(SUFFIX) claqhp.$(SUFFIX) claqp2.$(SUFFIX) claqps.$(SUFFIX) claqsb.$(SUFFIX) \
|
||||
+ claqr0.$(SUFFIX) claqr1.$(SUFFIX) claqr2.$(SUFFIX) claqr3.$(SUFFIX) claqr4.$(SUFFIX) claqr5.$(SUFFIX) \
|
||||
+ claqsp.$(SUFFIX) claqsy.$(SUFFIX) clar1v.$(SUFFIX) clar2v.$(SUFFIX) clarf.$(SUFFIX) clarfb.$(SUFFIX) clarfg.$(SUFFIX) clarft.$(SUFFIX) \
|
||||
+ clarfx.$(SUFFIX) clargv.$(SUFFIX) clarnv.$(SUFFIX) clarrv.$(SUFFIX) clartg.$(SUFFIX) clartv.$(SUFFIX) \
|
||||
+ clarz.$(SUFFIX) clarzb.$(SUFFIX) clarzt.$(SUFFIX) clascl.$(SUFFIX) claset.$(SUFFIX) clasr.$(SUFFIX) classq.$(SUFFIX) \
|
||||
+ clasyf.$(SUFFIX) clatbs.$(SUFFIX) clatdf.$(SUFFIX) clatps.$(SUFFIX) clatrd.$(SUFFIX) clatrs.$(SUFFIX) clatrz.$(SUFFIX) \
|
||||
+ clatzm.$(SUFFIX) cpbcon.$(SUFFIX) cpbequ.$(SUFFIX) cpbrfs.$(SUFFIX) cpbstf.$(SUFFIX) cpbsv.$(SUFFIX) \
|
||||
+ cpbsvx.$(SUFFIX) cpbtf2.$(SUFFIX) cpbtrf.$(SUFFIX) cpbtrs.$(SUFFIX) cpocon.$(SUFFIX) cpoequ.$(SUFFIX) cporfs.$(SUFFIX) \
|
||||
+ cposv.$(SUFFIX) cposvx.$(SUFFIX) cpotrs.$(SUFFIX) cppcon.$(SUFFIX) \
|
||||
+ cppequ.$(SUFFIX) cpprfs.$(SUFFIX) cppsv.$(SUFFIX) cppsvx.$(SUFFIX) cpptrf.$(SUFFIX) cpptri.$(SUFFIX) cpptrs.$(SUFFIX) \
|
||||
+ cptcon.$(SUFFIX) cpteqr.$(SUFFIX) cptrfs.$(SUFFIX) cptsv.$(SUFFIX) cptsvx.$(SUFFIX) cpttrf.$(SUFFIX) cpttrs.$(SUFFIX) cptts2.$(SUFFIX) \
|
||||
+ crot.$(SUFFIX) cspcon.$(SUFFIX) csprfs.$(SUFFIX) cspsv.$(SUFFIX) \
|
||||
+ cspsvx.$(SUFFIX) csptrf.$(SUFFIX) csptri.$(SUFFIX) csptrs.$(SUFFIX) csrscl.$(SUFFIX) cstedc.$(SUFFIX) \
|
||||
+ cstegr.$(SUFFIX) cstein.$(SUFFIX) csteqr.$(SUFFIX) csycon.$(SUFFIX) \
|
||||
+ csyrfs.$(SUFFIX) csysv.$(SUFFIX) csysvx.$(SUFFIX) csytf2.$(SUFFIX) csytrf.$(SUFFIX) csytri.$(SUFFIX) \
|
||||
+ csytrs.$(SUFFIX) ctbcon.$(SUFFIX) ctbrfs.$(SUFFIX) ctbtrs.$(SUFFIX) ctgevc.$(SUFFIX) ctgex2.$(SUFFIX) \
|
||||
+ ctgexc.$(SUFFIX) ctgsen.$(SUFFIX) ctgsja.$(SUFFIX) ctgsna.$(SUFFIX) ctgsy2.$(SUFFIX) ctgsyl.$(SUFFIX) ctpcon.$(SUFFIX) \
|
||||
+ ctprfs.$(SUFFIX) ctptri.$(SUFFIX) \
|
||||
+ ctptrs.$(SUFFIX) ctrcon.$(SUFFIX) ctrevc.$(SUFFIX) ctrexc.$(SUFFIX) ctrrfs.$(SUFFIX) ctrsen.$(SUFFIX) ctrsna.$(SUFFIX) \
|
||||
+ ctrsyl.$(SUFFIX) ctrtrs.$(SUFFIX) ctzrqf.$(SUFFIX) ctzrzf.$(SUFFIX) cung2l.$(SUFFIX) cung2r.$(SUFFIX) \
|
||||
+ cungbr.$(SUFFIX) cunghr.$(SUFFIX) cungl2.$(SUFFIX) cunglq.$(SUFFIX) cungql.$(SUFFIX) cungqr.$(SUFFIX) cungr2.$(SUFFIX) \
|
||||
+ cungrq.$(SUFFIX) cungtr.$(SUFFIX) cunm2l.$(SUFFIX) cunm2r.$(SUFFIX) cunmbr.$(SUFFIX) cunmhr.$(SUFFIX) cunml2.$(SUFFIX) \
|
||||
+ cunmlq.$(SUFFIX) cunmql.$(SUFFIX) cunmqr.$(SUFFIX) cunmr2.$(SUFFIX) cunmr3.$(SUFFIX) cunmrq.$(SUFFIX) cunmrz.$(SUFFIX) \
|
||||
+ cunmtr.$(SUFFIX) cupgtr.$(SUFFIX) cupmtr.$(SUFFIX) icmax1.$(SUFFIX) scsum1.$(SUFFIX) cstemr.$(SUFFIX)
|
||||
|
||||
DLASRC = \
|
||||
- dgbbrd.o dgbcon.o dgbequ.o dgbrfs.o dgbsv.o \
|
||||
- dgbsvx.o dgbtf2.o dgbtrf.o dgbtrs.o dgebak.o dgebal.o dgebd2.o \
|
||||
- dgebrd.o dgecon.o dgeequ.o dgees.o dgeesx.o dgeev.o dgeevx.o \
|
||||
- dgegs.o dgegv.o dgehd2.o dgehrd.o dgelq2.o dgelqf.o \
|
||||
- dgels.o dgelsd.o dgelss.o dgelsx.o dgelsy.o dgeql2.o dgeqlf.o \
|
||||
- dgeqp3.o dgeqpf.o dgeqr2.o dgeqrf.o dgerfs.o dgerq2.o dgerqf.o \
|
||||
- dgesc2.o dgesdd.o dgesv.o dgesvd.o dgesvx.o dgetc2.o dgetf2.o \
|
||||
- dgetrf.o dgetri.o \
|
||||
- dgetrs.o dggbak.o dggbal.o dgges.o dggesx.o dggev.o dggevx.o \
|
||||
- dggglm.o dgghrd.o dgglse.o dggqrf.o \
|
||||
- dggrqf.o dggsvd.o dggsvp.o dgtcon.o dgtrfs.o dgtsv.o \
|
||||
- dgtsvx.o dgttrf.o dgttrs.o dgtts2.o dhgeqz.o \
|
||||
- dhsein.o dhseqr.o dlabrd.o dlacon.o dlacn2.o \
|
||||
- dlaein.o dlaexc.o dlag2.o dlags2.o dlagtm.o dlagv2.o dlahqr.o \
|
||||
- dlahrd.o dlahr2.o dlaic1.o dlaln2.o dlals0.o dlalsa.o dlalsd.o \
|
||||
- dlangb.o dlange.o dlangt.o dlanhs.o dlansb.o dlansp.o \
|
||||
- dlansy.o dlantb.o dlantp.o dlantr.o dlanv2.o \
|
||||
- dlapll.o dlapmt.o \
|
||||
- dlaqgb.o dlaqge.o dlaqp2.o dlaqps.o dlaqsb.o dlaqsp.o dlaqsy.o \
|
||||
- dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \
|
||||
- dlaqtr.o dlar1v.o dlar2v.o \
|
||||
- dlarf.o dlarfb.o dlarfg.o dlarft.o dlarfx.o dlargv.o \
|
||||
- dlarrv.o dlartv.o \
|
||||
- dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o dlasyf.o \
|
||||
- dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o dlauu2.o \
|
||||
- dlauum.o dopgtr.o dopmtr.o dorg2l.o dorg2r.o \
|
||||
- dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \
|
||||
- dorgrq.o dorgtr.o dorm2l.o dorm2r.o \
|
||||
- dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \
|
||||
- dormr3.o dormrq.o dormrz.o dormtr.o dpbcon.o dpbequ.o dpbrfs.o \
|
||||
- dpbstf.o dpbsv.o dpbsvx.o \
|
||||
- dpbtf2.o dpbtrf.o dpbtrs.o dpocon.o dpoequ.o dporfs.o dposv.o \
|
||||
- dposvx.o dpotf2.o dpotrf.o dpotri.o dpotrs.o dppcon.o dppequ.o \
|
||||
- dpprfs.o dppsv.o dppsvx.o dpptrf.o dpptri.o dpptrs.o dptcon.o \
|
||||
- dpteqr.o dptrfs.o dptsv.o dptsvx.o dpttrs.o dptts2.o drscl.o \
|
||||
- dsbev.o dsbevd.o dsbevx.o dsbgst.o dsbgv.o dsbgvd.o dsbgvx.o \
|
||||
- dsbtrd.o dspcon.o dspev.o dspevd.o dspevx.o dspgst.o \
|
||||
- dspgv.o dspgvd.o dspgvx.o dsprfs.o dspsv.o dspsvx.o dsptrd.o \
|
||||
- dsptrf.o dsptri.o dsptrs.o dstegr.o dstein.o dstev.o dstevd.o dstevr.o \
|
||||
- dstevx.o dsycon.o dsyev.o dsyevd.o dsyevr.o \
|
||||
- dsyevx.o dsygs2.o dsygst.o dsygv.o dsygvd.o dsygvx.o dsyrfs.o \
|
||||
- dsysv.o dsysvx.o \
|
||||
- dsytd2.o dsytf2.o dsytrd.o dsytrf.o dsytri.o dsytrs.o dtbcon.o \
|
||||
- dtbrfs.o dtbtrs.o dtgevc.o dtgex2.o dtgexc.o dtgsen.o \
|
||||
- dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \
|
||||
- dtptrs.o \
|
||||
- dtrcon.o dtrevc.o dtrexc.o dtrrfs.o dtrsen.o dtrsna.o dtrsyl.o \
|
||||
- dtrti2.o dtrtri.o dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \
|
||||
- dsgesv.o dlag2s.o slag2d.o
|
||||
+ dgbbrd.$(SUFFIX) dgbcon.$(SUFFIX) dgbequ.$(SUFFIX) dgbrfs.$(SUFFIX) dgbsv.$(SUFFIX) \
|
||||
+ dgbsvx.$(SUFFIX) dgbtf2.$(SUFFIX) dgbtrf.$(SUFFIX) dgbtrs.$(SUFFIX) dgebak.$(SUFFIX) dgebal.$(SUFFIX) dgebd2.$(SUFFIX) \
|
||||
+ dgebrd.$(SUFFIX) dgecon.$(SUFFIX) dgeequ.$(SUFFIX) dgees.$(SUFFIX) dgeesx.$(SUFFIX) dgeev.$(SUFFIX) dgeevx.$(SUFFIX) \
|
||||
+ dgegs.$(SUFFIX) dgegv.$(SUFFIX) dgehd2.$(SUFFIX) dgehrd.$(SUFFIX) dgelq2.$(SUFFIX) dgelqf.$(SUFFIX) \
|
||||
+ dgels.$(SUFFIX) dgelsd.$(SUFFIX) dgelss.$(SUFFIX) dgelsx.$(SUFFIX) dgelsy.$(SUFFIX) dgeql2.$(SUFFIX) dgeqlf.$(SUFFIX) \
|
||||
+ dgeqp3.$(SUFFIX) dgeqpf.$(SUFFIX) dgeqr2.$(SUFFIX) dgeqrf.$(SUFFIX) dgerfs.$(SUFFIX) dgerq2.$(SUFFIX) dgerqf.$(SUFFIX) \
|
||||
+ dgesc2.$(SUFFIX) dgesdd.$(SUFFIX) dgesvd.$(SUFFIX) dgesvx.$(SUFFIX) dgetc2.$(SUFFIX) \
|
||||
+ dgetri.$(SUFFIX) \
|
||||
+ dggbak.$(SUFFIX) dggbal.$(SUFFIX) dgges.$(SUFFIX) dggesx.$(SUFFIX) dggev.$(SUFFIX) dggevx.$(SUFFIX) \
|
||||
+ dggglm.$(SUFFIX) dgghrd.$(SUFFIX) dgglse.$(SUFFIX) dggqrf.$(SUFFIX) \
|
||||
+ dggrqf.$(SUFFIX) dggsvd.$(SUFFIX) dggsvp.$(SUFFIX) dgtcon.$(SUFFIX) dgtrfs.$(SUFFIX) dgtsv.$(SUFFIX) \
|
||||
+ dgtsvx.$(SUFFIX) dgttrf.$(SUFFIX) dgttrs.$(SUFFIX) dgtts2.$(SUFFIX) dhgeqz.$(SUFFIX) \
|
||||
+ dhsein.$(SUFFIX) dhseqr.$(SUFFIX) dlabrd.$(SUFFIX) dlacon.$(SUFFIX) dlacn2.$(SUFFIX) \
|
||||
+ dlaein.$(SUFFIX) dlaexc.$(SUFFIX) dlag2.$(SUFFIX) dlags2.$(SUFFIX) dlagtm.$(SUFFIX) dlagv2.$(SUFFIX) dlahqr.$(SUFFIX) \
|
||||
+ dlahrd.$(SUFFIX) dlahr2.$(SUFFIX) dlaic1.$(SUFFIX) dlaln2.$(SUFFIX) dlals0.$(SUFFIX) dlalsa.$(SUFFIX) dlalsd.$(SUFFIX) \
|
||||
+ dlangb.$(SUFFIX) dlange.$(SUFFIX) dlangt.$(SUFFIX) dlanhs.$(SUFFIX) dlansb.$(SUFFIX) dlansp.$(SUFFIX) \
|
||||
+ dlansy.$(SUFFIX) dlantb.$(SUFFIX) dlantp.$(SUFFIX) dlantr.$(SUFFIX) dlanv2.$(SUFFIX) \
|
||||
+ dlapll.$(SUFFIX) dlapmt.$(SUFFIX) \
|
||||
+ dlaqgb.$(SUFFIX) dlaqge.$(SUFFIX) dlaqp2.$(SUFFIX) dlaqps.$(SUFFIX) dlaqsb.$(SUFFIX) dlaqsp.$(SUFFIX) dlaqsy.$(SUFFIX) \
|
||||
+ dlaqr0.$(SUFFIX) dlaqr1.$(SUFFIX) dlaqr2.$(SUFFIX) dlaqr3.$(SUFFIX) dlaqr4.$(SUFFIX) dlaqr5.$(SUFFIX) \
|
||||
+ dlaqtr.$(SUFFIX) dlar1v.$(SUFFIX) dlar2v.$(SUFFIX) \
|
||||
+ dlarf.$(SUFFIX) dlarfb.$(SUFFIX) dlarfg.$(SUFFIX) dlarft.$(SUFFIX) dlarfx.$(SUFFIX) dlargv.$(SUFFIX) \
|
||||
+ dlarrv.$(SUFFIX) dlartv.$(SUFFIX) \
|
||||
+ dlarz.$(SUFFIX) dlarzb.$(SUFFIX) dlarzt.$(SUFFIX) dlasy2.$(SUFFIX) dlasyf.$(SUFFIX) \
|
||||
+ dlatbs.$(SUFFIX) dlatdf.$(SUFFIX) dlatps.$(SUFFIX) dlatrd.$(SUFFIX) dlatrs.$(SUFFIX) dlatrz.$(SUFFIX) dlatzm.$(SUFFIX) \
|
||||
+ dopgtr.$(SUFFIX) dopmtr.$(SUFFIX) dorg2l.$(SUFFIX) dorg2r.$(SUFFIX) \
|
||||
+ dorgbr.$(SUFFIX) dorghr.$(SUFFIX) dorgl2.$(SUFFIX) dorglq.$(SUFFIX) dorgql.$(SUFFIX) dorgqr.$(SUFFIX) dorgr2.$(SUFFIX) \
|
||||
+ dorgrq.$(SUFFIX) dorgtr.$(SUFFIX) dorm2l.$(SUFFIX) dorm2r.$(SUFFIX) \
|
||||
+ dormbr.$(SUFFIX) dormhr.$(SUFFIX) dorml2.$(SUFFIX) dormlq.$(SUFFIX) dormql.$(SUFFIX) dormqr.$(SUFFIX) dormr2.$(SUFFIX) \
|
||||
+ dormr3.$(SUFFIX) dormrq.$(SUFFIX) dormrz.$(SUFFIX) dormtr.$(SUFFIX) dpbcon.$(SUFFIX) dpbequ.$(SUFFIX) dpbrfs.$(SUFFIX) \
|
||||
+ dpbstf.$(SUFFIX) dpbsv.$(SUFFIX) dpbsvx.$(SUFFIX) \
|
||||
+ dpbtf2.$(SUFFIX) dpbtrf.$(SUFFIX) dpbtrs.$(SUFFIX) dpocon.$(SUFFIX) dpoequ.$(SUFFIX) dporfs.$(SUFFIX) dposv.$(SUFFIX) \
|
||||
+ dposvx.$(SUFFIX) dpotrs.$(SUFFIX) dppcon.$(SUFFIX) dppequ.$(SUFFIX) \
|
||||
+ dpprfs.$(SUFFIX) dppsv.$(SUFFIX) dppsvx.$(SUFFIX) dpptrf.$(SUFFIX) dpptri.$(SUFFIX) dpptrs.$(SUFFIX) dptcon.$(SUFFIX) \
|
||||
+ dpteqr.$(SUFFIX) dptrfs.$(SUFFIX) dptsv.$(SUFFIX) dptsvx.$(SUFFIX) dpttrs.$(SUFFIX) dptts2.$(SUFFIX) drscl.$(SUFFIX) \
|
||||
+ dsbev.$(SUFFIX) dsbevd.$(SUFFIX) dsbevx.$(SUFFIX) dsbgst.$(SUFFIX) dsbgv.$(SUFFIX) dsbgvd.$(SUFFIX) dsbgvx.$(SUFFIX) \
|
||||
+ dsbtrd.$(SUFFIX) dspcon.$(SUFFIX) dspev.$(SUFFIX) dspevd.$(SUFFIX) dspevx.$(SUFFIX) dspgst.$(SUFFIX) \
|
||||
+ dspgv.$(SUFFIX) dspgvd.$(SUFFIX) dspgvx.$(SUFFIX) dsprfs.$(SUFFIX) dspsv.$(SUFFIX) dspsvx.$(SUFFIX) dsptrd.$(SUFFIX) \
|
||||
+ dsptrf.$(SUFFIX) dsptri.$(SUFFIX) dsptrs.$(SUFFIX) dstegr.$(SUFFIX) dstein.$(SUFFIX) dstev.$(SUFFIX) dstevd.$(SUFFIX) dstevr.$(SUFFIX) \
|
||||
+ dstevx.$(SUFFIX) dsycon.$(SUFFIX) dsyev.$(SUFFIX) dsyevd.$(SUFFIX) dsyevr.$(SUFFIX) \
|
||||
+ dsyevx.$(SUFFIX) dsygs2.$(SUFFIX) dsygst.$(SUFFIX) dsygv.$(SUFFIX) dsygvd.$(SUFFIX) dsygvx.$(SUFFIX) dsyrfs.$(SUFFIX) \
|
||||
+ dsysv.$(SUFFIX) dsysvx.$(SUFFIX) \
|
||||
+ dsytd2.$(SUFFIX) dsytf2.$(SUFFIX) dsytrd.$(SUFFIX) dsytrf.$(SUFFIX) dsytri.$(SUFFIX) dsytrs.$(SUFFIX) dtbcon.$(SUFFIX) \
|
||||
+ dtbrfs.$(SUFFIX) dtbtrs.$(SUFFIX) dtgevc.$(SUFFIX) dtgex2.$(SUFFIX) dtgexc.$(SUFFIX) dtgsen.$(SUFFIX) \
|
||||
+ dtgsja.$(SUFFIX) dtgsna.$(SUFFIX) dtgsy2.$(SUFFIX) dtgsyl.$(SUFFIX) dtpcon.$(SUFFIX) dtprfs.$(SUFFIX) dtptri.$(SUFFIX) \
|
||||
+ dtptrs.$(SUFFIX) \
|
||||
+ dtrcon.$(SUFFIX) dtrevc.$(SUFFIX) dtrexc.$(SUFFIX) dtrrfs.$(SUFFIX) dtrsen.$(SUFFIX) dtrsna.$(SUFFIX) dtrsyl.$(SUFFIX) \
|
||||
+ dtrtrs.$(SUFFIX) dtzrqf.$(SUFFIX) dtzrzf.$(SUFFIX) dstemr.$(SUFFIX) \
|
||||
+ dsgesv.$(SUFFIX) dlag2s.$(SUFFIX) slag2d.$(SUFFIX)
|
||||
|
||||
ZLASRC = \
|
||||
- zbdsqr.o zgbbrd.o zgbcon.o zgbequ.o zgbrfs.o zgbsv.o zgbsvx.o \
|
||||
- zgbtf2.o zgbtrf.o zgbtrs.o zgebak.o zgebal.o zgebd2.o zgebrd.o \
|
||||
- zgecon.o zgeequ.o zgees.o zgeesx.o zgeev.o zgeevx.o \
|
||||
- zgegs.o zgegv.o zgehd2.o zgehrd.o zgelq2.o zgelqf.o \
|
||||
- zgels.o zgelsd.o zgelss.o zgelsx.o zgelsy.o zgeql2.o zgeqlf.o zgeqp3.o \
|
||||
- zgeqpf.o zgeqr2.o zgeqrf.o zgerfs.o zgerq2.o zgerqf.o \
|
||||
- zgesc2.o zgesdd.o zgesv.o zgesvd.o zgesvx.o zgetc2.o zgetf2.o zgetrf.o \
|
||||
- zgetri.o zgetrs.o \
|
||||
- zggbak.o zggbal.o zgges.o zggesx.o zggev.o zggevx.o zggglm.o \
|
||||
- zgghrd.o zgglse.o zggqrf.o zggrqf.o \
|
||||
- zggsvd.o zggsvp.o \
|
||||
- zgtcon.o zgtrfs.o zgtsv.o zgtsvx.o zgttrf.o zgttrs.o zgtts2.o zhbev.o \
|
||||
- zhbevd.o zhbevx.o zhbgst.o zhbgv.o zhbgvd.o zhbgvx.o zhbtrd.o \
|
||||
- zhecon.o zheev.o zheevd.o zheevr.o zheevx.o zhegs2.o zhegst.o \
|
||||
- zhegv.o zhegvd.o zhegvx.o zherfs.o zhesv.o zhesvx.o zhetd2.o \
|
||||
- zhetf2.o zhetrd.o \
|
||||
- zhetrf.o zhetri.o zhetrs.o zhgeqz.o zhpcon.o zhpev.o zhpevd.o \
|
||||
- zhpevx.o zhpgst.o zhpgv.o zhpgvd.o zhpgvx.o zhprfs.o zhpsv.o \
|
||||
- zhpsvx.o \
|
||||
- zhptrd.o zhptrf.o zhptri.o zhptrs.o zhsein.o zhseqr.o zlabrd.o \
|
||||
- zlacgv.o zlacon.o zlacn2.o zlacp2.o zlacpy.o zlacrm.o zlacrt.o zladiv.o \
|
||||
- zlaed0.o zlaed7.o zlaed8.o \
|
||||
- zlaein.o zlaesy.o zlaev2.o zlags2.o zlagtm.o \
|
||||
- zlahef.o zlahqr.o \
|
||||
- zlahrd.o zlahr2.o zlaic1.o zlals0.o zlalsa.o zlalsd.o zlangb.o zlange.o \
|
||||
- zlangt.o zlanhb.o \
|
||||
- zlanhe.o \
|
||||
- zlanhp.o zlanhs.o zlanht.o zlansb.o zlansp.o zlansy.o zlantb.o \
|
||||
- zlantp.o zlantr.o zlapll.o zlapmt.o zlaqgb.o zlaqge.o \
|
||||
- zlaqhb.o zlaqhe.o zlaqhp.o zlaqp2.o zlaqps.o zlaqsb.o \
|
||||
- zlaqr0.o zlaqr1.o zlaqr2.o zlaqr3.o zlaqr4.o zlaqr5.o \
|
||||
- zlaqsp.o zlaqsy.o zlar1v.o zlar2v.o zlarcm.o zlarf.o zlarfb.o \
|
||||
- zlarfg.o zlarft.o \
|
||||
- zlarfx.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \
|
||||
- zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \
|
||||
- zlassq.o zlaswp.o zlasyf.o \
|
||||
- zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlatzm.o zlauu2.o \
|
||||
- zlauum.o zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \
|
||||
- zpbsvx.o zpbtf2.o zpbtrf.o zpbtrs.o zpocon.o zpoequ.o zporfs.o \
|
||||
- zposv.o zposvx.o zpotf2.o zpotrf.o zpotri.o zpotrs.o zppcon.o \
|
||||
- zppequ.o zpprfs.o zppsv.o zppsvx.o zpptrf.o zpptri.o zpptrs.o \
|
||||
- zptcon.o zpteqr.o zptrfs.o zptsv.o zptsvx.o zpttrf.o zpttrs.o zptts2.o \
|
||||
- zrot.o zspcon.o zspmv.o zspr.o zsprfs.o zspsv.o \
|
||||
- zspsvx.o zsptrf.o zsptri.o zsptrs.o zdrscl.o zstedc.o \
|
||||
- zstegr.o zstein.o zsteqr.o zsycon.o zsymv.o \
|
||||
- zsyr.o zsyrfs.o zsysv.o zsysvx.o zsytf2.o zsytrf.o zsytri.o \
|
||||
- zsytrs.o ztbcon.o ztbrfs.o ztbtrs.o ztgevc.o ztgex2.o \
|
||||
- ztgexc.o ztgsen.o ztgsja.o ztgsna.o ztgsy2.o ztgsyl.o ztpcon.o \
|
||||
- ztprfs.o ztptri.o \
|
||||
- ztptrs.o ztrcon.o ztrevc.o ztrexc.o ztrrfs.o ztrsen.o ztrsna.o \
|
||||
- ztrsyl.o ztrti2.o ztrtri.o ztrtrs.o ztzrqf.o ztzrzf.o zung2l.o \
|
||||
- zung2r.o zungbr.o zunghr.o zungl2.o zunglq.o zungql.o zungqr.o zungr2.o \
|
||||
- zungrq.o zungtr.o zunm2l.o zunm2r.o zunmbr.o zunmhr.o zunml2.o \
|
||||
- zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \
|
||||
- zunmtr.o zupgtr.o \
|
||||
- zupmtr.o izmax1.o dzsum1.o zstemr.o \
|
||||
- zcgesv.o zlag2c.o clag2z.o
|
||||
+ zbdsqr.$(SUFFIX) zgbbrd.$(SUFFIX) zgbcon.$(SUFFIX) zgbequ.$(SUFFIX) zgbrfs.$(SUFFIX) zgbsv.$(SUFFIX) zgbsvx.$(SUFFIX) \
|
||||
+ zgbtf2.$(SUFFIX) zgbtrf.$(SUFFIX) zgbtrs.$(SUFFIX) zgebak.$(SUFFIX) zgebal.$(SUFFIX) zgebd2.$(SUFFIX) zgebrd.$(SUFFIX) \
|
||||
+ zgecon.$(SUFFIX) zgeequ.$(SUFFIX) zgees.$(SUFFIX) zgeesx.$(SUFFIX) zgeev.$(SUFFIX) zgeevx.$(SUFFIX) \
|
||||
+ zgegs.$(SUFFIX) zgegv.$(SUFFIX) zgehd2.$(SUFFIX) zgehrd.$(SUFFIX) zgelq2.$(SUFFIX) zgelqf.$(SUFFIX) \
|
||||
+ zgels.$(SUFFIX) zgelsd.$(SUFFIX) zgelss.$(SUFFIX) zgelsx.$(SUFFIX) zgelsy.$(SUFFIX) zgeql2.$(SUFFIX) zgeqlf.$(SUFFIX) zgeqp3.$(SUFFIX) \
|
||||
+ zgeqpf.$(SUFFIX) zgeqr2.$(SUFFIX) zgeqrf.$(SUFFIX) zgerfs.$(SUFFIX) zgerq2.$(SUFFIX) zgerqf.$(SUFFIX) \
|
||||
+ zgesc2.$(SUFFIX) zgesdd.$(SUFFIX) zgesvd.$(SUFFIX) zgesvx.$(SUFFIX) zgetc2.$(SUFFIX) \
|
||||
+ zgetri.$(SUFFIX) \
|
||||
+ zggbak.$(SUFFIX) zggbal.$(SUFFIX) zgges.$(SUFFIX) zggesx.$(SUFFIX) zggev.$(SUFFIX) zggevx.$(SUFFIX) zggglm.$(SUFFIX) \
|
||||
+ zgghrd.$(SUFFIX) zgglse.$(SUFFIX) zggqrf.$(SUFFIX) zggrqf.$(SUFFIX) \
|
||||
+ zggsvd.$(SUFFIX) zggsvp.$(SUFFIX) \
|
||||
+ zgtcon.$(SUFFIX) zgtrfs.$(SUFFIX) zgtsv.$(SUFFIX) zgtsvx.$(SUFFIX) zgttrf.$(SUFFIX) zgttrs.$(SUFFIX) zgtts2.$(SUFFIX) zhbev.$(SUFFIX) \
|
||||
+ zhbevd.$(SUFFIX) zhbevx.$(SUFFIX) zhbgst.$(SUFFIX) zhbgv.$(SUFFIX) zhbgvd.$(SUFFIX) zhbgvx.$(SUFFIX) zhbtrd.$(SUFFIX) \
|
||||
+ zhecon.$(SUFFIX) zheev.$(SUFFIX) zheevd.$(SUFFIX) zheevr.$(SUFFIX) zheevx.$(SUFFIX) zhegs2.$(SUFFIX) zhegst.$(SUFFIX) \
|
||||
+ zhegv.$(SUFFIX) zhegvd.$(SUFFIX) zhegvx.$(SUFFIX) zherfs.$(SUFFIX) zhesv.$(SUFFIX) zhesvx.$(SUFFIX) zhetd2.$(SUFFIX) \
|
||||
+ zhetf2.$(SUFFIX) zhetrd.$(SUFFIX) \
|
||||
+ zhetrf.$(SUFFIX) zhetri.$(SUFFIX) zhetrs.$(SUFFIX) zhgeqz.$(SUFFIX) zhpcon.$(SUFFIX) zhpev.$(SUFFIX) zhpevd.$(SUFFIX) \
|
||||
+ zhpevx.$(SUFFIX) zhpgst.$(SUFFIX) zhpgv.$(SUFFIX) zhpgvd.$(SUFFIX) zhpgvx.$(SUFFIX) zhprfs.$(SUFFIX) zhpsv.$(SUFFIX) \
|
||||
+ zhpsvx.$(SUFFIX) \
|
||||
+ zhptrd.$(SUFFIX) zhptrf.$(SUFFIX) zhptri.$(SUFFIX) zhptrs.$(SUFFIX) zhsein.$(SUFFIX) zhseqr.$(SUFFIX) zlabrd.$(SUFFIX) \
|
||||
+ zlacgv.$(SUFFIX) zlacon.$(SUFFIX) zlacn2.$(SUFFIX) zlacp2.$(SUFFIX) zlacpy.$(SUFFIX) zlacrm.$(SUFFIX) zlacrt.$(SUFFIX) zladiv.$(SUFFIX) \
|
||||
+ zlaed0.$(SUFFIX) zlaed7.$(SUFFIX) zlaed8.$(SUFFIX) \
|
||||
+ zlaein.$(SUFFIX) zlaesy.$(SUFFIX) zlaev2.$(SUFFIX) zlags2.$(SUFFIX) zlagtm.$(SUFFIX) \
|
||||
+ zlahef.$(SUFFIX) zlahqr.$(SUFFIX) \
|
||||
+ zlahrd.$(SUFFIX) zlahr2.$(SUFFIX) zlaic1.$(SUFFIX) zlals0.$(SUFFIX) zlalsa.$(SUFFIX) zlalsd.$(SUFFIX) zlangb.$(SUFFIX) zlange.$(SUFFIX) \
|
||||
+ zlangt.$(SUFFIX) zlanhb.$(SUFFIX) \
|
||||
+ zlanhe.$(SUFFIX) \
|
||||
+ zlanhp.$(SUFFIX) zlanhs.$(SUFFIX) zlanht.$(SUFFIX) zlansb.$(SUFFIX) zlansp.$(SUFFIX) zlansy.$(SUFFIX) zlantb.$(SUFFIX) \
|
||||
+ zlantp.$(SUFFIX) zlantr.$(SUFFIX) zlapll.$(SUFFIX) zlapmt.$(SUFFIX) zlaqgb.$(SUFFIX) zlaqge.$(SUFFIX) \
|
||||
+ zlaqhb.$(SUFFIX) zlaqhe.$(SUFFIX) zlaqhp.$(SUFFIX) zlaqp2.$(SUFFIX) zlaqps.$(SUFFIX) zlaqsb.$(SUFFIX) \
|
||||
+ zlaqr0.$(SUFFIX) zlaqr1.$(SUFFIX) zlaqr2.$(SUFFIX) zlaqr3.$(SUFFIX) zlaqr4.$(SUFFIX) zlaqr5.$(SUFFIX) \
|
||||
+ zlaqsp.$(SUFFIX) zlaqsy.$(SUFFIX) zlar1v.$(SUFFIX) zlar2v.$(SUFFIX) zlarcm.$(SUFFIX) zlarf.$(SUFFIX) zlarfb.$(SUFFIX) \
|
||||
+ zlarfg.$(SUFFIX) zlarft.$(SUFFIX) \
|
||||
+ zlarfx.$(SUFFIX) zlargv.$(SUFFIX) zlarnv.$(SUFFIX) zlarrv.$(SUFFIX) zlartg.$(SUFFIX) zlartv.$(SUFFIX) \
|
||||
+ zlarz.$(SUFFIX) zlarzb.$(SUFFIX) zlarzt.$(SUFFIX) zlascl.$(SUFFIX) zlaset.$(SUFFIX) zlasr.$(SUFFIX) \
|
||||
+ zlassq.$(SUFFIX) zlasyf.$(SUFFIX) \
|
||||
+ zlatbs.$(SUFFIX) zlatdf.$(SUFFIX) zlatps.$(SUFFIX) zlatrd.$(SUFFIX) zlatrs.$(SUFFIX) zlatrz.$(SUFFIX) zlatzm.$(SUFFIX) \
|
||||
+ zpbcon.$(SUFFIX) zpbequ.$(SUFFIX) zpbrfs.$(SUFFIX) zpbstf.$(SUFFIX) zpbsv.$(SUFFIX) \
|
||||
+ zpbsvx.$(SUFFIX) zpbtf2.$(SUFFIX) zpbtrf.$(SUFFIX) zpbtrs.$(SUFFIX) zpocon.$(SUFFIX) zpoequ.$(SUFFIX) zporfs.$(SUFFIX) \
|
||||
+ zposv.$(SUFFIX) zposvx.$(SUFFIX) zpotrs.$(SUFFIX) zppcon.$(SUFFIX) \
|
||||
+ zppequ.$(SUFFIX) zpprfs.$(SUFFIX) zppsv.$(SUFFIX) zppsvx.$(SUFFIX) zpptrf.$(SUFFIX) zpptri.$(SUFFIX) zpptrs.$(SUFFIX) \
|
||||
+ zptcon.$(SUFFIX) zpteqr.$(SUFFIX) zptrfs.$(SUFFIX) zptsv.$(SUFFIX) zptsvx.$(SUFFIX) zpttrf.$(SUFFIX) zpttrs.$(SUFFIX) zptts2.$(SUFFIX) \
|
||||
+ zrot.$(SUFFIX) zspcon.$(SUFFIX) zsprfs.$(SUFFIX) zspsv.$(SUFFIX) \
|
||||
+ zspsvx.$(SUFFIX) zsptrf.$(SUFFIX) zsptri.$(SUFFIX) zsptrs.$(SUFFIX) zdrscl.$(SUFFIX) zstedc.$(SUFFIX) \
|
||||
+ zstegr.$(SUFFIX) zstein.$(SUFFIX) zsteqr.$(SUFFIX) zsycon.$(SUFFIX) \
|
||||
+ zsyrfs.$(SUFFIX) zsysv.$(SUFFIX) zsysvx.$(SUFFIX) zsytf2.$(SUFFIX) zsytrf.$(SUFFIX) zsytri.$(SUFFIX) \
|
||||
+ zsytrs.$(SUFFIX) ztbcon.$(SUFFIX) ztbrfs.$(SUFFIX) ztbtrs.$(SUFFIX) ztgevc.$(SUFFIX) ztgex2.$(SUFFIX) \
|
||||
+ ztgexc.$(SUFFIX) ztgsen.$(SUFFIX) ztgsja.$(SUFFIX) ztgsna.$(SUFFIX) ztgsy2.$(SUFFIX) ztgsyl.$(SUFFIX) ztpcon.$(SUFFIX) \
|
||||
+ ztprfs.$(SUFFIX) ztptri.$(SUFFIX) \
|
||||
+ ztptrs.$(SUFFIX) ztrcon.$(SUFFIX) ztrevc.$(SUFFIX) ztrexc.$(SUFFIX) ztrrfs.$(SUFFIX) ztrsen.$(SUFFIX) ztrsna.$(SUFFIX) \
|
||||
+ ztrsyl.$(SUFFIX) ztrtrs.$(SUFFIX) ztzrqf.$(SUFFIX) ztzrzf.$(SUFFIX) zung2l.$(SUFFIX) \
|
||||
+ zung2r.$(SUFFIX) zungbr.$(SUFFIX) zunghr.$(SUFFIX) zungl2.$(SUFFIX) zunglq.$(SUFFIX) zungql.$(SUFFIX) zungqr.$(SUFFIX) zungr2.$(SUFFIX) \
|
||||
+ zungrq.$(SUFFIX) zungtr.$(SUFFIX) zunm2l.$(SUFFIX) zunm2r.$(SUFFIX) zunmbr.$(SUFFIX) zunmhr.$(SUFFIX) zunml2.$(SUFFIX) \
|
||||
+ zunmlq.$(SUFFIX) zunmql.$(SUFFIX) zunmqr.$(SUFFIX) zunmr2.$(SUFFIX) zunmr3.$(SUFFIX) zunmrq.$(SUFFIX) zunmrz.$(SUFFIX) \
|
||||
+ zunmtr.$(SUFFIX) zupgtr.$(SUFFIX) \
|
||||
+ zupmtr.$(SUFFIX) izmax1.$(SUFFIX) dzsum1.$(SUFFIX) zstemr.$(SUFFIX) \
|
||||
+ zcgesv.$(SUFFIX) zlag2c.$(SUFFIX) clag2z.$(SUFFIX)
|
||||
|
||||
all: ../$(LAPACKLIB)
|
||||
|
||||
+lapack_prof: ../$(LAPACKLIB_P)
|
||||
+
|
||||
ALLOBJ=$(SLASRC) $(DLASRC) $(CLASRC) $(ZLASRC) $(SCLAUX) $(DZLAUX) \
|
||||
$(ALLAUX)
|
||||
|
||||
+ALLOBJ_P = $(ALLOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||
+
|
||||
../$(LAPACKLIB): $(ALLOBJ)
|
||||
$(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ)
|
||||
$(RANLIB) $@
|
||||
|
||||
+../$(LAPACKLIB_P): $(ALLOBJ_P)
|
||||
+ $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ_P)
|
||||
+ $(RANLIB) $@
|
||||
+
|
||||
single: $(SLASRC) $(ALLAUX) $(SCLAUX)
|
||||
$(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(SLASRC) $(ALLAUX) \
|
||||
$(SCLAUX)
|
||||
@@ -317,6 +325,7 @@
|
||||
$(DZLAUX)
|
||||
$(RANLIB) ../$(LAPACKLIB)
|
||||
|
||||
+
|
||||
$(ALLAUX): $(FRC)
|
||||
$(SCLAUX): $(FRC)
|
||||
$(DZLAUX): $(FRC)
|
||||
@@ -329,11 +338,16 @@
|
||||
@FRC=$(FRC)
|
||||
|
||||
clean:
|
||||
- rm -f *.o
|
||||
+ rm -f *.$(SUFFIX) *.$(PSUFFIX)
|
||||
|
||||
-.f.o:
|
||||
+%.$(SUFFIX): %.f
|
||||
$(FORTRAN) $(OPTS) -c $< -o $@
|
||||
|
||||
-slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
-dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
+%.$(PSUFFIX): %.f
|
||||
+ $(FORTRAN) $(POPTS) -c $< -o $@
|
||||
+
|
||||
+slaruv.$(SUFFIX): slaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
+dlaruv.$(SUFFIX): dlaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
|
||||
+slaruv.$(PSUFFIX): slaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
+dlaruv.$(PSUFFIX): dlaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
diff -ruN lapack-3.1.1.old/TESTING/EIG/Makefile lapack-3.1.1/TESTING/EIG/Makefile
|
||||
--- lapack-3.1.1.old/TESTING/EIG/Makefile 2007-02-20 15:33:03.000000000 -0600
|
||||
+++ lapack-3.1.1/TESTING/EIG/Makefile 2009-12-16 14:40:35.000000000 -0600
|
||||
@@ -78,7 +78,7 @@
|
||||
cget35.o cget36.o cget37.o cget38.o cget51.o cget52.o \
|
||||
cget54.o cglmts.o cgqrts.o cgrqts.o cgsvts.o \
|
||||
chbt21.o chet21.o chet22.o chpt21.o chst01.o \
|
||||
- clarfy.o clarhs.o clatm4.o clctes.o clctsx.o clsets.o csbmv.o \
|
||||
+ clarfy.o clarhs.o clatm4.o clctes.o clctsx.o clsets.o \
|
||||
csgt01.o cslect.o \
|
||||
cstt21.o cstt22.o cunt01.o cunt03.o
|
||||
|
||||
@@ -115,7 +115,7 @@
|
||||
zget35.o zget36.o zget37.o zget38.o zget51.o zget52.o \
|
||||
zget54.o zglmts.o zgqrts.o zgrqts.o zgsvts.o \
|
||||
zhbt21.o zhet21.o zhet22.o zhpt21.o zhst01.o \
|
||||
- zlarfy.o zlarhs.o zlatm4.o zlctes.o zlctsx.o zlsets.o zsbmv.o \
|
||||
+ zlarfy.o zlarhs.o zlatm4.o zlctes.o zlctsx.o zlsets.o \
|
||||
zsgt01.o zslect.o \
|
||||
zstt21.o zstt22.o zunt01.o zunt03.o
|
||||
|
||||
@@ -129,22 +129,22 @@
|
||||
../xeigtsts: $(SEIGTST) $(SCIGTST) $(AEIGTST) ; \
|
||||
$(LOADER) $(LOADOPTS) -o $@ \
|
||||
$(SEIGTST) $(SCIGTST) $(AEIGTST) ../../$(TMGLIB) \
|
||||
- ../../$(LAPACKLIB) $(BLASLIB)
|
||||
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB)
|
||||
|
||||
../xeigtstc: $(CEIGTST) $(SCIGTST) $(AEIGTST) ; \
|
||||
$(LOADER) $(LOADOPTS) -o $@ \
|
||||
$(CEIGTST) $(SCIGTST) $(AEIGTST) ../../$(TMGLIB) \
|
||||
- ../../$(LAPACKLIB) $(BLASLIB)
|
||||
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB)
|
||||
|
||||
../xeigtstd: $(DEIGTST) $(DZIGTST) $(AEIGTST) ; \
|
||||
$(LOADER) $(LOADOPTS) -o $@ \
|
||||
$(DEIGTST) $(DZIGTST) $(AEIGTST) ../../$(TMGLIB) \
|
||||
- ../../$(LAPACKLIB) $(BLASLIB)
|
||||
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB)
|
||||
|
||||
../xeigtstz: $(ZEIGTST) $(DZIGTST) $(AEIGTST) ; \
|
||||
$(LOADER) $(LOADOPTS) -o $@ \
|
||||
$(ZEIGTST) $(DZIGTST) $(AEIGTST) ../../$(TMGLIB) \
|
||||
- ../../$(LAPACKLIB) $(BLASLIB)
|
||||
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB)
|
||||
|
||||
$(AEIGTST): $(FRC)
|
||||
$(SCIGTST): $(FRC)
|
||||
diff -ruN lapack-3.1.1.old/TESTING/LIN/Makefile lapack-3.1.1/TESTING/LIN/Makefile
|
||||
--- lapack-3.1.1.old/TESTING/LIN/Makefile 2007-02-20 15:33:03.000000000 -0600
|
||||
+++ lapack-3.1.1/TESTING/LIN/Makefile 2009-12-16 14:40:35.000000000 -0600
|
||||
@@ -97,7 +97,7 @@
|
||||
cqpt01.o cqrt01.o cqrt02.o cqrt03.o cqrt11.o \
|
||||
cqrt12.o cqrt13.o cqrt14.o cqrt15.o cqrt16.o \
|
||||
cqrt17.o crqt01.o crqt02.o crqt03.o crzt01.o crzt02.o \
|
||||
- csbmv.o cspt01.o \
|
||||
+ cspt01.o \
|
||||
cspt02.o cspt03.o csyt01.o csyt02.o csyt03.o \
|
||||
ctbt02.o ctbt03.o ctbt05.o ctbt06.o ctpt01.o \
|
||||
ctpt02.o ctpt03.o ctpt05.o ctpt06.o ctrt01.o \
|
||||
@@ -159,7 +159,7 @@
|
||||
zqpt01.o zqrt01.o zqrt02.o zqrt03.o zqrt11.o \
|
||||
zqrt12.o zqrt13.o zqrt14.o zqrt15.o zqrt16.o \
|
||||
zqrt17.o zrqt01.o zrqt02.o zrqt03.o zrzt01.o zrzt02.o \
|
||||
- zsbmv.o zspt01.o \
|
||||
+ zspt01.o \
|
||||
zspt02.o zspt03.o zsyt01.o zsyt02.o zsyt03.o \
|
||||
ztbt02.o ztbt03.o ztbt05.o ztbt06.o ztpt01.o \
|
||||
ztpt02.o ztpt03.o ztpt05.o ztpt06.o ztrt01.o \
|
||||
@@ -176,7 +176,7 @@
|
||||
zdrvab.o zerrab.o zget08.o \
|
||||
alaerh.o alahd.o aladhd.o alareq.o \
|
||||
chkxer.o zget02.o zlarhs.o zlatb4.o \
|
||||
- zsbmv.o xerbla.o
|
||||
+ xerbla.o
|
||||
|
||||
all: single double complex complex16 proto-double proto-complex16
|
||||
|
||||
@@ -190,27 +190,27 @@
|
||||
|
||||
../xlintsts : $(ALINTST) $(SLINTST) $(SCLNTST)
|
||||
$(LOADER) $(LOADOPTS) $(ALINTST) $(SCLNTST) $(SLINTST) \
|
||||
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
|
||||
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
|
||||
|
||||
../xlintstc : $(ALINTST) $(CLINTST) $(SCLNTST)
|
||||
$(LOADER) $(LOADOPTS) $(ALINTST) $(SCLNTST) $(CLINTST) \
|
||||
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
|
||||
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
|
||||
|
||||
../xlintstd : $(ALINTST) $(DLINTST) $(DZLNTST)
|
||||
$(LOADER) $(LOADOPTS) $(ALINTST) $(DZLNTST) $(DLINTST) \
|
||||
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
|
||||
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
|
||||
|
||||
../xlintstz : $(ALINTST) $(ZLINTST) $(DZLNTST)
|
||||
$(LOADER) $(LOADOPTS) $(ALINTST) $(DZLNTST) $(ZLINTST) \
|
||||
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
|
||||
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
|
||||
|
||||
../xlintstds : $(DSLINTST)
|
||||
$(LOADER) $(LOADOPTS) $(DSLINTST) \
|
||||
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
|
||||
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
|
||||
|
||||
../xlintstzc : $(ZCLINTST)
|
||||
$(LOADER) $(LOADOPTS) $(ZCLINTST) \
|
||||
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
|
||||
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
|
||||
|
||||
$(ALINTST): $(FRC)
|
||||
$(SCLNTST): $(FRC)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,932 +0,0 @@
|
||||
diff -ruN lapack-3.4.1.old/INSTALL/Makefile lapack-3.4.1/INSTALL/Makefile
|
||||
--- lapack-3.4.1.old/INSTALL/Makefile 2011-10-01 04:37:03 +0200
|
||||
+++ lapack-3.4.1/INSTALL/Makefile 2012-04-22 21:48:48 +0200
|
||||
@@ -27,7 +27,7 @@
|
||||
$(LOADER) $(LOADOPTS) -o testversion ilaver.o LAPACK_version.o
|
||||
|
||||
clean:
|
||||
- rm -f *.o
|
||||
+ rm -f *.o test*
|
||||
.f.o:
|
||||
$(FORTRAN) $(OPTS) -c $< -o $@
|
||||
|
||||
diff -ruN lapack-3.4.1.old/Makefile lapack-3.4.1/Makefile
|
||||
--- lapack-3.4.1.old/Makefile 2012-04-13 20:13:07 +0200
|
||||
+++ lapack-3.4.1/Makefile 2012-04-22 21:48:07 +0200
|
||||
@@ -20,9 +20,12 @@
|
||||
blaslib:
|
||||
( cd BLAS/SRC; $(MAKE) )
|
||||
|
||||
-lapacklib: lapack_install
|
||||
+lapacklib:
|
||||
( cd SRC; $(MAKE) )
|
||||
|
||||
+lapack_prof:
|
||||
+ ( cd SRC; $(MAKE) lapack_prof)
|
||||
+
|
||||
lapackelib: lapacklib
|
||||
( cd lapacke; $(MAKE) )
|
||||
|
||||
diff -ruN lapack-3.4.1.old/SRC/Makefile lapack-3.4.1/SRC/Makefile
|
||||
--- lapack-3.4.1.old/SRC/Makefile 2012-04-02 21:06:36 +0200
|
||||
+++ lapack-3.4.1/SRC/Makefile 2012-04-22 21:40:21 +0200
|
||||
@@ -54,363 +54,371 @@
|
||||
#
|
||||
#######################################################################
|
||||
|
||||
-ALLAUX = ilaenv.o ieeeck.o lsamen.o xerbla.o xerbla_array.o iparmq.o \
|
||||
- ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \
|
||||
- ../INSTALL/ilaver.o ../INSTALL/lsame.o ../INSTALL/slamch.o
|
||||
+ALLAUX = ilaenv.$(SUFFIX) ieeeck.$(SUFFIX) lsamen.$(SUFFIX) xerbla_array.$(SUFFIX) iparmq.$(SUFFIX) \
|
||||
+ ilaprec.$(SUFFIX) ilatrans.$(SUFFIX) ilauplo.$(SUFFIX) iladiag.$(SUFFIX) chla_transtype.$(SUFFIX) \
|
||||
+ ../INSTALL/ilaver.$(SUFFIX)
|
||||
|
||||
SCLAUX = \
|
||||
- sbdsdc.o \
|
||||
- sbdsqr.o sdisna.o slabad.o slacpy.o sladiv.o slae2.o slaebz.o \
|
||||
- slaed0.o slaed1.o slaed2.o slaed3.o slaed4.o slaed5.o slaed6.o \
|
||||
- slaed7.o slaed8.o slaed9.o slaeda.o slaev2.o slagtf.o \
|
||||
- slagts.o slamrg.o slanst.o \
|
||||
- slapy2.o slapy3.o slarnv.o \
|
||||
- slarra.o slarrb.o slarrc.o slarrd.o slarre.o slarrf.o slarrj.o \
|
||||
- slarrk.o slarrr.o slaneg.o \
|
||||
- slartg.o slaruv.o slas2.o slascl.o \
|
||||
- slasd0.o slasd1.o slasd2.o slasd3.o slasd4.o slasd5.o slasd6.o \
|
||||
- slasd7.o slasd8.o slasda.o slasdq.o slasdt.o \
|
||||
- slaset.o slasq1.o slasq2.o slasq3.o slasq4.o slasq5.o slasq6.o \
|
||||
- slasr.o slasrt.o slassq.o slasv2.o spttrf.o sstebz.o sstedc.o \
|
||||
- ssteqr.o ssterf.o slaisnan.o sisnan.o \
|
||||
- slartgp.o slartgs.o \
|
||||
- ../INSTALL/second_$(TIMER).o
|
||||
+ sbdsdc.$(SUFFIX) \
|
||||
+ sbdsqr.$(SUFFIX) sdisna.$(SUFFIX) slabad.$(SUFFIX) slacpy.$(SUFFIX) sladiv.$(SUFFIX) slae2.$(SUFFIX) slaebz.$(SUFFIX) \
|
||||
+ slaed0.$(SUFFIX) slaed1.$(SUFFIX) slaed2.$(SUFFIX) slaed3.$(SUFFIX) slaed4.$(SUFFIX) slaed5.$(SUFFIX) slaed6.$(SUFFIX) \
|
||||
+ slaed7.$(SUFFIX) slaed8.$(SUFFIX) slaed9.$(SUFFIX) slaeda.$(SUFFIX) slaev2.$(SUFFIX) slagtf.$(SUFFIX) \
|
||||
+ slagts.$(SUFFIX) slamrg.$(SUFFIX) slanst.$(SUFFIX) \
|
||||
+ slapy2.$(SUFFIX) slapy3.$(SUFFIX) slarnv.$(SUFFIX) \
|
||||
+ slarra.$(SUFFIX) slarrb.$(SUFFIX) slarrc.$(SUFFIX) slarrd.$(SUFFIX) slarre.$(SUFFIX) slarrf.$(SUFFIX) slarrj.$(SUFFIX) \
|
||||
+ slarrk.$(SUFFIX) slarrr.$(SUFFIX) slaneg.$(SUFFIX) \
|
||||
+ slartg.$(SUFFIX) slaruv.$(SUFFIX) slas2.$(SUFFIX) slascl.$(SUFFIX) \
|
||||
+ slasd0.$(SUFFIX) slasd1.$(SUFFIX) slasd2.$(SUFFIX) slasd3.$(SUFFIX) slasd4.$(SUFFIX) slasd5.$(SUFFIX) slasd6.$(SUFFIX) \
|
||||
+ slasd7.$(SUFFIX) slasd8.$(SUFFIX) slasda.$(SUFFIX) slasdq.$(SUFFIX) slasdt.$(SUFFIX) \
|
||||
+ slaset.$(SUFFIX) slasq1.$(SUFFIX) slasq2.$(SUFFIX) slasq3.$(SUFFIX) slasq4.$(SUFFIX) slasq5.$(SUFFIX) slasq6.$(SUFFIX) \
|
||||
+ slasr.$(SUFFIX) slasrt.$(SUFFIX) slassq.$(SUFFIX) slasv2.$(SUFFIX) spttrf.$(SUFFIX) sstebz.$(SUFFIX) sstedc.$(SUFFIX) \
|
||||
+ ssteqr.$(SUFFIX) ssterf.$(SUFFIX) slaisnan.$(SUFFIX) sisnan.$(SUFFIX) \
|
||||
+ slartgp.$(SUFFIX) slartgs.$(SUFFIX) \
|
||||
+ ../INSTALL/second_$(TIMER).$(SUFFIX)
|
||||
|
||||
DZLAUX = \
|
||||
- dbdsdc.o \
|
||||
- dbdsqr.o ddisna.o dlabad.o dlacpy.o dladiv.o dlae2.o dlaebz.o \
|
||||
- dlaed0.o dlaed1.o dlaed2.o dlaed3.o dlaed4.o dlaed5.o dlaed6.o \
|
||||
- dlaed7.o dlaed8.o dlaed9.o dlaeda.o dlaev2.o dlagtf.o \
|
||||
- dlagts.o dlamrg.o dlanst.o \
|
||||
- dlapy2.o dlapy3.o dlarnv.o \
|
||||
- dlarra.o dlarrb.o dlarrc.o dlarrd.o dlarre.o dlarrf.o dlarrj.o \
|
||||
- dlarrk.o dlarrr.o dlaneg.o \
|
||||
- dlartg.o dlaruv.o dlas2.o dlascl.o \
|
||||
- dlasd0.o dlasd1.o dlasd2.o dlasd3.o dlasd4.o dlasd5.o dlasd6.o \
|
||||
- dlasd7.o dlasd8.o dlasda.o dlasdq.o dlasdt.o \
|
||||
- dlaset.o dlasq1.o dlasq2.o dlasq3.o dlasq4.o dlasq5.o dlasq6.o \
|
||||
- dlasr.o dlasrt.o dlassq.o dlasv2.o dpttrf.o dstebz.o dstedc.o \
|
||||
- dsteqr.o dsterf.o dlaisnan.o disnan.o \
|
||||
- dlartgp.o dlartgs.o \
|
||||
- ../INSTALL/dlamch.o ../INSTALL/dsecnd_$(TIMER).o
|
||||
+ dbdsdc.$(SUFFIX) \
|
||||
+ dbdsqr.$(SUFFIX) ddisna.$(SUFFIX) dlabad.$(SUFFIX) dlacpy.$(SUFFIX) dladiv.$(SUFFIX) dlae2.$(SUFFIX) dlaebz.$(SUFFIX) \
|
||||
+ dlaed0.$(SUFFIX) dlaed1.$(SUFFIX) dlaed2.$(SUFFIX) dlaed3.$(SUFFIX) dlaed4.$(SUFFIX) dlaed5.$(SUFFIX) dlaed6.$(SUFFIX) \
|
||||
+ dlaed7.$(SUFFIX) dlaed8.$(SUFFIX) dlaed9.$(SUFFIX) dlaeda.$(SUFFIX) dlaev2.$(SUFFIX) dlagtf.$(SUFFIX) \
|
||||
+ dlagts.$(SUFFIX) dlamrg.$(SUFFIX) dlanst.$(SUFFIX) \
|
||||
+ dlapy2.$(SUFFIX) dlapy3.$(SUFFIX) dlarnv.$(SUFFIX) \
|
||||
+ dlarra.$(SUFFIX) dlarrb.$(SUFFIX) dlarrc.$(SUFFIX) dlarrd.$(SUFFIX) dlarre.$(SUFFIX) dlarrf.$(SUFFIX) dlarrj.$(SUFFIX) \
|
||||
+ dlarrk.$(SUFFIX) dlarrr.$(SUFFIX) dlaneg.$(SUFFIX) \
|
||||
+ dlartg.$(SUFFIX) dlaruv.$(SUFFIX) dlas2.$(SUFFIX) dlascl.$(SUFFIX) \
|
||||
+ dlasd0.$(SUFFIX) dlasd1.$(SUFFIX) dlasd2.$(SUFFIX) dlasd3.$(SUFFIX) dlasd4.$(SUFFIX) dlasd5.$(SUFFIX) dlasd6.$(SUFFIX) \
|
||||
+ dlasd7.$(SUFFIX) dlasd8.$(SUFFIX) dlasda.$(SUFFIX) dlasdq.$(SUFFIX) dlasdt.$(SUFFIX) \
|
||||
+ dlaset.$(SUFFIX) dlasq1.$(SUFFIX) dlasq2.$(SUFFIX) dlasq3.$(SUFFIX) dlasq4.$(SUFFIX) dlasq5.$(SUFFIX) dlasq6.$(SUFFIX) \
|
||||
+ dlasr.$(SUFFIX) dlasrt.$(SUFFIX) dlassq.$(SUFFIX) dlasv2.$(SUFFIX) dpttrf.$(SUFFIX) dstebz.$(SUFFIX) dstedc.$(SUFFIX) \
|
||||
+ dsteqr.$(SUFFIX) dsterf.$(SUFFIX) dlaisnan.$(SUFFIX) disnan.$(SUFFIX) \
|
||||
+ dlartgp.$(SUFFIX) dlartgs.$(SUFFIX) \
|
||||
+ ../INSTALL/dsecnd_$(TIMER).$(SUFFIX)
|
||||
|
||||
SLASRC = \
|
||||
- sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \
|
||||
- sgbsvx.o sgbtf2.o sgbtrf.o sgbtrs.o sgebak.o sgebal.o sgebd2.o \
|
||||
- sgebrd.o sgecon.o sgeequ.o sgees.o sgeesx.o sgeev.o sgeevx.o \
|
||||
- sgegs.o sgegv.o sgehd2.o sgehrd.o sgelq2.o sgelqf.o \
|
||||
- sgels.o sgelsd.o sgelss.o sgelsx.o sgelsy.o sgeql2.o sgeqlf.o \
|
||||
- sgeqp3.o sgeqpf.o sgeqr2.o sgeqr2p.o sgeqrf.o sgeqrfp.o sgerfs.o \
|
||||
- sgerq2.o sgerqf.o sgesc2.o sgesdd.o sgesv.o sgesvd.o sgesvx.o \
|
||||
- sgetc2.o sgetf2.o sgetri.o \
|
||||
- sggbak.o sggbal.o sgges.o sggesx.o sggev.o sggevx.o \
|
||||
- sggglm.o sgghrd.o sgglse.o sggqrf.o \
|
||||
- sggrqf.o sggsvd.o sggsvp.o sgtcon.o sgtrfs.o sgtsv.o \
|
||||
- sgtsvx.o sgttrf.o sgttrs.o sgtts2.o shgeqz.o \
|
||||
- shsein.o shseqr.o slabrd.o slacon.o slacn2.o \
|
||||
- slaein.o slaexc.o slag2.o slags2.o slagtm.o slagv2.o slahqr.o \
|
||||
- slahrd.o slahr2.o slaic1.o slaln2.o slals0.o slalsa.o slalsd.o \
|
||||
- slangb.o slange.o slangt.o slanhs.o slansb.o slansp.o \
|
||||
- slansy.o slantb.o slantp.o slantr.o slanv2.o \
|
||||
- slapll.o slapmt.o \
|
||||
- slaqgb.o slaqge.o slaqp2.o slaqps.o slaqsb.o slaqsp.o slaqsy.o \
|
||||
- slaqr0.o slaqr1.o slaqr2.o slaqr3.o slaqr4.o slaqr5.o \
|
||||
- slaqtr.o slar1v.o slar2v.o ilaslr.o ilaslc.o \
|
||||
- slarf.o slarfb.o slarfg.o slarfgp.o slarft.o slarfx.o slargv.o \
|
||||
- slarrv.o slartv.o \
|
||||
- slarz.o slarzb.o slarzt.o slaswp.o slasy2.o slasyf.o \
|
||||
- slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o slatzm.o \
|
||||
- slauu2.o slauum.o sopgtr.o sopmtr.o sorg2l.o sorg2r.o \
|
||||
- sorgbr.o sorghr.o sorgl2.o sorglq.o sorgql.o sorgqr.o sorgr2.o \
|
||||
- sorgrq.o sorgtr.o sorm2l.o sorm2r.o \
|
||||
- sormbr.o sormhr.o sorml2.o sormlq.o sormql.o sormqr.o sormr2.o \
|
||||
- sormr3.o sormrq.o sormrz.o sormtr.o spbcon.o spbequ.o spbrfs.o \
|
||||
- spbstf.o spbsv.o spbsvx.o \
|
||||
- spbtf2.o spbtrf.o spbtrs.o spocon.o spoequ.o sporfs.o sposv.o \
|
||||
- sposvx.o spotf2.o spotri.o spstrf.o spstf2.o \
|
||||
- sppcon.o sppequ.o \
|
||||
- spprfs.o sppsv.o sppsvx.o spptrf.o spptri.o spptrs.o sptcon.o \
|
||||
- spteqr.o sptrfs.o sptsv.o sptsvx.o spttrs.o sptts2.o srscl.o \
|
||||
- ssbev.o ssbevd.o ssbevx.o ssbgst.o ssbgv.o ssbgvd.o ssbgvx.o \
|
||||
- ssbtrd.o sspcon.o sspev.o sspevd.o sspevx.o sspgst.o \
|
||||
- sspgv.o sspgvd.o sspgvx.o ssprfs.o sspsv.o sspsvx.o ssptrd.o \
|
||||
- ssptrf.o ssptri.o ssptrs.o sstegr.o sstein.o sstev.o sstevd.o sstevr.o \
|
||||
- sstevx.o \
|
||||
- ssycon.o ssyev.o ssyevd.o ssyevr.o ssyevx.o ssygs2.o \
|
||||
- ssygst.o ssygv.o ssygvd.o ssygvx.o ssyrfs.o ssysv.o ssysvx.o \
|
||||
- ssytd2.o ssytf2.o ssytrd.o ssytrf.o ssytri.o ssytri2.o ssytri2x.o \
|
||||
- ssyswapr.o ssytrs.o ssytrs2.o ssyconv.o \
|
||||
- stbcon.o \
|
||||
- stbrfs.o stbtrs.o stgevc.o stgex2.o stgexc.o stgsen.o \
|
||||
- stgsja.o stgsna.o stgsy2.o stgsyl.o stpcon.o stprfs.o stptri.o \
|
||||
- stptrs.o \
|
||||
- strcon.o strevc.o strexc.o strrfs.o strsen.o strsna.o strsyl.o \
|
||||
- strti2.o strtri.o strtrs.o stzrqf.o stzrzf.o sstemr.o \
|
||||
- slansf.o spftrf.o spftri.o spftrs.o ssfrk.o stfsm.o stftri.o stfttp.o \
|
||||
- stfttr.o stpttf.o stpttr.o strttf.o strttp.o \
|
||||
- sgejsv.o sgesvj.o sgsvj0.o sgsvj1.o \
|
||||
- sgeequb.o ssyequb.o spoequb.o sgbequb.o \
|
||||
- sbbcsd.o slapmr.o sorbdb.o sorcsd.o \
|
||||
- sgeqrt.o sgeqrt2.o sgeqrt3.o sgemqrt.o \
|
||||
- stpqrt.o stpqrt2.o stpmqrt.o stprfb.o
|
||||
+ sgbbrd.$(SUFFIX) sgbcon.$(SUFFIX) sgbequ.$(SUFFIX) sgbrfs.$(SUFFIX) sgbsv.$(SUFFIX) \
|
||||
+ sgbsvx.$(SUFFIX) sgbtf2.$(SUFFIX) sgbtrf.$(SUFFIX) sgbtrs.$(SUFFIX) sgebak.$(SUFFIX) sgebal.$(SUFFIX) sgebd2.$(SUFFIX) \
|
||||
+ sgebrd.$(SUFFIX) sgecon.$(SUFFIX) sgeequ.$(SUFFIX) sgees.$(SUFFIX) sgeesx.$(SUFFIX) sgeev.$(SUFFIX) sgeevx.$(SUFFIX) \
|
||||
+ sgegs.$(SUFFIX) sgegv.$(SUFFIX) sgehd2.$(SUFFIX) sgehrd.$(SUFFIX) sgelq2.$(SUFFIX) sgelqf.$(SUFFIX) \
|
||||
+ sgels.$(SUFFIX) sgelsd.$(SUFFIX) sgelss.$(SUFFIX) sgelsx.$(SUFFIX) sgelsy.$(SUFFIX) sgeql2.$(SUFFIX) sgeqlf.$(SUFFIX) \
|
||||
+ sgeqp3.$(SUFFIX) sgeqpf.$(SUFFIX) sgeqr2.$(SUFFIX) sgeqr2p.$(SUFFIX) sgeqrf.$(SUFFIX) sgeqrfp.$(SUFFIX) sgerfs.$(SUFFIX) \
|
||||
+ sgerq2.$(SUFFIX) sgerqf.$(SUFFIX) sgesc2.$(SUFFIX) sgesdd.$(SUFFIX) sgesv.$(SUFFIX) sgesvd.$(SUFFIX) sgesvx.$(SUFFIX) \
|
||||
+ sgetc2.$(SUFFIX) sgetri.$(SUFFIX) \
|
||||
+ sggbak.$(SUFFIX) sggbal.$(SUFFIX) sgges.$(SUFFIX) sggesx.$(SUFFIX) sggev.$(SUFFIX) sggevx.$(SUFFIX) \
|
||||
+ sggglm.$(SUFFIX) sgghrd.$(SUFFIX) sgglse.$(SUFFIX) sggqrf.$(SUFFIX) \
|
||||
+ sggrqf.$(SUFFIX) sggsvd.$(SUFFIX) sggsvp.$(SUFFIX) sgtcon.$(SUFFIX) sgtrfs.$(SUFFIX) sgtsv.$(SUFFIX) \
|
||||
+ sgtsvx.$(SUFFIX) sgttrf.$(SUFFIX) sgttrs.$(SUFFIX) sgtts2.$(SUFFIX) shgeqz.$(SUFFIX) \
|
||||
+ shsein.$(SUFFIX) shseqr.$(SUFFIX) slabrd.$(SUFFIX) slacon.$(SUFFIX) slacn2.$(SUFFIX) \
|
||||
+ slaein.$(SUFFIX) slaexc.$(SUFFIX) slag2.$(SUFFIX) slags2.$(SUFFIX) slagtm.$(SUFFIX) slagv2.$(SUFFIX) slahqr.$(SUFFIX) \
|
||||
+ slahrd.$(SUFFIX) slahr2.$(SUFFIX) slaic1.$(SUFFIX) slaln2.$(SUFFIX) slals0.$(SUFFIX) slalsa.$(SUFFIX) slalsd.$(SUFFIX) \
|
||||
+ slangb.$(SUFFIX) slange.$(SUFFIX) slangt.$(SUFFIX) slanhs.$(SUFFIX) slansb.$(SUFFIX) slansp.$(SUFFIX) \
|
||||
+ slansy.$(SUFFIX) slantb.$(SUFFIX) slantp.$(SUFFIX) slantr.$(SUFFIX) slanv2.$(SUFFIX) \
|
||||
+ slapll.$(SUFFIX) slapmt.$(SUFFIX) \
|
||||
+ slaqgb.$(SUFFIX) slaqge.$(SUFFIX) slaqp2.$(SUFFIX) slaqps.$(SUFFIX) slaqsb.$(SUFFIX) slaqsp.$(SUFFIX) slaqsy.$(SUFFIX) \
|
||||
+ slaqr0.$(SUFFIX) slaqr1.$(SUFFIX) slaqr2.$(SUFFIX) slaqr3.$(SUFFIX) slaqr4.$(SUFFIX) slaqr5.$(SUFFIX) \
|
||||
+ slaqtr.$(SUFFIX) slar1v.$(SUFFIX) slar2v.$(SUFFIX) ilaslr.$(SUFFIX) ilaslc.$(SUFFIX) \
|
||||
+ slarf.$(SUFFIX) slarfb.$(SUFFIX) slarfg.$(SUFFIX) slarfgp.$(SUFFIX) slarft.$(SUFFIX) slarfx.$(SUFFIX) slargv.$(SUFFIX) \
|
||||
+ slarrv.$(SUFFIX) slartv.$(SUFFIX) \
|
||||
+ slarz.$(SUFFIX) slarzb.$(SUFFIX) slarzt.$(SUFFIX) slasy2.$(SUFFIX) slasyf.$(SUFFIX) \
|
||||
+ slatbs.$(SUFFIX) slatdf.$(SUFFIX) slatps.$(SUFFIX) slatrd.$(SUFFIX) slatrs.$(SUFFIX) slatrz.$(SUFFIX) slatzm.$(SUFFIX) \
|
||||
+ sopgtr.$(SUFFIX) sopmtr.$(SUFFIX) sorg2l.$(SUFFIX) sorg2r.$(SUFFIX) \
|
||||
+ sorgbr.$(SUFFIX) sorghr.$(SUFFIX) sorgl2.$(SUFFIX) sorglq.$(SUFFIX) sorgql.$(SUFFIX) sorgqr.$(SUFFIX) sorgr2.$(SUFFIX) \
|
||||
+ sorgrq.$(SUFFIX) sorgtr.$(SUFFIX) sorm2l.$(SUFFIX) sorm2r.$(SUFFIX) \
|
||||
+ sormbr.$(SUFFIX) sormhr.$(SUFFIX) sorml2.$(SUFFIX) sormlq.$(SUFFIX) sormql.$(SUFFIX) sormqr.$(SUFFIX) sormr2.$(SUFFIX) \
|
||||
+ sormr3.$(SUFFIX) sormrq.$(SUFFIX) sormrz.$(SUFFIX) sormtr.$(SUFFIX) spbcon.$(SUFFIX) spbequ.$(SUFFIX) spbrfs.$(SUFFIX) \
|
||||
+ spbstf.$(SUFFIX) spbsv.$(SUFFIX) spbsvx.$(SUFFIX) \
|
||||
+ spbtf2.$(SUFFIX) spbtrf.$(SUFFIX) spbtrs.$(SUFFIX) spocon.$(SUFFIX) spoequ.$(SUFFIX) sporfs.$(SUFFIX) sposv.$(SUFFIX) \
|
||||
+ sposvx.$(SUFFIX) spotri.$(SUFFIX) spstrf.$(SUFFIX) spstf2.$(SUFFIX) \
|
||||
+ sppcon.$(SUFFIX) sppequ.$(SUFFIX) \
|
||||
+ spprfs.$(SUFFIX) sppsv.$(SUFFIX) sppsvx.$(SUFFIX) spptrf.$(SUFFIX) spptri.$(SUFFIX) spptrs.$(SUFFIX) sptcon.$(SUFFIX) \
|
||||
+ spteqr.$(SUFFIX) sptrfs.$(SUFFIX) sptsv.$(SUFFIX) sptsvx.$(SUFFIX) spttrs.$(SUFFIX) sptts2.$(SUFFIX) srscl.$(SUFFIX) \
|
||||
+ ssbev.$(SUFFIX) ssbevd.$(SUFFIX) ssbevx.$(SUFFIX) ssbgst.$(SUFFIX) ssbgv.$(SUFFIX) ssbgvd.$(SUFFIX) ssbgvx.$(SUFFIX) \
|
||||
+ ssbtrd.$(SUFFIX) sspcon.$(SUFFIX) sspev.$(SUFFIX) sspevd.$(SUFFIX) sspevx.$(SUFFIX) sspgst.$(SUFFIX) \
|
||||
+ sspgv.$(SUFFIX) sspgvd.$(SUFFIX) sspgvx.$(SUFFIX) ssprfs.$(SUFFIX) sspsv.$(SUFFIX) sspsvx.$(SUFFIX) ssptrd.$(SUFFIX) \
|
||||
+ ssptrf.$(SUFFIX) ssptri.$(SUFFIX) ssptrs.$(SUFFIX) sstegr.$(SUFFIX) sstein.$(SUFFIX) sstev.$(SUFFIX) sstevd.$(SUFFIX) sstevr.$(SUFFIX) \
|
||||
+ sstevx.$(SUFFIX) \
|
||||
+ ssycon.$(SUFFIX) ssyev.$(SUFFIX) ssyevd.$(SUFFIX) ssyevr.$(SUFFIX) ssyevx.$(SUFFIX) ssygs2.$(SUFFIX) \
|
||||
+ ssygst.$(SUFFIX) ssygv.$(SUFFIX) ssygvd.$(SUFFIX) ssygvx.$(SUFFIX) ssyrfs.$(SUFFIX) ssysv.$(SUFFIX) ssysvx.$(SUFFIX) \
|
||||
+ ssytd2.$(SUFFIX) ssytf2.$(SUFFIX) ssytrd.$(SUFFIX) ssytrf.$(SUFFIX) ssytri.$(SUFFIX) ssytri2.$(SUFFIX) ssytri2x.$(SUFFIX) \
|
||||
+ ssyswapr.$(SUFFIX) ssytrs.$(SUFFIX) ssytrs2.$(SUFFIX) ssyconv.$(SUFFIX) \
|
||||
+ stbcon.$(SUFFIX) \
|
||||
+ stbrfs.$(SUFFIX) stbtrs.$(SUFFIX) stgevc.$(SUFFIX) stgex2.$(SUFFIX) stgexc.$(SUFFIX) stgsen.$(SUFFIX) \
|
||||
+ stgsja.$(SUFFIX) stgsna.$(SUFFIX) stgsy2.$(SUFFIX) stgsyl.$(SUFFIX) stpcon.$(SUFFIX) stprfs.$(SUFFIX) stptri.$(SUFFIX) \
|
||||
+ stptrs.$(SUFFIX) \
|
||||
+ strcon.$(SUFFIX) strevc.$(SUFFIX) strexc.$(SUFFIX) strrfs.$(SUFFIX) strsen.$(SUFFIX) strsna.$(SUFFIX) strsyl.$(SUFFIX) \
|
||||
+ strtrs.$(SUFFIX) stzrqf.$(SUFFIX) stzrzf.$(SUFFIX) sstemr.$(SUFFIX) \
|
||||
+ slansf.$(SUFFIX) spftrf.$(SUFFIX) spftri.$(SUFFIX) spftrs.$(SUFFIX) ssfrk.$(SUFFIX) stfsm.$(SUFFIX) stftri.$(SUFFIX) stfttp.$(SUFFIX) \
|
||||
+ stfttr.$(SUFFIX) stpttf.$(SUFFIX) stpttr.$(SUFFIX) strttf.$(SUFFIX) strttp.$(SUFFIX) \
|
||||
+ sgejsv.$(SUFFIX) sgesvj.$(SUFFIX) sgsvj0.$(SUFFIX) sgsvj1.$(SUFFIX) \
|
||||
+ sgeequb.$(SUFFIX) ssyequb.$(SUFFIX) spoequb.$(SUFFIX) sgbequb.$(SUFFIX) \
|
||||
+ sbbcsd.$(SUFFIX) slapmr.$(SUFFIX) sorbdb.$(SUFFIX) sorcsd.$(SUFFIX) \
|
||||
+ sgeqrt.$(SUFFIX) sgeqrt2.$(SUFFIX) sgeqrt3.$(SUFFIX) sgemqrt.$(SUFFIX) \
|
||||
+ stpqrt.$(SUFFIX) stpqrt2.$(SUFFIX) stpmqrt.$(SUFFIX) stprfb.$(SUFFIX)
|
||||
|
||||
-DSLASRC = spotrs.o sgetrs.o spotrf.o sgetrf.o
|
||||
+DSLASRC = spotrs.$(SUFFIX)
|
||||
|
||||
ifdef USEXBLAS
|
||||
-SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \
|
||||
- sla_gercond.o sla_gerpvgrw.o ssysvxx.o ssyrfsx.o \
|
||||
- sla_syrfsx_extended.o sla_syamv.o sla_syrcond.o sla_syrpvgrw.o \
|
||||
- sposvxx.o sporfsx.o sla_porfsx_extended.o sla_porcond.o \
|
||||
- sla_porpvgrw.o sgbsvxx.o sgbrfsx.o sla_gbrfsx_extended.o \
|
||||
- sla_gbamv.o sla_gbrcond.o sla_gbrpvgrw.o sla_lin_berr.o slarscl2.o \
|
||||
- slascl2.o sla_wwaddw.o
|
||||
+SXLASRC = sgesvxx.$(SUFFIX) sgerfsx.$(SUFFIX) sla_gerfsx_extended.$(SUFFIX) sla_geamv.$(SUFFIX) \
|
||||
+ sla_gercond.$(SUFFIX) sla_gerpvgrw.$(SUFFIX) ssysvxx.$(SUFFIX) ssyrfsx.$(SUFFIX) \
|
||||
+ sla_syrfsx_extended.$(SUFFIX) sla_syamv.$(SUFFIX) sla_syrcond.$(SUFFIX) sla_syrpvgrw.$(SUFFIX) \
|
||||
+ sposvxx.$(SUFFIX) sporfsx.$(SUFFIX) sla_porfsx_extended.$(SUFFIX) sla_porcond.$(SUFFIX) \
|
||||
+ sla_porpvgrw.$(SUFFIX) sgbsvxx.$(SUFFIX) sgbrfsx.$(SUFFIX) sla_gbrfsx_extended.$(SUFFIX) \
|
||||
+ sla_gbamv.$(SUFFIX) sla_gbrcond.$(SUFFIX) sla_gbrpvgrw.$(SUFFIX) sla_lin_berr.$(SUFFIX) slarscl2.$(SUFFIX) \
|
||||
+ slascl2.$(SUFFIX) sla_wwaddw.$(SUFFIX)
|
||||
endif
|
||||
|
||||
CLASRC = \
|
||||
- cbdsqr.o cgbbrd.o cgbcon.o cgbequ.o cgbrfs.o cgbsv.o cgbsvx.o \
|
||||
- cgbtf2.o cgbtrf.o cgbtrs.o cgebak.o cgebal.o cgebd2.o cgebrd.o \
|
||||
- cgecon.o cgeequ.o cgees.o cgeesx.o cgeev.o cgeevx.o \
|
||||
- cgegs.o cgegv.o cgehd2.o cgehrd.o cgelq2.o cgelqf.o \
|
||||
- cgels.o cgelsd.o cgelss.o cgelsx.o cgelsy.o cgeql2.o cgeqlf.o cgeqp3.o \
|
||||
- cgeqpf.o cgeqr2.o cgeqr2p.o cgeqrf.o cgeqrfp.o cgerfs.o \
|
||||
- cgerq2.o cgerqf.o cgesc2.o cgesdd.o cgesv.o cgesvd.o \
|
||||
- cgesvx.o cgetc2.o cgetf2.o cgetri.o \
|
||||
- cggbak.o cggbal.o cgges.o cggesx.o cggev.o cggevx.o cggglm.o \
|
||||
- cgghrd.o cgglse.o cggqrf.o cggrqf.o \
|
||||
- cggsvd.o cggsvp.o \
|
||||
- cgtcon.o cgtrfs.o cgtsv.o cgtsvx.o cgttrf.o cgttrs.o cgtts2.o chbev.o \
|
||||
- chbevd.o chbevx.o chbgst.o chbgv.o chbgvd.o chbgvx.o chbtrd.o \
|
||||
- checon.o cheev.o cheevd.o cheevr.o cheevx.o chegs2.o chegst.o \
|
||||
- chegv.o chegvd.o chegvx.o cherfs.o chesv.o chesvx.o chetd2.o \
|
||||
- chetf2.o chetrd.o \
|
||||
- chetrf.o chetri.o chetri2.o chetri2x.o cheswapr.o \
|
||||
- chetrs.o chetrs2.o chgeqz.o chpcon.o chpev.o chpevd.o \
|
||||
- chpevx.o chpgst.o chpgv.o chpgvd.o chpgvx.o chprfs.o chpsv.o \
|
||||
- chpsvx.o \
|
||||
- chptrd.o chptrf.o chptri.o chptrs.o chsein.o chseqr.o clabrd.o \
|
||||
- clacgv.o clacon.o clacn2.o clacp2.o clacpy.o clacrm.o clacrt.o cladiv.o \
|
||||
- claed0.o claed7.o claed8.o \
|
||||
- claein.o claesy.o claev2.o clags2.o clagtm.o \
|
||||
- clahef.o clahqr.o \
|
||||
- clahrd.o clahr2.o claic1.o clals0.o clalsa.o clalsd.o clangb.o clange.o clangt.o \
|
||||
- clanhb.o clanhe.o \
|
||||
- clanhp.o clanhs.o clanht.o clansb.o clansp.o clansy.o clantb.o \
|
||||
- clantp.o clantr.o clapll.o clapmt.o clarcm.o claqgb.o claqge.o \
|
||||
- claqhb.o claqhe.o claqhp.o claqp2.o claqps.o claqsb.o \
|
||||
- claqr0.o claqr1.o claqr2.o claqr3.o claqr4.o claqr5.o \
|
||||
- claqsp.o claqsy.o clar1v.o clar2v.o ilaclr.o ilaclc.o \
|
||||
- clarf.o clarfb.o clarfg.o clarft.o clarfgp.o \
|
||||
- clarfx.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \
|
||||
- clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \
|
||||
- claswp.o clasyf.o clatbs.o clatdf.o clatps.o clatrd.o clatrs.o clatrz.o \
|
||||
- clatzm.o clauu2.o clauum.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \
|
||||
- cpbsvx.o cpbtf2.o cpbtrf.o cpbtrs.o cpocon.o cpoequ.o cporfs.o \
|
||||
- cposv.o cposvx.o cpotf2.o cpotri.o cpstrf.o cpstf2.o \
|
||||
- cppcon.o cppequ.o cpprfs.o cppsv.o cppsvx.o cpptrf.o cpptri.o cpptrs.o \
|
||||
- cptcon.o cpteqr.o cptrfs.o cptsv.o cptsvx.o cpttrf.o cpttrs.o cptts2.o \
|
||||
- crot.o cspcon.o cspmv.o cspr.o csprfs.o cspsv.o \
|
||||
- cspsvx.o csptrf.o csptri.o csptrs.o csrscl.o cstedc.o \
|
||||
- cstegr.o cstein.o csteqr.o \
|
||||
- csycon.o csymv.o \
|
||||
- csyr.o csyrfs.o csysv.o csysvx.o csytf2.o csytrf.o csytri.o csytri2.o csytri2x.o \
|
||||
- csyswapr.o csytrs.o csytrs2.o csyconv.o \
|
||||
- ctbcon.o ctbrfs.o ctbtrs.o ctgevc.o ctgex2.o \
|
||||
- ctgexc.o ctgsen.o ctgsja.o ctgsna.o ctgsy2.o ctgsyl.o ctpcon.o \
|
||||
- ctprfs.o ctptri.o \
|
||||
- ctptrs.o ctrcon.o ctrevc.o ctrexc.o ctrrfs.o ctrsen.o ctrsna.o \
|
||||
- ctrsyl.o ctrti2.o ctrtri.o ctrtrs.o ctzrqf.o ctzrzf.o cung2l.o cung2r.o \
|
||||
- cungbr.o cunghr.o cungl2.o cunglq.o cungql.o cungqr.o cungr2.o \
|
||||
- cungrq.o cungtr.o cunm2l.o cunm2r.o cunmbr.o cunmhr.o cunml2.o \
|
||||
- cunmlq.o cunmql.o cunmqr.o cunmr2.o cunmr3.o cunmrq.o cunmrz.o \
|
||||
- cunmtr.o cupgtr.o cupmtr.o icmax1.o scsum1.o cstemr.o \
|
||||
- chfrk.o ctfttp.o clanhf.o cpftrf.o cpftri.o cpftrs.o ctfsm.o ctftri.o \
|
||||
- ctfttr.o ctpttf.o ctpttr.o ctrttf.o ctrttp.o \
|
||||
- cgeequb.o cgbequb.o csyequb.o cpoequb.o cheequb.o \
|
||||
- cbbcsd.o clapmr.o cunbdb.o cuncsd.o \
|
||||
- cgeqrt.o cgeqrt2.o cgeqrt3.o cgemqrt.o \
|
||||
- ctpqrt.o ctpqrt2.o ctpmqrt.o ctprfb.o
|
||||
+ cbdsqr.$(SUFFIX) cgbbrd.$(SUFFIX) cgbcon.$(SUFFIX) cgbequ.$(SUFFIX) cgbrfs.$(SUFFIX) cgbsv.$(SUFFIX) cgbsvx.$(SUFFIX) \
|
||||
+ cgbtf2.$(SUFFIX) cgbtrf.$(SUFFIX) cgbtrs.$(SUFFIX) cgebak.$(SUFFIX) cgebal.$(SUFFIX) cgebd2.$(SUFFIX) cgebrd.$(SUFFIX) \
|
||||
+ cgecon.$(SUFFIX) cgeequ.$(SUFFIX) cgees.$(SUFFIX) cgeesx.$(SUFFIX) cgeev.$(SUFFIX) cgeevx.$(SUFFIX) \
|
||||
+ cgegs.$(SUFFIX) cgegv.$(SUFFIX) cgehd2.$(SUFFIX) cgehrd.$(SUFFIX) cgelq2.$(SUFFIX) cgelqf.$(SUFFIX) \
|
||||
+ cgels.$(SUFFIX) cgelsd.$(SUFFIX) cgelss.$(SUFFIX) cgelsx.$(SUFFIX) cgelsy.$(SUFFIX) cgeql2.$(SUFFIX) cgeqlf.$(SUFFIX) cgeqp3.$(SUFFIX) \
|
||||
+ cgeqpf.$(SUFFIX) cgeqr2.$(SUFFIX) cgeqr2p.$(SUFFIX) cgeqrf.$(SUFFIX) cgeqrfp.$(SUFFIX) cgerfs.$(SUFFIX) \
|
||||
+ cgerq2.$(SUFFIX) cgerqf.$(SUFFIX) cgesc2.$(SUFFIX) cgesdd.$(SUFFIX) cgesv.$(SUFFIX) cgesvd.$(SUFFIX) \
|
||||
+ cgesvx.$(SUFFIX) cgetc2.$(SUFFIX) cgetri.$(SUFFIX) \
|
||||
+ cggbak.$(SUFFIX) cggbal.$(SUFFIX) cgges.$(SUFFIX) cggesx.$(SUFFIX) cggev.$(SUFFIX) cggevx.$(SUFFIX) cggglm.$(SUFFIX) \
|
||||
+ cgghrd.$(SUFFIX) cgglse.$(SUFFIX) cggqrf.$(SUFFIX) cggrqf.$(SUFFIX) \
|
||||
+ cggsvd.$(SUFFIX) cggsvp.$(SUFFIX) \
|
||||
+ cgtcon.$(SUFFIX) cgtrfs.$(SUFFIX) cgtsv.$(SUFFIX) cgtsvx.$(SUFFIX) cgttrf.$(SUFFIX) cgttrs.$(SUFFIX) cgtts2.$(SUFFIX) chbev.$(SUFFIX) \
|
||||
+ chbevd.$(SUFFIX) chbevx.$(SUFFIX) chbgst.$(SUFFIX) chbgv.$(SUFFIX) chbgvd.$(SUFFIX) chbgvx.$(SUFFIX) chbtrd.$(SUFFIX) \
|
||||
+ checon.$(SUFFIX) cheev.$(SUFFIX) cheevd.$(SUFFIX) cheevr.$(SUFFIX) cheevx.$(SUFFIX) chegs2.$(SUFFIX) chegst.$(SUFFIX) \
|
||||
+ chegv.$(SUFFIX) chegvd.$(SUFFIX) chegvx.$(SUFFIX) cherfs.$(SUFFIX) chesv.$(SUFFIX) chesvx.$(SUFFIX) chetd2.$(SUFFIX) \
|
||||
+ chetf2.$(SUFFIX) chetrd.$(SUFFIX) \
|
||||
+ chetrf.$(SUFFIX) chetri.$(SUFFIX) chetri2.$(SUFFIX) chetri2x.$(SUFFIX) cheswapr.$(SUFFIX) \
|
||||
+ chetrs.$(SUFFIX) chetrs2.$(SUFFIX) chgeqz.$(SUFFIX) chpcon.$(SUFFIX) chpev.$(SUFFIX) chpevd.$(SUFFIX) \
|
||||
+ chpevx.$(SUFFIX) chpgst.$(SUFFIX) chpgv.$(SUFFIX) chpgvd.$(SUFFIX) chpgvx.$(SUFFIX) chprfs.$(SUFFIX) chpsv.$(SUFFIX) \
|
||||
+ chpsvx.$(SUFFIX) \
|
||||
+ chptrd.$(SUFFIX) chptrf.$(SUFFIX) chptri.$(SUFFIX) chptrs.$(SUFFIX) chsein.$(SUFFIX) chseqr.$(SUFFIX) clabrd.$(SUFFIX) \
|
||||
+ clacgv.$(SUFFIX) clacon.$(SUFFIX) clacn2.$(SUFFIX) clacp2.$(SUFFIX) clacpy.$(SUFFIX) clacrm.$(SUFFIX) clacrt.$(SUFFIX) cladiv.$(SUFFIX) \
|
||||
+ claed0.$(SUFFIX) claed7.$(SUFFIX) claed8.$(SUFFIX) \
|
||||
+ claein.$(SUFFIX) claesy.$(SUFFIX) claev2.$(SUFFIX) clags2.$(SUFFIX) clagtm.$(SUFFIX) \
|
||||
+ clahef.$(SUFFIX) clahqr.$(SUFFIX) \
|
||||
+ clahrd.$(SUFFIX) clahr2.$(SUFFIX) claic1.$(SUFFIX) clals0.$(SUFFIX) clalsa.$(SUFFIX) clalsd.$(SUFFIX) clangb.$(SUFFIX) clange.$(SUFFIX) clangt.$(SUFFIX) \
|
||||
+ clanhb.$(SUFFIX) clanhe.$(SUFFIX) \
|
||||
+ clanhp.$(SUFFIX) clanhs.$(SUFFIX) clanht.$(SUFFIX) clansb.$(SUFFIX) clansp.$(SUFFIX) clansy.$(SUFFIX) clantb.$(SUFFIX) \
|
||||
+ clantp.$(SUFFIX) clantr.$(SUFFIX) clapll.$(SUFFIX) clapmt.$(SUFFIX) clarcm.$(SUFFIX) claqgb.$(SUFFIX) claqge.$(SUFFIX) \
|
||||
+ claqhb.$(SUFFIX) claqhe.$(SUFFIX) claqhp.$(SUFFIX) claqp2.$(SUFFIX) claqps.$(SUFFIX) claqsb.$(SUFFIX) \
|
||||
+ claqr0.$(SUFFIX) claqr1.$(SUFFIX) claqr2.$(SUFFIX) claqr3.$(SUFFIX) claqr4.$(SUFFIX) claqr5.$(SUFFIX) \
|
||||
+ claqsp.$(SUFFIX) claqsy.$(SUFFIX) clar1v.$(SUFFIX) clar2v.$(SUFFIX) ilaclr.$(SUFFIX) ilaclc.$(SUFFIX) \
|
||||
+ clarf.$(SUFFIX) clarfb.$(SUFFIX) clarfg.$(SUFFIX) clarft.$(SUFFIX) clarfgp.$(SUFFIX) \
|
||||
+ clarfx.$(SUFFIX) clargv.$(SUFFIX) clarnv.$(SUFFIX) clarrv.$(SUFFIX) clartg.$(SUFFIX) clartv.$(SUFFIX) \
|
||||
+ clarz.$(SUFFIX) clarzb.$(SUFFIX) clarzt.$(SUFFIX) clascl.$(SUFFIX) claset.$(SUFFIX) clasr.$(SUFFIX) classq.$(SUFFIX) \
|
||||
+ clasyf.$(SUFFIX) clatbs.$(SUFFIX) clatdf.$(SUFFIX) clatps.$(SUFFIX) clatrd.$(SUFFIX) clatrs.$(SUFFIX) clatrz.$(SUFFIX) \
|
||||
+ clatzm.$(SUFFIX) cpbcon.$(SUFFIX) cpbequ.$(SUFFIX) cpbrfs.$(SUFFIX) cpbstf.$(SUFFIX) cpbsv.$(SUFFIX) \
|
||||
+ cpbsvx.$(SUFFIX) cpbtf2.$(SUFFIX) cpbtrf.$(SUFFIX) cpbtrs.$(SUFFIX) cpocon.$(SUFFIX) cpoequ.$(SUFFIX) cporfs.$(SUFFIX) \
|
||||
+ cposv.$(SUFFIX) cposvx.$(SUFFIX) cpotri.$(SUFFIX) cpstrf.$(SUFFIX) cpstf2.$(SUFFIX) \
|
||||
+ cppcon.$(SUFFIX) cppequ.$(SUFFIX) cpprfs.$(SUFFIX) cppsv.$(SUFFIX) cppsvx.$(SUFFIX) cpptrf.$(SUFFIX) cpptri.$(SUFFIX) cpptrs.$(SUFFIX) \
|
||||
+ cptcon.$(SUFFIX) cpteqr.$(SUFFIX) cptrfs.$(SUFFIX) cptsv.$(SUFFIX) cptsvx.$(SUFFIX) cpttrf.$(SUFFIX) cpttrs.$(SUFFIX) cptts2.$(SUFFIX) \
|
||||
+ crot.$(SUFFIX) cspcon.$(SUFFIX) cspmv.$(SUFFIX) cspr.$(SUFFIX) csprfs.$(SUFFIX) cspsv.$(SUFFIX) \
|
||||
+ cspsvx.$(SUFFIX) csptrf.$(SUFFIX) csptri.$(SUFFIX) csptrs.$(SUFFIX) csrscl.$(SUFFIX) cstedc.$(SUFFIX) \
|
||||
+ cstegr.$(SUFFIX) cstein.$(SUFFIX) csteqr.$(SUFFIX) \
|
||||
+ csycon.$(SUFFIX) csymv.$(SUFFIX) \
|
||||
+ csyr.$(SUFFIX) csyrfs.$(SUFFIX) csysv.$(SUFFIX) csysvx.$(SUFFIX) csytf2.$(SUFFIX) csytrf.$(SUFFIX) csytri.$(SUFFIX) csytri2.$(SUFFIX) csytri2x.$(SUFFIX) \
|
||||
+ csyswapr.$(SUFFIX) csytrs.$(SUFFIX) csytrs2.$(SUFFIX) csyconv.$(SUFFIX) \
|
||||
+ ctbcon.$(SUFFIX) ctbrfs.$(SUFFIX) ctbtrs.$(SUFFIX) ctgevc.$(SUFFIX) ctgex2.$(SUFFIX) \
|
||||
+ ctgexc.$(SUFFIX) ctgsen.$(SUFFIX) ctgsja.$(SUFFIX) ctgsna.$(SUFFIX) ctgsy2.$(SUFFIX) ctgsyl.$(SUFFIX) ctpcon.$(SUFFIX) \
|
||||
+ ctprfs.$(SUFFIX) ctptri.$(SUFFIX) \
|
||||
+ ctptrs.$(SUFFIX) ctrcon.$(SUFFIX) ctrevc.$(SUFFIX) ctrexc.$(SUFFIX) ctrrfs.$(SUFFIX) ctrsen.$(SUFFIX) ctrsna.$(SUFFIX) \
|
||||
+ ctrsyl.$(SUFFIX) ctrtrs.$(SUFFIX) ctzrqf.$(SUFFIX) ctzrzf.$(SUFFIX) cung2l.$(SUFFIX) cung2r.$(SUFFIX) \
|
||||
+ cungbr.$(SUFFIX) cunghr.$(SUFFIX) cungl2.$(SUFFIX) cunglq.$(SUFFIX) cungql.$(SUFFIX) cungqr.$(SUFFIX) cungr2.$(SUFFIX) \
|
||||
+ cungrq.$(SUFFIX) cungtr.$(SUFFIX) cunm2l.$(SUFFIX) cunm2r.$(SUFFIX) cunmbr.$(SUFFIX) cunmhr.$(SUFFIX) cunml2.$(SUFFIX) \
|
||||
+ cunmlq.$(SUFFIX) cunmql.$(SUFFIX) cunmqr.$(SUFFIX) cunmr2.$(SUFFIX) cunmr3.$(SUFFIX) cunmrq.$(SUFFIX) cunmrz.$(SUFFIX) \
|
||||
+ cunmtr.$(SUFFIX) cupgtr.$(SUFFIX) cupmtr.$(SUFFIX) icmax1.$(SUFFIX) scsum1.$(SUFFIX) cstemr.$(SUFFIX) \
|
||||
+ chfrk.$(SUFFIX) ctfttp.$(SUFFIX) clanhf.$(SUFFIX) cpftrf.$(SUFFIX) cpftri.$(SUFFIX) cpftrs.$(SUFFIX) ctfsm.$(SUFFIX) ctftri.$(SUFFIX) \
|
||||
+ ctfttr.$(SUFFIX) ctpttf.$(SUFFIX) ctpttr.$(SUFFIX) ctrttf.$(SUFFIX) ctrttp.$(SUFFIX) \
|
||||
+ cgeequb.$(SUFFIX) cgbequb.$(SUFFIX) csyequb.$(SUFFIX) cpoequb.$(SUFFIX) cheequb.$(SUFFIX) \
|
||||
+ cbbcsd.$(SUFFIX) clapmr.$(SUFFIX) cunbdb.$(SUFFIX) cuncsd.$(SUFFIX) \
|
||||
+ cgeqrt.$(SUFFIX) cgeqrt2.$(SUFFIX) cgeqrt3.$(SUFFIX) cgemqrt.$(SUFFIX) \
|
||||
+ ctpqrt.$(SUFFIX) ctpqrt2.$(SUFFIX) ctpmqrt.$(SUFFIX) ctprfb.$(SUFFIX)
|
||||
|
||||
ifdef USEXBLAS
|
||||
-CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \
|
||||
- cla_gercond_c.o cla_gercond_x.o cla_gerpvgrw.o \
|
||||
- csysvxx.o csyrfsx.o cla_syrfsx_extended.o cla_syamv.o \
|
||||
- cla_syrcond_c.o cla_syrcond_x.o cla_syrpvgrw.o \
|
||||
- cposvxx.o cporfsx.o cla_porfsx_extended.o \
|
||||
- cla_porcond_c.o cla_porcond_x.o cla_porpvgrw.o \
|
||||
- cgbsvxx.o cgbrfsx.o cla_gbrfsx_extended.o cla_gbamv.o \
|
||||
- cla_gbrcond_c.o cla_gbrcond_x.o cla_gbrpvgrw.o \
|
||||
- chesvxx.o cherfsx.o cla_herfsx_extended.o cla_heamv.o \
|
||||
- cla_hercond_c.o cla_hercond_x.o cla_herpvgrw.o \
|
||||
- cla_lin_berr.o clarscl2.o clascl2.o cla_wwaddw.o
|
||||
+CXLASRC = cgesvxx.$(SUFFIX) cgerfsx.$(SUFFIX) cla_gerfsx_extended.$(SUFFIX) cla_geamv.$(SUFFIX) \
|
||||
+ cla_gercond_c.$(SUFFIX) cla_gercond_x.$(SUFFIX) cla_gerpvgrw.$(SUFFIX) \
|
||||
+ csysvxx.$(SUFFIX) csyrfsx.$(SUFFIX) cla_syrfsx_extended.$(SUFFIX) cla_syamv.$(SUFFIX) \
|
||||
+ cla_syrcond_c.$(SUFFIX) cla_syrcond_x.$(SUFFIX) cla_syrpvgrw.$(SUFFIX) \
|
||||
+ cposvxx.$(SUFFIX) cporfsx.$(SUFFIX) cla_porfsx_extended.$(SUFFIX) \
|
||||
+ cla_porcond_c.$(SUFFIX) cla_porcond_x.$(SUFFIX) cla_porpvgrw.$(SUFFIX) \
|
||||
+ cgbsvxx.$(SUFFIX) cgbrfsx.$(SUFFIX) cla_gbrfsx_extended.$(SUFFIX) cla_gbamv.$(SUFFIX) \
|
||||
+ cla_gbrcond_c.$(SUFFIX) cla_gbrcond_x.$(SUFFIX) cla_gbrpvgrw.$(SUFFIX) \
|
||||
+ chesvxx.$(SUFFIX) cherfsx.$(SUFFIX) cla_herfsx_extended.$(SUFFIX) cla_heamv.$(SUFFIX) \
|
||||
+ cla_hercond_c.$(SUFFIX) cla_hercond_x.$(SUFFIX) cla_herpvgrw.$(SUFFIX) \
|
||||
+ cla_lin_berr.$(SUFFIX) clarscl2.$(SUFFIX) clascl2.$(SUFFIX) cla_wwaddw.$(SUFFIX)
|
||||
endif
|
||||
|
||||
-ZCLASRC = cpotrs.o cgetrs.o cpotrf.o cgetrf.o
|
||||
+ZCLASRC = cpotrs.$(SUFFIX)
|
||||
|
||||
DLASRC = \
|
||||
- dgbbrd.o dgbcon.o dgbequ.o dgbrfs.o dgbsv.o \
|
||||
- dgbsvx.o dgbtf2.o dgbtrf.o dgbtrs.o dgebak.o dgebal.o dgebd2.o \
|
||||
- dgebrd.o dgecon.o dgeequ.o dgees.o dgeesx.o dgeev.o dgeevx.o \
|
||||
- dgegs.o dgegv.o dgehd2.o dgehrd.o dgelq2.o dgelqf.o \
|
||||
- dgels.o dgelsd.o dgelss.o dgelsx.o dgelsy.o dgeql2.o dgeqlf.o \
|
||||
- dgeqp3.o dgeqpf.o dgeqr2.o dgeqr2p.o dgeqrf.o dgeqrfp.o dgerfs.o \
|
||||
- dgerq2.o dgerqf.o dgesc2.o dgesdd.o dgesv.o dgesvd.o dgesvx.o \
|
||||
- dgetc2.o dgetf2.o dgetrf.o dgetri.o \
|
||||
- dgetrs.o dggbak.o dggbal.o dgges.o dggesx.o dggev.o dggevx.o \
|
||||
- dggglm.o dgghrd.o dgglse.o dggqrf.o \
|
||||
- dggrqf.o dggsvd.o dggsvp.o dgtcon.o dgtrfs.o dgtsv.o \
|
||||
- dgtsvx.o dgttrf.o dgttrs.o dgtts2.o dhgeqz.o \
|
||||
- dhsein.o dhseqr.o dlabrd.o dlacon.o dlacn2.o \
|
||||
- dlaein.o dlaexc.o dlag2.o dlags2.o dlagtm.o dlagv2.o dlahqr.o \
|
||||
- dlahrd.o dlahr2.o dlaic1.o dlaln2.o dlals0.o dlalsa.o dlalsd.o \
|
||||
- dlangb.o dlange.o dlangt.o dlanhs.o dlansb.o dlansp.o \
|
||||
- dlansy.o dlantb.o dlantp.o dlantr.o dlanv2.o \
|
||||
- dlapll.o dlapmt.o \
|
||||
- dlaqgb.o dlaqge.o dlaqp2.o dlaqps.o dlaqsb.o dlaqsp.o dlaqsy.o \
|
||||
- dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \
|
||||
- dlaqtr.o dlar1v.o dlar2v.o iladlr.o iladlc.o \
|
||||
- dlarf.o dlarfb.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o \
|
||||
- dlargv.o dlarrv.o dlartv.o \
|
||||
- dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o dlasyf.o \
|
||||
- dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o dlauu2.o \
|
||||
- dlauum.o dopgtr.o dopmtr.o dorg2l.o dorg2r.o \
|
||||
- dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \
|
||||
- dorgrq.o dorgtr.o dorm2l.o dorm2r.o \
|
||||
- dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \
|
||||
- dormr3.o dormrq.o dormrz.o dormtr.o dpbcon.o dpbequ.o dpbrfs.o \
|
||||
- dpbstf.o dpbsv.o dpbsvx.o \
|
||||
- dpbtf2.o dpbtrf.o dpbtrs.o dpocon.o dpoequ.o dporfs.o dposv.o \
|
||||
- dposvx.o dpotf2.o dpotrf.o dpotri.o dpotrs.o dpstrf.o dpstf2.o \
|
||||
- dppcon.o dppequ.o \
|
||||
- dpprfs.o dppsv.o dppsvx.o dpptrf.o dpptri.o dpptrs.o dptcon.o \
|
||||
- dpteqr.o dptrfs.o dptsv.o dptsvx.o dpttrs.o dptts2.o drscl.o \
|
||||
- dsbev.o dsbevd.o dsbevx.o dsbgst.o dsbgv.o dsbgvd.o dsbgvx.o \
|
||||
- dsbtrd.o dspcon.o dspev.o dspevd.o dspevx.o dspgst.o \
|
||||
- dspgv.o dspgvd.o dspgvx.o dsprfs.o dspsv.o dspsvx.o dsptrd.o \
|
||||
- dsptrf.o dsptri.o dsptrs.o dstegr.o dstein.o dstev.o dstevd.o dstevr.o \
|
||||
- dstevx.o \
|
||||
- dsycon.o dsyev.o dsyevd.o dsyevr.o \
|
||||
- dsyevx.o dsygs2.o dsygst.o dsygv.o dsygvd.o dsygvx.o dsyrfs.o \
|
||||
- dsysv.o dsysvx.o \
|
||||
- dsytd2.o dsytf2.o dsytrd.o dsytrf.o dsytri.o dsytri2.o dsytri2x.o \
|
||||
- dsyswapr.o dsytrs.o dsytrs2.o dsyconv.o \
|
||||
- dtbcon.o dtbrfs.o dtbtrs.o dtgevc.o dtgex2.o dtgexc.o dtgsen.o \
|
||||
- dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \
|
||||
- dtptrs.o \
|
||||
- dtrcon.o dtrevc.o dtrexc.o dtrrfs.o dtrsen.o dtrsna.o dtrsyl.o \
|
||||
- dtrti2.o dtrtri.o dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \
|
||||
- dsgesv.o dsposv.o dlag2s.o slag2d.o dlat2s.o \
|
||||
- dlansf.o dpftrf.o dpftri.o dpftrs.o dsfrk.o dtfsm.o dtftri.o dtfttp.o \
|
||||
- dtfttr.o dtpttf.o dtpttr.o dtrttf.o dtrttp.o \
|
||||
- dgejsv.o dgesvj.o dgsvj0.o dgsvj1.o \
|
||||
- dgeequb.o dsyequb.o dpoequb.o dgbequb.o \
|
||||
- dbbcsd.o dlapmr.o dorbdb.o dorcsd.o \
|
||||
- dgeqrt.o dgeqrt2.o dgeqrt3.o dgemqrt.o \
|
||||
- dtpqrt.o dtpqrt2.o dtpmqrt.o dtprfb.o
|
||||
+ dgbbrd.$(SUFFIX) dgbcon.$(SUFFIX) dgbequ.$(SUFFIX) dgbrfs.$(SUFFIX) dgbsv.$(SUFFIX) \
|
||||
+ dgbsvx.$(SUFFIX) dgbtf2.$(SUFFIX) dgbtrf.$(SUFFIX) dgbtrs.$(SUFFIX) dgebak.$(SUFFIX) dgebal.$(SUFFIX) dgebd2.$(SUFFIX) \
|
||||
+ dgebrd.$(SUFFIX) dgecon.$(SUFFIX) dgeequ.$(SUFFIX) dgees.$(SUFFIX) dgeesx.$(SUFFIX) dgeev.$(SUFFIX) dgeevx.$(SUFFIX) \
|
||||
+ dgegs.$(SUFFIX) dgegv.$(SUFFIX) dgehd2.$(SUFFIX) dgehrd.$(SUFFIX) dgelq2.$(SUFFIX) dgelqf.$(SUFFIX) \
|
||||
+ dgels.$(SUFFIX) dgelsd.$(SUFFIX) dgelss.$(SUFFIX) dgelsx.$(SUFFIX) dgelsy.$(SUFFIX) dgeql2.$(SUFFIX) dgeqlf.$(SUFFIX) \
|
||||
+ dgeqp3.$(SUFFIX) dgeqpf.$(SUFFIX) dgeqr2.$(SUFFIX) dgeqr2p.$(SUFFIX) dgeqrf.$(SUFFIX) dgeqrfp.$(SUFFIX) dgerfs.$(SUFFIX) \
|
||||
+ dgerq2.$(SUFFIX) dgerqf.$(SUFFIX) dgesc2.$(SUFFIX) dgesdd.$(SUFFIX) dgesv.$(SUFFIX) dgesvd.$(SUFFIX) dgesvx.$(SUFFIX) \
|
||||
+ dgetc2.$(SUFFIX) dgetri.$(SUFFIX) \
|
||||
+ dggbak.$(SUFFIX) dggbal.$(SUFFIX) dgges.$(SUFFIX) dggesx.$(SUFFIX) dggev.$(SUFFIX) dggevx.$(SUFFIX) \
|
||||
+ dggglm.$(SUFFIX) dgghrd.$(SUFFIX) dgglse.$(SUFFIX) dggqrf.$(SUFFIX) \
|
||||
+ dggrqf.$(SUFFIX) dggsvd.$(SUFFIX) dggsvp.$(SUFFIX) dgtcon.$(SUFFIX) dgtrfs.$(SUFFIX) dgtsv.$(SUFFIX) \
|
||||
+ dgtsvx.$(SUFFIX) dgttrf.$(SUFFIX) dgttrs.$(SUFFIX) dgtts2.$(SUFFIX) dhgeqz.$(SUFFIX) \
|
||||
+ dhsein.$(SUFFIX) dhseqr.$(SUFFIX) dlabrd.$(SUFFIX) dlacon.$(SUFFIX) dlacn2.$(SUFFIX) \
|
||||
+ dlaein.$(SUFFIX) dlaexc.$(SUFFIX) dlag2.$(SUFFIX) dlags2.$(SUFFIX) dlagtm.$(SUFFIX) dlagv2.$(SUFFIX) dlahqr.$(SUFFIX) \
|
||||
+ dlahrd.$(SUFFIX) dlahr2.$(SUFFIX) dlaic1.$(SUFFIX) dlaln2.$(SUFFIX) dlals0.$(SUFFIX) dlalsa.$(SUFFIX) dlalsd.$(SUFFIX) \
|
||||
+ dlangb.$(SUFFIX) dlange.$(SUFFIX) dlangt.$(SUFFIX) dlanhs.$(SUFFIX) dlansb.$(SUFFIX) dlansp.$(SUFFIX) \
|
||||
+ dlansy.$(SUFFIX) dlantb.$(SUFFIX) dlantp.$(SUFFIX) dlantr.$(SUFFIX) dlanv2.$(SUFFIX) \
|
||||
+ dlapll.$(SUFFIX) dlapmt.$(SUFFIX) \
|
||||
+ dlaqgb.$(SUFFIX) dlaqge.$(SUFFIX) dlaqp2.$(SUFFIX) dlaqps.$(SUFFIX) dlaqsb.$(SUFFIX) dlaqsp.$(SUFFIX) dlaqsy.$(SUFFIX) \
|
||||
+ dlaqr0.$(SUFFIX) dlaqr1.$(SUFFIX) dlaqr2.$(SUFFIX) dlaqr3.$(SUFFIX) dlaqr4.$(SUFFIX) dlaqr5.$(SUFFIX) \
|
||||
+ dlaqtr.$(SUFFIX) dlar1v.$(SUFFIX) dlar2v.$(SUFFIX) iladlr.$(SUFFIX) iladlc.$(SUFFIX) \
|
||||
+ dlarf.$(SUFFIX) dlarfb.$(SUFFIX) dlarfg.$(SUFFIX) dlarfgp.$(SUFFIX) dlarft.$(SUFFIX) dlarfx.$(SUFFIX) \
|
||||
+ dlargv.$(SUFFIX) dlarrv.$(SUFFIX) dlartv.$(SUFFIX) \
|
||||
+ dlarz.$(SUFFIX) dlarzb.$(SUFFIX) dlarzt.$(SUFFIX) dlasy2.$(SUFFIX) dlasyf.$(SUFFIX) \
|
||||
+ dlatbs.$(SUFFIX) dlatdf.$(SUFFIX) dlatps.$(SUFFIX) dlatrd.$(SUFFIX) dlatrs.$(SUFFIX) dlatrz.$(SUFFIX) dlatzm.$(SUFFIX) \
|
||||
+ dopgtr.$(SUFFIX) dopmtr.$(SUFFIX) dorg2l.$(SUFFIX) dorg2r.$(SUFFIX) \
|
||||
+ dorgbr.$(SUFFIX) dorghr.$(SUFFIX) dorgl2.$(SUFFIX) dorglq.$(SUFFIX) dorgql.$(SUFFIX) dorgqr.$(SUFFIX) dorgr2.$(SUFFIX) \
|
||||
+ dorgrq.$(SUFFIX) dorgtr.$(SUFFIX) dorm2l.$(SUFFIX) dorm2r.$(SUFFIX) \
|
||||
+ dormbr.$(SUFFIX) dormhr.$(SUFFIX) dorml2.$(SUFFIX) dormlq.$(SUFFIX) dormql.$(SUFFIX) dormqr.$(SUFFIX) dormr2.$(SUFFIX) \
|
||||
+ dormr3.$(SUFFIX) dormrq.$(SUFFIX) dormrz.$(SUFFIX) dormtr.$(SUFFIX) dpbcon.$(SUFFIX) dpbequ.$(SUFFIX) dpbrfs.$(SUFFIX) \
|
||||
+ dpbstf.$(SUFFIX) dpbsv.$(SUFFIX) dpbsvx.$(SUFFIX) \
|
||||
+ dpbtf2.$(SUFFIX) dpbtrf.$(SUFFIX) dpbtrs.$(SUFFIX) dpocon.$(SUFFIX) dpoequ.$(SUFFIX) dporfs.$(SUFFIX) dposv.$(SUFFIX) \
|
||||
+ dposvx.$(SUFFIX) dpotri.$(SUFFIX) dpotrs.$(SUFFIX) dpstrf.$(SUFFIX) dpstf2.$(SUFFIX) \
|
||||
+ dppcon.$(SUFFIX) dppequ.$(SUFFIX) \
|
||||
+ dpprfs.$(SUFFIX) dppsv.$(SUFFIX) dppsvx.$(SUFFIX) dpptrf.$(SUFFIX) dpptri.$(SUFFIX) dpptrs.$(SUFFIX) dptcon.$(SUFFIX) \
|
||||
+ dpteqr.$(SUFFIX) dptrfs.$(SUFFIX) dptsv.$(SUFFIX) dptsvx.$(SUFFIX) dpttrs.$(SUFFIX) dptts2.$(SUFFIX) drscl.$(SUFFIX) \
|
||||
+ dsbev.$(SUFFIX) dsbevd.$(SUFFIX) dsbevx.$(SUFFIX) dsbgst.$(SUFFIX) dsbgv.$(SUFFIX) dsbgvd.$(SUFFIX) dsbgvx.$(SUFFIX) \
|
||||
+ dsbtrd.$(SUFFIX) dspcon.$(SUFFIX) dspev.$(SUFFIX) dspevd.$(SUFFIX) dspevx.$(SUFFIX) dspgst.$(SUFFIX) \
|
||||
+ dspgv.$(SUFFIX) dspgvd.$(SUFFIX) dspgvx.$(SUFFIX) dsprfs.$(SUFFIX) dspsv.$(SUFFIX) dspsvx.$(SUFFIX) dsptrd.$(SUFFIX) \
|
||||
+ dsptrf.$(SUFFIX) dsptri.$(SUFFIX) dsptrs.$(SUFFIX) dstegr.$(SUFFIX) dstein.$(SUFFIX) dstev.$(SUFFIX) dstevd.$(SUFFIX) dstevr.$(SUFFIX) \
|
||||
+ dstevx.$(SUFFIX) \
|
||||
+ dsycon.$(SUFFIX) dsyev.$(SUFFIX) dsyevd.$(SUFFIX) dsyevr.$(SUFFIX) \
|
||||
+ dsyevx.$(SUFFIX) dsygs2.$(SUFFIX) dsygst.$(SUFFIX) dsygv.$(SUFFIX) dsygvd.$(SUFFIX) dsygvx.$(SUFFIX) dsyrfs.$(SUFFIX) \
|
||||
+ dsysv.$(SUFFIX) dsysvx.$(SUFFIX) \
|
||||
+ dsytd2.$(SUFFIX) dsytf2.$(SUFFIX) dsytrd.$(SUFFIX) dsytrf.$(SUFFIX) dsytri.$(SUFFIX) dsytri2.$(SUFFIX) dsytri2x.$(SUFFIX) \
|
||||
+ dsyswapr.$(SUFFIX) dsytrs.$(SUFFIX) dsytrs2.$(SUFFIX) dsyconv.$(SUFFIX) \
|
||||
+ dtbcon.$(SUFFIX) dtbrfs.$(SUFFIX) dtbtrs.$(SUFFIX) dtgevc.$(SUFFIX) dtgex2.$(SUFFIX) dtgexc.$(SUFFIX) dtgsen.$(SUFFIX) \
|
||||
+ dtgsja.$(SUFFIX) dtgsna.$(SUFFIX) dtgsy2.$(SUFFIX) dtgsyl.$(SUFFIX) dtpcon.$(SUFFIX) dtprfs.$(SUFFIX) dtptri.$(SUFFIX) \
|
||||
+ dtptrs.$(SUFFIX) \
|
||||
+ dtrcon.$(SUFFIX) dtrevc.$(SUFFIX) dtrexc.$(SUFFIX) dtrrfs.$(SUFFIX) dtrsen.$(SUFFIX) dtrsna.$(SUFFIX) dtrsyl.$(SUFFIX) \
|
||||
+ dtrtrs.$(SUFFIX) dtzrqf.$(SUFFIX) dtzrzf.$(SUFFIX) dstemr.$(SUFFIX) \
|
||||
+ dsgesv.$(SUFFIX) dsposv.$(SUFFIX) dlag2s.$(SUFFIX) slag2d.$(SUFFIX) dlat2s.$(SUFFIX) \
|
||||
+ dlansf.$(SUFFIX) dpftrf.$(SUFFIX) dpftri.$(SUFFIX) dpftrs.$(SUFFIX) dsfrk.$(SUFFIX) dtfsm.$(SUFFIX) dtftri.$(SUFFIX) dtfttp.$(SUFFIX) \
|
||||
+ dtfttr.$(SUFFIX) dtpttf.$(SUFFIX) dtpttr.$(SUFFIX) dtrttf.$(SUFFIX) dtrttp.$(SUFFIX) \
|
||||
+ dgejsv.$(SUFFIX) dgesvj.$(SUFFIX) dgsvj0.$(SUFFIX) dgsvj1.$(SUFFIX) \
|
||||
+ dgeequb.$(SUFFIX) dsyequb.$(SUFFIX) dpoequb.$(SUFFIX) dgbequb.$(SUFFIX) \
|
||||
+ dbbcsd.$(SUFFIX) dlapmr.$(SUFFIX) dorbdb.$(SUFFIX) dorcsd.$(SUFFIX) \
|
||||
+ dgeqrt.$(SUFFIX) dgeqrt2.$(SUFFIX) dgeqrt3.$(SUFFIX) dgemqrt.$(SUFFIX) \
|
||||
+ dtpqrt.$(SUFFIX) dtpqrt2.$(SUFFIX) dtpmqrt.$(SUFFIX) dtprfb.$(SUFFIX)
|
||||
|
||||
ifdef USEXBLAS
|
||||
-DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \
|
||||
- dla_gercond.o dla_gerpvgrw.o dsysvxx.o dsyrfsx.o \
|
||||
- dla_syrfsx_extended.o dla_syamv.o dla_syrcond.o dla_syrpvgrw.o \
|
||||
- dposvxx.o dporfsx.o dla_porfsx_extended.o dla_porcond.o \
|
||||
- dla_porpvgrw.o dgbsvxx.o dgbrfsx.o dla_gbrfsx_extended.o \
|
||||
- dla_gbamv.o dla_gbrcond.o dla_gbrpvgrw.o dla_lin_berr.o dlarscl2.o \
|
||||
- dlascl2.o dla_wwaddw.o
|
||||
+DXLASRC = dgesvxx.$(SUFFIX) dgerfsx.$(SUFFIX) dla_gerfsx_extended.$(SUFFIX) dla_geamv.$(SUFFIX) \
|
||||
+ dla_gercond.$(SUFFIX) dla_gerpvgrw.$(SUFFIX) dsysvxx.$(SUFFIX) dsyrfsx.$(SUFFIX) \
|
||||
+ dla_syrfsx_extended.$(SUFFIX) dla_syamv.$(SUFFIX) dla_syrcond.$(SUFFIX) dla_syrpvgrw.$(SUFFIX) \
|
||||
+ dposvxx.$(SUFFIX) dporfsx.$(SUFFIX) dla_porfsx_extended.$(SUFFIX) dla_porcond.$(SUFFIX) \
|
||||
+ dla_porpvgrw.$(SUFFIX) dgbsvxx.$(SUFFIX) dgbrfsx.$(SUFFIX) dla_gbrfsx_extended.$(SUFFIX) \
|
||||
+ dla_gbamv.$(SUFFIX) dla_gbrcond.$(SUFFIX) dla_gbrpvgrw.$(SUFFIX) dla_lin_berr.$(SUFFIX) dlarscl2.$(SUFFIX) \
|
||||
+ dlascl2.$(SUFFIX) dla_wwaddw.$(SUFFIX)
|
||||
endif
|
||||
|
||||
ZLASRC = \
|
||||
- zbdsqr.o zgbbrd.o zgbcon.o zgbequ.o zgbrfs.o zgbsv.o zgbsvx.o \
|
||||
- zgbtf2.o zgbtrf.o zgbtrs.o zgebak.o zgebal.o zgebd2.o zgebrd.o \
|
||||
- zgecon.o zgeequ.o zgees.o zgeesx.o zgeev.o zgeevx.o \
|
||||
- zgegs.o zgegv.o zgehd2.o zgehrd.o zgelq2.o zgelqf.o \
|
||||
- zgels.o zgelsd.o zgelss.o zgelsx.o zgelsy.o zgeql2.o zgeqlf.o zgeqp3.o \
|
||||
- zgeqpf.o zgeqr2.o zgeqr2p.o zgeqrf.o zgeqrfp.o zgerfs.o zgerq2.o zgerqf.o \
|
||||
- zgesc2.o zgesdd.o zgesv.o zgesvd.o zgesvx.o zgetc2.o zgetf2.o zgetrf.o \
|
||||
- zgetri.o zgetrs.o \
|
||||
- zggbak.o zggbal.o zgges.o zggesx.o zggev.o zggevx.o zggglm.o \
|
||||
- zgghrd.o zgglse.o zggqrf.o zggrqf.o \
|
||||
- zggsvd.o zggsvp.o \
|
||||
- zgtcon.o zgtrfs.o zgtsv.o zgtsvx.o zgttrf.o zgttrs.o zgtts2.o zhbev.o \
|
||||
- zhbevd.o zhbevx.o zhbgst.o zhbgv.o zhbgvd.o zhbgvx.o zhbtrd.o \
|
||||
- zhecon.o zheev.o zheevd.o zheevr.o zheevx.o zhegs2.o zhegst.o \
|
||||
- zhegv.o zhegvd.o zhegvx.o zherfs.o zhesv.o zhesvx.o zhetd2.o \
|
||||
- zhetf2.o zhetrd.o \
|
||||
- zhetrf.o zhetri.o zhetri2.o zhetri2x.o zheswapr.o \
|
||||
- zhetrs.o zhetrs2.o zhgeqz.o zhpcon.o zhpev.o zhpevd.o \
|
||||
- zhpevx.o zhpgst.o zhpgv.o zhpgvd.o zhpgvx.o zhprfs.o zhpsv.o \
|
||||
- zhpsvx.o \
|
||||
- zhptrd.o zhptrf.o zhptri.o zhptrs.o zhsein.o zhseqr.o zlabrd.o \
|
||||
- zlacgv.o zlacon.o zlacn2.o zlacp2.o zlacpy.o zlacrm.o zlacrt.o zladiv.o \
|
||||
- zlaed0.o zlaed7.o zlaed8.o \
|
||||
- zlaein.o zlaesy.o zlaev2.o zlags2.o zlagtm.o \
|
||||
- zlahef.o zlahqr.o \
|
||||
- zlahrd.o zlahr2.o zlaic1.o zlals0.o zlalsa.o zlalsd.o zlangb.o zlange.o \
|
||||
- zlangt.o zlanhb.o \
|
||||
- zlanhe.o \
|
||||
- zlanhp.o zlanhs.o zlanht.o zlansb.o zlansp.o zlansy.o zlantb.o \
|
||||
- zlantp.o zlantr.o zlapll.o zlapmt.o zlaqgb.o zlaqge.o \
|
||||
- zlaqhb.o zlaqhe.o zlaqhp.o zlaqp2.o zlaqps.o zlaqsb.o \
|
||||
- zlaqr0.o zlaqr1.o zlaqr2.o zlaqr3.o zlaqr4.o zlaqr5.o \
|
||||
- zlaqsp.o zlaqsy.o zlar1v.o zlar2v.o ilazlr.o ilazlc.o \
|
||||
- zlarcm.o zlarf.o zlarfb.o \
|
||||
- zlarfg.o zlarft.o zlarfgp.o \
|
||||
- zlarfx.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \
|
||||
- zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \
|
||||
- zlassq.o zlaswp.o zlasyf.o \
|
||||
- zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlatzm.o zlauu2.o \
|
||||
- zlauum.o zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \
|
||||
- zpbsvx.o zpbtf2.o zpbtrf.o zpbtrs.o zpocon.o zpoequ.o zporfs.o \
|
||||
- zposv.o zposvx.o zpotf2.o zpotrf.o zpotri.o zpotrs.o zpstrf.o zpstf2.o \
|
||||
- zppcon.o zppequ.o zpprfs.o zppsv.o zppsvx.o zpptrf.o zpptri.o zpptrs.o \
|
||||
- zptcon.o zpteqr.o zptrfs.o zptsv.o zptsvx.o zpttrf.o zpttrs.o zptts2.o \
|
||||
- zrot.o zspcon.o zspmv.o zspr.o zsprfs.o zspsv.o \
|
||||
- zspsvx.o zsptrf.o zsptri.o zsptrs.o zdrscl.o zstedc.o \
|
||||
- zstegr.o zstein.o zsteqr.o \
|
||||
- zsycon.o zsymv.o \
|
||||
- zsyr.o zsyrfs.o zsysv.o zsysvx.o zsytf2.o zsytrf.o zsytri.o zsytri2.o zsytri2x.o \
|
||||
- zsyswapr.o zsytrs.o zsytrs2.o zsyconv.o \
|
||||
- ztbcon.o ztbrfs.o ztbtrs.o ztgevc.o ztgex2.o \
|
||||
- ztgexc.o ztgsen.o ztgsja.o ztgsna.o ztgsy2.o ztgsyl.o ztpcon.o \
|
||||
- ztprfs.o ztptri.o \
|
||||
- ztptrs.o ztrcon.o ztrevc.o ztrexc.o ztrrfs.o ztrsen.o ztrsna.o \
|
||||
- ztrsyl.o ztrti2.o ztrtri.o ztrtrs.o ztzrqf.o ztzrzf.o zung2l.o \
|
||||
- zung2r.o zungbr.o zunghr.o zungl2.o zunglq.o zungql.o zungqr.o zungr2.o \
|
||||
- zungrq.o zungtr.o zunm2l.o zunm2r.o zunmbr.o zunmhr.o zunml2.o \
|
||||
- zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \
|
||||
- zunmtr.o zupgtr.o \
|
||||
- zupmtr.o izmax1.o dzsum1.o zstemr.o \
|
||||
- zcgesv.o zcposv.o zlag2c.o clag2z.o zlat2c.o \
|
||||
- zhfrk.o ztfttp.o zlanhf.o zpftrf.o zpftri.o zpftrs.o ztfsm.o ztftri.o \
|
||||
- ztfttr.o ztpttf.o ztpttr.o ztrttf.o ztrttp.o \
|
||||
- zgeequb.o zgbequb.o zsyequb.o zpoequb.o zheequb.o \
|
||||
- zbbcsd.o zlapmr.o zunbdb.o zuncsd.o \
|
||||
- zgeqrt.o zgeqrt2.o zgeqrt3.o zgemqrt.o \
|
||||
- ztpqrt.o ztpqrt2.o ztpmqrt.o ztprfb.o
|
||||
+ zbdsqr.$(SUFFIX) zgbbrd.$(SUFFIX) zgbcon.$(SUFFIX) zgbequ.$(SUFFIX) zgbrfs.$(SUFFIX) zgbsv.$(SUFFIX) zgbsvx.$(SUFFIX) \
|
||||
+ zgbtf2.$(SUFFIX) zgbtrf.$(SUFFIX) zgbtrs.$(SUFFIX) zgebak.$(SUFFIX) zgebal.$(SUFFIX) zgebd2.$(SUFFIX) zgebrd.$(SUFFIX) \
|
||||
+ zgecon.$(SUFFIX) zgeequ.$(SUFFIX) zgees.$(SUFFIX) zgeesx.$(SUFFIX) zgeev.$(SUFFIX) zgeevx.$(SUFFIX) \
|
||||
+ zgegs.$(SUFFIX) zgegv.$(SUFFIX) zgehd2.$(SUFFIX) zgehrd.$(SUFFIX) zgelq2.$(SUFFIX) zgelqf.$(SUFFIX) \
|
||||
+ zgels.$(SUFFIX) zgelsd.$(SUFFIX) zgelss.$(SUFFIX) zgelsx.$(SUFFIX) zgelsy.$(SUFFIX) zgeql2.$(SUFFIX) zgeqlf.$(SUFFIX) zgeqp3.$(SUFFIX) \
|
||||
+ zgeqpf.$(SUFFIX) zgeqr2.$(SUFFIX) zgeqr2p.$(SUFFIX) zgeqrf.$(SUFFIX) zgeqrfp.$(SUFFIX) zgerfs.$(SUFFIX) zgerq2.$(SUFFIX) zgerqf.$(SUFFIX) \
|
||||
+ zgesc2.$(SUFFIX) zgesdd.$(SUFFIX) zgesv.$(SUFFIX) zgesvd.$(SUFFIX) zgesvx.$(SUFFIX) zgetc2.$(SUFFIX) \
|
||||
+ zgetri.$(SUFFIX) \
|
||||
+ zggbak.$(SUFFIX) zggbal.$(SUFFIX) zgges.$(SUFFIX) zggesx.$(SUFFIX) zggev.$(SUFFIX) zggevx.$(SUFFIX) zggglm.$(SUFFIX) \
|
||||
+ zgghrd.$(SUFFIX) zgglse.$(SUFFIX) zggqrf.$(SUFFIX) zggrqf.$(SUFFIX) \
|
||||
+ zggsvd.$(SUFFIX) zggsvp.$(SUFFIX) \
|
||||
+ zgtcon.$(SUFFIX) zgtrfs.$(SUFFIX) zgtsv.$(SUFFIX) zgtsvx.$(SUFFIX) zgttrf.$(SUFFIX) zgttrs.$(SUFFIX) zgtts2.$(SUFFIX) zhbev.$(SUFFIX) \
|
||||
+ zhbevd.$(SUFFIX) zhbevx.$(SUFFIX) zhbgst.$(SUFFIX) zhbgv.$(SUFFIX) zhbgvd.$(SUFFIX) zhbgvx.$(SUFFIX) zhbtrd.$(SUFFIX) \
|
||||
+ zhecon.$(SUFFIX) zheev.$(SUFFIX) zheevd.$(SUFFIX) zheevr.$(SUFFIX) zheevx.$(SUFFIX) zhegs2.$(SUFFIX) zhegst.$(SUFFIX) \
|
||||
+ zhegv.$(SUFFIX) zhegvd.$(SUFFIX) zhegvx.$(SUFFIX) zherfs.$(SUFFIX) zhesv.$(SUFFIX) zhesvx.$(SUFFIX) zhetd2.$(SUFFIX) \
|
||||
+ zhetf2.$(SUFFIX) zhetrd.$(SUFFIX) \
|
||||
+ zhetrf.$(SUFFIX) zhetri.$(SUFFIX) zhetri2.$(SUFFIX) zhetri2x.$(SUFFIX) zheswapr.$(SUFFIX) \
|
||||
+ zhetrs.$(SUFFIX) zhetrs2.$(SUFFIX) zhgeqz.$(SUFFIX) zhpcon.$(SUFFIX) zhpev.$(SUFFIX) zhpevd.$(SUFFIX) \
|
||||
+ zhpevx.$(SUFFIX) zhpgst.$(SUFFIX) zhpgv.$(SUFFIX) zhpgvd.$(SUFFIX) zhpgvx.$(SUFFIX) zhprfs.$(SUFFIX) zhpsv.$(SUFFIX) \
|
||||
+ zhpsvx.$(SUFFIX) \
|
||||
+ zhptrd.$(SUFFIX) zhptrf.$(SUFFIX) zhptri.$(SUFFIX) zhptrs.$(SUFFIX) zhsein.$(SUFFIX) zhseqr.$(SUFFIX) zlabrd.$(SUFFIX) \
|
||||
+ zlacgv.$(SUFFIX) zlacon.$(SUFFIX) zlacn2.$(SUFFIX) zlacp2.$(SUFFIX) zlacpy.$(SUFFIX) zlacrm.$(SUFFIX) zlacrt.$(SUFFIX) zladiv.$(SUFFIX) \
|
||||
+ zlaed0.$(SUFFIX) zlaed7.$(SUFFIX) zlaed8.$(SUFFIX) \
|
||||
+ zlaein.$(SUFFIX) zlaesy.$(SUFFIX) zlaev2.$(SUFFIX) zlags2.$(SUFFIX) zlagtm.$(SUFFIX) \
|
||||
+ zlahef.$(SUFFIX) zlahqr.$(SUFFIX) \
|
||||
+ zlahrd.$(SUFFIX) zlahr2.$(SUFFIX) zlaic1.$(SUFFIX) zlals0.$(SUFFIX) zlalsa.$(SUFFIX) zlalsd.$(SUFFIX) zlangb.$(SUFFIX) zlange.$(SUFFIX) \
|
||||
+ zlangt.$(SUFFIX) zlanhb.$(SUFFIX) \
|
||||
+ zlanhe.$(SUFFIX) \
|
||||
+ zlanhp.$(SUFFIX) zlanhs.$(SUFFIX) zlanht.$(SUFFIX) zlansb.$(SUFFIX) zlansp.$(SUFFIX) zlansy.$(SUFFIX) zlantb.$(SUFFIX) \
|
||||
+ zlantp.$(SUFFIX) zlantr.$(SUFFIX) zlapll.$(SUFFIX) zlapmt.$(SUFFIX) zlaqgb.$(SUFFIX) zlaqge.$(SUFFIX) \
|
||||
+ zlaqhb.$(SUFFIX) zlaqhe.$(SUFFIX) zlaqhp.$(SUFFIX) zlaqp2.$(SUFFIX) zlaqps.$(SUFFIX) zlaqsb.$(SUFFIX) \
|
||||
+ zlaqr0.$(SUFFIX) zlaqr1.$(SUFFIX) zlaqr2.$(SUFFIX) zlaqr3.$(SUFFIX) zlaqr4.$(SUFFIX) zlaqr5.$(SUFFIX) \
|
||||
+ zlaqsp.$(SUFFIX) zlaqsy.$(SUFFIX) zlar1v.$(SUFFIX) zlar2v.$(SUFFIX) ilazlr.$(SUFFIX) ilazlc.$(SUFFIX) \
|
||||
+ zlarcm.$(SUFFIX) zlarf.$(SUFFIX) zlarfb.$(SUFFIX) \
|
||||
+ zlarfg.$(SUFFIX) zlarft.$(SUFFIX) zlarfgp.$(SUFFIX) \
|
||||
+ zlarfx.$(SUFFIX) zlargv.$(SUFFIX) zlarnv.$(SUFFIX) zlarrv.$(SUFFIX) zlartg.$(SUFFIX) zlartv.$(SUFFIX) \
|
||||
+ zlarz.$(SUFFIX) zlarzb.$(SUFFIX) zlarzt.$(SUFFIX) zlascl.$(SUFFIX) zlaset.$(SUFFIX) zlasr.$(SUFFIX) \
|
||||
+ zlassq.$(SUFFIX) zlasyf.$(SUFFIX) \
|
||||
+ zlatbs.$(SUFFIX) zlatdf.$(SUFFIX) zlatps.$(SUFFIX) zlatrd.$(SUFFIX) zlatrs.$(SUFFIX) zlatrz.$(SUFFIX) zlatzm.$(SUFFIX) zlauu2.$(SUFFIX) \
|
||||
+ zpbcon.$(SUFFIX) zpbequ.$(SUFFIX) zpbrfs.$(SUFFIX) zpbstf.$(SUFFIX) zpbsv.$(SUFFIX) \
|
||||
+ zpbsvx.$(SUFFIX) zpbtf2.$(SUFFIX) zpbtrf.$(SUFFIX) zpbtrs.$(SUFFIX) zpocon.$(SUFFIX) zpoequ.$(SUFFIX) zporfs.$(SUFFIX) \
|
||||
+ zposv.$(SUFFIX) zposvx.$(SUFFIX) zpotri.$(SUFFIX) zpotrs.$(SUFFIX) zpstrf.$(SUFFIX) zpstf2.$(SUFFIX) \
|
||||
+ zppcon.$(SUFFIX) zppequ.$(SUFFIX) zpprfs.$(SUFFIX) zppsv.$(SUFFIX) zppsvx.$(SUFFIX) zpptrf.$(SUFFIX) zpptri.$(SUFFIX) zpptrs.$(SUFFIX) \
|
||||
+ zptcon.$(SUFFIX) zpteqr.$(SUFFIX) zptrfs.$(SUFFIX) zptsv.$(SUFFIX) zptsvx.$(SUFFIX) zpttrf.$(SUFFIX) zpttrs.$(SUFFIX) zptts2.$(SUFFIX) \
|
||||
+ zrot.$(SUFFIX) zspcon.$(SUFFIX) zspmv.$(SUFFIX) zspr.$(SUFFIX) zsprfs.$(SUFFIX) zspsv.$(SUFFIX) \
|
||||
+ zspsvx.$(SUFFIX) zsptrf.$(SUFFIX) zsptri.$(SUFFIX) zsptrs.$(SUFFIX) zdrscl.$(SUFFIX) zstedc.$(SUFFIX) \
|
||||
+ zstegr.$(SUFFIX) zstein.$(SUFFIX) zsteqr.$(SUFFIX) \
|
||||
+ zsycon.$(SUFFIX) zsymv.$(SUFFIX) \
|
||||
+ zsyr.$(SUFFIX) zsyrfs.$(SUFFIX) zsysv.$(SUFFIX) zsysvx.$(SUFFIX) zsytf2.$(SUFFIX) zsytrf.$(SUFFIX) zsytri.$(SUFFIX) zsytri2.$(SUFFIX) zsytri2x.$(SUFFIX) \
|
||||
+ zsyswapr.$(SUFFIX) zsytrs.$(SUFFIX) zsytrs2.$(SUFFIX) zsyconv.$(SUFFIX) \
|
||||
+ ztbcon.$(SUFFIX) ztbrfs.$(SUFFIX) ztbtrs.$(SUFFIX) ztgevc.$(SUFFIX) ztgex2.$(SUFFIX) \
|
||||
+ ztgexc.$(SUFFIX) ztgsen.$(SUFFIX) ztgsja.$(SUFFIX) ztgsna.$(SUFFIX) ztgsy2.$(SUFFIX) ztgsyl.$(SUFFIX) ztpcon.$(SUFFIX) \
|
||||
+ ztprfs.$(SUFFIX) ztptri.$(SUFFIX) \
|
||||
+ ztptrs.$(SUFFIX) ztrcon.$(SUFFIX) ztrevc.$(SUFFIX) ztrexc.$(SUFFIX) ztrrfs.$(SUFFIX) ztrsen.$(SUFFIX) ztrsna.$(SUFFIX) \
|
||||
+ ztrsyl.$(SUFFIX) ztrtrs.$(SUFFIX) ztzrqf.$(SUFFIX) ztzrzf.$(SUFFIX) zung2l.$(SUFFIX) \
|
||||
+ zung2r.$(SUFFIX) zungbr.$(SUFFIX) zunghr.$(SUFFIX) zungl2.$(SUFFIX) zunglq.$(SUFFIX) zungql.$(SUFFIX) zungqr.$(SUFFIX) zungr2.$(SUFFIX) \
|
||||
+ zungrq.$(SUFFIX) zungtr.$(SUFFIX) zunm2l.$(SUFFIX) zunm2r.$(SUFFIX) zunmbr.$(SUFFIX) zunmhr.$(SUFFIX) zunml2.$(SUFFIX) \
|
||||
+ zunmlq.$(SUFFIX) zunmql.$(SUFFIX) zunmqr.$(SUFFIX) zunmr2.$(SUFFIX) zunmr3.$(SUFFIX) zunmrq.$(SUFFIX) zunmrz.$(SUFFIX) \
|
||||
+ zunmtr.$(SUFFIX) zupgtr.$(SUFFIX) \
|
||||
+ zupmtr.$(SUFFIX) izmax1.$(SUFFIX) dzsum1.$(SUFFIX) zstemr.$(SUFFIX) \
|
||||
+ zcgesv.$(SUFFIX) zcposv.$(SUFFIX) zlag2c.$(SUFFIX) clag2z.$(SUFFIX) zlat2c.$(SUFFIX) \
|
||||
+ zhfrk.$(SUFFIX) ztfttp.$(SUFFIX) zlanhf.$(SUFFIX) zpftrf.$(SUFFIX) zpftri.$(SUFFIX) zpftrs.$(SUFFIX) ztfsm.$(SUFFIX) ztftri.$(SUFFIX) \
|
||||
+ ztfttr.$(SUFFIX) ztpttf.$(SUFFIX) ztpttr.$(SUFFIX) ztrttf.$(SUFFIX) ztrttp.$(SUFFIX) \
|
||||
+ zgeequb.$(SUFFIX) zgbequb.$(SUFFIX) zsyequb.$(SUFFIX) zpoequb.$(SUFFIX) zheequb.$(SUFFIX) \
|
||||
+ zbbcsd.$(SUFFIX) zlapmr.$(SUFFIX) zunbdb.$(SUFFIX) zuncsd.$(SUFFIX) \
|
||||
+ zgeqrt.$(SUFFIX) zgeqrt2.$(SUFFIX) zgeqrt3.$(SUFFIX) zgemqrt.$(SUFFIX) \
|
||||
+ ztpqrt.$(SUFFIX) ztpqrt2.$(SUFFIX) ztpmqrt.$(SUFFIX) ztprfb.$(SUFFIX)
|
||||
|
||||
ifdef USEXBLAS
|
||||
-ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \
|
||||
- zla_gercond_c.o zla_gercond_x.o zla_gerpvgrw.o zsysvxx.o zsyrfsx.o \
|
||||
- zla_syrfsx_extended.o zla_syamv.o zla_syrcond_c.o zla_syrcond_x.o \
|
||||
- zla_syrpvgrw.o zposvxx.o zporfsx.o zla_porfsx_extended.o \
|
||||
- zla_porcond_c.o zla_porcond_x.o zla_porpvgrw.o zgbsvxx.o zgbrfsx.o \
|
||||
- zla_gbrfsx_extended.o zla_gbamv.o zla_gbrcond_c.o zla_gbrcond_x.o \
|
||||
- zla_gbrpvgrw.o zhesvxx.o zherfsx.o zla_herfsx_extended.o \
|
||||
- zla_heamv.o zla_hercond_c.o zla_hercond_x.o zla_herpvgrw.o \
|
||||
- zla_lin_berr.o zlarscl2.o zlascl2.o zla_wwaddw.o
|
||||
+ZXLASRC = zgesvxx.$(SUFFIX) zgerfsx.$(SUFFIX) zla_gerfsx_extended.$(SUFFIX) zla_geamv.$(SUFFIX) \
|
||||
+ zla_gercond_c.$(SUFFIX) zla_gercond_x.$(SUFFIX) zla_gerpvgrw.$(SUFFIX) zsysvxx.$(SUFFIX) zsyrfsx.$(SUFFIX) \
|
||||
+ zla_syrfsx_extended.$(SUFFIX) zla_syamv.$(SUFFIX) zla_syrcond_c.$(SUFFIX) zla_syrcond_x.$(SUFFIX) \
|
||||
+ zla_syrpvgrw.$(SUFFIX) zposvxx.$(SUFFIX) zporfsx.$(SUFFIX) zla_porfsx_extended.$(SUFFIX) \
|
||||
+ zla_porcond_c.$(SUFFIX) zla_porcond_x.$(SUFFIX) zla_porpvgrw.$(SUFFIX) zgbsvxx.$(SUFFIX) zgbrfsx.$(SUFFIX) \
|
||||
+ zla_gbrfsx_extended.$(SUFFIX) zla_gbamv.$(SUFFIX) zla_gbrcond_c.$(SUFFIX) zla_gbrcond_x.$(SUFFIX) \
|
||||
+ zla_gbrpvgrw.$(SUFFIX) zhesvxx.$(SUFFIX) zherfsx.$(SUFFIX) zla_herfsx_extended.$(SUFFIX) \
|
||||
+ zla_heamv.$(SUFFIX) zla_hercond_c.$(SUFFIX) zla_hercond_x.$(SUFFIX) zla_herpvgrw.$(SUFFIX) \
|
||||
+ zla_lin_berr.$(SUFFIX) zlarscl2.$(SUFFIX) zlascl2.$(SUFFIX) zla_wwaddw.$(SUFFIX)
|
||||
endif
|
||||
|
||||
ALLOBJ = $(SLASRC) $(DLASRC) $(DSLASRC) $(CLASRC) $(ZLASRC) $(ZCLASRC) \
|
||||
$(SCLAUX) $(DZLAUX) $(ALLAUX)
|
||||
|
||||
+ALLOBJ_P = $(ALLOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||
+
|
||||
ifdef USEXBLAS
|
||||
ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
|
||||
endif
|
||||
|
||||
all: ../$(LAPACKLIB)
|
||||
|
||||
+lapack_prof: ../$(LAPACKLIB_P)
|
||||
+
|
||||
../$(LAPACKLIB): $(ALLOBJ) $(ALLXOBJ)
|
||||
$(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ) $(ALLXOBJ)
|
||||
$(RANLIB) $@
|
||||
|
||||
+../$(LAPACKLIB_P): $(ALLOBJ_P)
|
||||
+ $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ_P)
|
||||
+ $(RANLIB) $@
|
||||
+
|
||||
single: $(SLASRC) $(DSLASRC) $(SXLASRC) $(SCLAUX) $(ALLAUX)
|
||||
$(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \
|
||||
$(SXLASRC) $(SCLAUX) $(ALLAUX) $(ALLXAUX)
|
||||
@@ -451,15 +459,24 @@
|
||||
@FRC=$(FRC)
|
||||
|
||||
clean:
|
||||
- rm -f *.o
|
||||
+ rm -f *.$(SUFFIX) *.$(PSUFFIX)
|
||||
|
||||
-.f.o:
|
||||
+%.$(SUFFIX): %.f
|
||||
$(FORTRAN) $(OPTS) -c $< -o $@
|
||||
|
||||
-slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
-dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
-sla_wwaddw.o: sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
-dla_wwaddw.o: dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
-cla_wwaddw.o: cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
-zla_wwaddw.o: zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
+%.$(PSUFFIX): %.f
|
||||
+ $(FORTRAN) $(POPTS) -c $< -o $@
|
||||
|
||||
+slaruv.$(SUFFIX): slaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
+dlaruv.$(SUFFIX): dlaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
+sla_wwaddw.$(SUFFIX): sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
+dla_wwaddw.$(SUFFIX): dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
+cla_wwaddw.$(SUFFIX): cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
+zla_wwaddw.$(SUFFIX): zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
+
|
||||
+slaruv.$(PSUFFIX): slaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
+dlaruv.$(PSUFFIX): dlaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
+sla_wwaddw.$(PSUFFIX): sla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
+dla_wwaddw.$(PSUFFIX): dla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
+cla_wwaddw.$(PSUFFIX): cla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
+zla_wwaddw.$(PSUFFIX): zla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
diff -ruN lapack-3.4.1.old/TESTING/EIG/Makefile lapack-3.4.1/TESTING/EIG/Makefile
|
||||
--- lapack-3.4.1.old/TESTING/EIG/Makefile 2011-09-26 23:52:31 +0200
|
||||
+++ lapack-3.4.1/TESTING/EIG/Makefile 2012-04-22 21:41:45 +0200
|
||||
@@ -78,7 +78,7 @@
|
||||
cget35.o cget36.o cget37.o cget38.o cget51.o cget52.o \
|
||||
cget54.o cglmts.o cgqrts.o cgrqts.o cgsvts.o \
|
||||
chbt21.o chet21.o chet22.o chpt21.o chst01.o \
|
||||
- clarfy.o clarhs.o clatm4.o clctes.o clctsx.o clsets.o csbmv.o \
|
||||
+ clarfy.o clarhs.o clatm4.o clctes.o clctsx.o clsets.o \
|
||||
csgt01.o cslect.o \
|
||||
cstt21.o cstt22.o cunt01.o cunt03.o
|
||||
|
||||
@@ -115,7 +115,7 @@
|
||||
zget35.o zget36.o zget37.o zget38.o zget51.o zget52.o \
|
||||
zget54.o zglmts.o zgqrts.o zgrqts.o zgsvts.o \
|
||||
zhbt21.o zhet21.o zhet22.o zhpt21.o zhst01.o \
|
||||
- zlarfy.o zlarhs.o zlatm4.o zlctes.o zlctsx.o zlsets.o zsbmv.o \
|
||||
+ zlarfy.o zlarhs.o zlatm4.o zlctes.o zlctsx.o zlsets.o \
|
||||
zsgt01.o zslect.o \
|
||||
zstt21.o zstt22.o zunt01.o zunt03.o
|
||||
|
||||
@@ -129,22 +129,22 @@
|
||||
../xeigtsts: $(SEIGTST) $(SCIGTST) $(AEIGTST) ../../$(LAPACKLIB); \
|
||||
$(LOADER) $(LOADOPTS) -o xeigtsts \
|
||||
$(SEIGTST) $(SCIGTST) $(AEIGTST) ../../$(TMGLIB) \
|
||||
- ../../$(LAPACKLIB) $(BLASLIB) && mv xeigtsts $@
|
||||
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB) && mv xeigtsts $@
|
||||
|
||||
../xeigtstc: $(CEIGTST) $(SCIGTST) $(AEIGTST) ../../$(LAPACKLIB); \
|
||||
$(LOADER) $(LOADOPTS) -o xeigtstc \
|
||||
$(CEIGTST) $(SCIGTST) $(AEIGTST) ../../$(TMGLIB) \
|
||||
- ../../$(LAPACKLIB) $(BLASLIB) && mv xeigtstc $@
|
||||
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB) && mv xeigtstc $@
|
||||
|
||||
../xeigtstd: $(DEIGTST) $(DZIGTST) $(AEIGTST) ../../$(LAPACKLIB); \
|
||||
$(LOADER) $(LOADOPTS) -o xeigtstd \
|
||||
$(DEIGTST) $(DZIGTST) $(AEIGTST) ../../$(TMGLIB) \
|
||||
- ../../$(LAPACKLIB) $(BLASLIB) && mv xeigtstd $@
|
||||
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB) && mv xeigtstd $@
|
||||
|
||||
../xeigtstz: $(ZEIGTST) $(DZIGTST) $(AEIGTST) ../../$(LAPACKLIB); \
|
||||
$(LOADER) $(LOADOPTS) -o xeigtstz \
|
||||
$(ZEIGTST) $(DZIGTST) $(AEIGTST) ../../$(TMGLIB) \
|
||||
- ../../$(LAPACKLIB) $(BLASLIB) && mv xeigtstz $@
|
||||
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB) && mv xeigtstz $@
|
||||
|
||||
$(AEIGTST): $(FRC)
|
||||
$(SCIGTST): $(FRC)
|
||||
diff -ruN lapack-3.4.1.old/TESTING/LIN/Makefile lapack-3.4.1/TESTING/LIN/Makefile
|
||||
--- lapack-3.4.1.old/TESTING/LIN/Makefile 2012-04-02 21:06:36 +0200
|
||||
+++ lapack-3.4.1/TESTING/LIN/Makefile 2012-04-22 21:43:30 +0200
|
||||
@@ -109,7 +109,7 @@
|
||||
cqpt01.o cqrt01.o cqrt01p.o cqrt02.o cqrt03.o cqrt11.o \
|
||||
cqrt12.o cqrt13.o cqrt14.o cqrt15.o cqrt16.o \
|
||||
cqrt17.o crqt01.o crqt02.o crqt03.o crzt01.o crzt02.o \
|
||||
- csbmv.o cspt01.o \
|
||||
+ cspt01.o \
|
||||
cspt02.o cspt03.o csyt01.o csyt02.o csyt03.o \
|
||||
ctbt02.o ctbt03.o ctbt05.o ctbt06.o ctpt01.o \
|
||||
ctpt02.o ctpt03.o ctpt05.o ctpt06.o ctrt01.o \
|
||||
@@ -188,7 +188,7 @@
|
||||
zqpt01.o zqrt01.o zqrt01p.o zqrt02.o zqrt03.o zqrt11.o \
|
||||
zqrt12.o zqrt13.o zqrt14.o zqrt15.o zqrt16.o \
|
||||
zqrt17.o zrqt01.o zrqt02.o zrqt03.o zrzt01.o zrzt02.o \
|
||||
- zsbmv.o zspt01.o \
|
||||
+ zspt01.o \
|
||||
zspt02.o zspt03.o zsyt01.o zsyt02.o zsyt03.o \
|
||||
ztbt02.o ztbt03.o ztbt05.o ztbt06.o ztpt01.o \
|
||||
ztpt02.o ztpt03.o ztpt05.o ztpt06.o ztrt01.o \
|
||||
@@ -214,7 +214,7 @@
|
||||
zdrvab.o zdrvac.o zerrab.o zerrac.o zget08.o \
|
||||
alaerh.o alahd.o aladhd.o alareq.o \
|
||||
chkxer.o zget02.o zlarhs.o zlatb4.o \
|
||||
- zsbmv.o xerbla.o zpot06.o zlaipd.o
|
||||
+ xerbla.o zpot06.o zlaipd.o
|
||||
|
||||
SLINTSTRFP = schkrfp.o sdrvrfp.o sdrvrf1.o sdrvrf2.o sdrvrf3.o sdrvrf4.o serrrfp.o \
|
||||
slatb4.o slarhs.o sget04.o spot01.o spot03.o spot02.o \
|
||||
@@ -225,11 +225,11 @@
|
||||
chkxer.o xerbla.o alaerh.o aladhd.o alahd.o alasvm.o
|
||||
|
||||
CLINTSTRFP = cchkrfp.o cdrvrfp.o cdrvrf1.o cdrvrf2.o cdrvrf3.o cdrvrf4.o cerrrfp.o \
|
||||
- claipd.o clatb4.o clarhs.o csbmv.o cget04.o cpot01.o cpot03.o cpot02.o \
|
||||
+ claipd.o clatb4.o clarhs.o cget04.o cpot01.o cpot03.o cpot02.o \
|
||||
chkxer.o xerbla.o alaerh.o aladhd.o alahd.o alasvm.o
|
||||
|
||||
ZLINTSTRFP = zchkrfp.o zdrvrfp.o zdrvrf1.o zdrvrf2.o zdrvrf3.o zdrvrf4.o zerrrfp.o \
|
||||
- zlatb4.o zlaipd.o zlarhs.o zsbmv.o zget04.o zpot01.o zpot03.o zpot02.o \
|
||||
+ zlatb4.o zlaipd.o zlarhs.o zget04.o zpot01.o zpot03.o zpot02.o \
|
||||
chkxer.o xerbla.o alaerh.o aladhd.o alahd.o alasvm.o
|
||||
|
||||
all: single double complex complex16 proto-single proto-double proto-complex proto-complex16
|
||||
@@ -246,43 +246,43 @@
|
||||
|
||||
xlintsts : $(ALINTST) $(SLINTST) $(SCLNTST) ../../$(LAPACKLIB)
|
||||
$(LOADER) $(LOADOPTS) $(ALINTST) $(SCLNTST) $(SLINTST) \
|
||||
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@
|
||||
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
|
||||
|
||||
xlintstc : $(ALINTST) $(CLINTST) $(SCLNTST) ../../$(LAPACKLIB)
|
||||
$(LOADER) $(LOADOPTS) $(ALINTST) $(SCLNTST) $(CLINTST) \
|
||||
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@
|
||||
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
|
||||
|
||||
xlintstd : $(ALINTST) $(DLINTST) $(DZLNTST) ../../$(LAPACKLIB)
|
||||
$(LOADER) $(LOADOPTS) $^ \
|
||||
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@
|
||||
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
|
||||
|
||||
xlintstz : $(ALINTST) $(ZLINTST) $(DZLNTST) ../../$(LAPACKLIB)
|
||||
$(LOADER) $(LOADOPTS) $(ALINTST) $(DZLNTST) $(ZLINTST) \
|
||||
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@
|
||||
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
|
||||
|
||||
xlintstds : $(DSLINTST) ../../$(LAPACKLIB)
|
||||
$(LOADER) $(LOADOPTS) $(DSLINTST) \
|
||||
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
|
||||
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
|
||||
|
||||
xlintstzc : $(ZCLINTST) ../../$(LAPACKLIB)
|
||||
$(LOADER) $(LOADOPTS) $(ZCLINTST) \
|
||||
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
|
||||
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
|
||||
|
||||
xlintstrfs : $(SLINTSTRFP) ../../$(LAPACKLIB)
|
||||
$(LOADER) $(LOADOPTS) $(SLINTSTRFP) \
|
||||
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
|
||||
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
|
||||
|
||||
xlintstrfd : $(DLINTSTRFP) ../../$(LAPACKLIB)
|
||||
$(LOADER) $(LOADOPTS) $(DLINTSTRFP) \
|
||||
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
|
||||
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
|
||||
|
||||
xlintstrfc : $(CLINTSTRFP) ../../$(LAPACKLIB)
|
||||
$(LOADER) $(LOADOPTS) $(CLINTSTRFP) \
|
||||
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
|
||||
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
|
||||
|
||||
xlintstrfz : $(ZLINTSTRFP) ../../$(LAPACKLIB)
|
||||
$(LOADER) $(LOADOPTS) $(ZLINTSTRFP) \
|
||||
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
|
||||
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
|
||||
|
||||
../xlintsts: xlintsts
|
||||
mv xlintsts $@
|
||||
diff -ruN lapack-3.4.1.old/lapacke/src/Makefile lapack-3.4.1/lapacke/src/Makefile
|
||||
--- lapack-3.4.1.old/lapacke/src/Makefile 2012-04-02 22:16:32 +0200
|
||||
+++ lapack-3.4.1/lapacke/src/Makefile 2012-04-22 21:38:38 +0200
|
||||
@@ -2040,19 +2040,21 @@
|
||||
lapacke_zlagsy.o \
|
||||
lapacke_zlagsy_work.o
|
||||
|
||||
-ALLOBJ = $(SRC_OBJ) $(MATGEN_OBJ)
|
||||
+OBJ_FILES := $(SRC_OBJ)
|
||||
|
||||
-ifdef USEXBLAS
|
||||
-ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
|
||||
+ifdef LAPACKE_EXTENDED
|
||||
+OBJ_FILES += $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
|
||||
endif
|
||||
|
||||
-
|
||||
-OBJ_FILES := $(C_FILES:.o=.o)
|
||||
+ifdef LAPACKE_TESTING
|
||||
+OBJ_FILES += $(MATGEN_OBJ)
|
||||
+endif
|
||||
|
||||
all: ../../$(LAPACKELIB)
|
||||
|
||||
-../../$(LAPACKELIB): $(ALLOBJ) $(ALLXOBJ)
|
||||
- $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ALLOBJ) $(ALLXOBJ)
|
||||
+../../$(LAPACKELIB): $(OBJ_FILES)
|
||||
+# http://hackage.haskell.org/trac/gtk2hs/ticket/1146
|
||||
+ echo $(OBJ_FILES) | xargs -n 100 $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB)
|
||||
$(RANLIB) ../../$(LAPACKELIB)
|
||||
|
||||
.c.o:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
|
||||
echo " Please read https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio "
|
||||
make BINARY=64 CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran
|
||||
make BINARY=64 CC=gcc FC=gfortran
|
||||
|
||||
@@ -11,7 +11,7 @@ CUNIT_LIB=$(CUNIT_DIR)/lib/libcunit.a
|
||||
|
||||
CFLAGS+=-I$(CUNIT_DIR)/include
|
||||
|
||||
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o test_amax.o
|
||||
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o test_amax.o test_fork.o
|
||||
|
||||
all : run_test
|
||||
|
||||
|
||||
@@ -63,4 +63,6 @@ void test_dsdot_n_1(void);
|
||||
|
||||
void test_samax(void);
|
||||
|
||||
void test_fork_safety(void);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -60,6 +60,14 @@ CU_TestInfo test_level1[]={
|
||||
{"Testing dsdot with n == 1",test_dsdot_n_1},
|
||||
|
||||
{"Testing samax", test_samax},
|
||||
|
||||
#if !defined(USE_OPENMP) && !defined(OS_WINDOWS)
|
||||
// The GNU OpenMP implementation libgomp is not fork-safe (as of 4.8.2):
|
||||
// http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035
|
||||
// Hence skip this test when OpenBLAS is built with OpenMP.
|
||||
{"Testing fork safety", test_fork_safety},
|
||||
#endif
|
||||
|
||||
CU_TEST_INFO_NULL,
|
||||
};
|
||||
|
||||
|
||||
123
utest/test_fork.c
Normal file
123
utest/test_fork.c
Normal file
@@ -0,0 +1,123 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2014, Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
#ifndef OS_WINDOWS
|
||||
#include "common_utest.h"
|
||||
#include <sys/wait.h>
|
||||
#include <cblas.h>
|
||||
|
||||
/*
 * Allocate n bytes with malloc(), terminating the process on failure.
 *
 * Parameters:
 *   n - number of bytes requested; zero is accepted.
 *
 * Returns a non-NULL pointer that the caller must release with free().
 * If the allocation fails, a message is written to stderr and the process
 * exits with status 1, so callers never have to test the result.
 */
void *xmalloc(size_t n)
{
    /*
     * malloc(0) is allowed to return NULL even when memory is available.
     * Request at least one byte so that a zero-size request is never
     * mistaken for an out-of-memory condition.
     */
    void *block = malloc(n != 0 ? n : 1);

    if (block == NULL) {
        fprintf(stderr, "You are about to die\n");
        exit(1);
    }
    return block;
}
|
||||
|
||||
void check_dgemm(double *a, double *b, double *result, double *expected, int n)
|
||||
{
|
||||
int i;
|
||||
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n,
|
||||
1.0, a, n, b, n, 0.0, result, n);
|
||||
for(i = 0; i < n * n; ++i) {
|
||||
CU_ASSERT_DOUBLE_EQUAL(expected[i], result[i], CHECK_EPS);
|
||||
}
|
||||
}
|
||||
|
||||
void test_fork_safety(void)
|
||||
{
|
||||
int n = 1000;
|
||||
int i;
|
||||
|
||||
double *a, *b, *c, *d;
|
||||
size_t n_bytes;
|
||||
|
||||
pid_t fork_pid;
|
||||
pid_t fork_pid_nested;
|
||||
|
||||
n_bytes = sizeof(*a) * n * n;
|
||||
|
||||
a = xmalloc(n_bytes);
|
||||
b = xmalloc(n_bytes);
|
||||
c = xmalloc(n_bytes);
|
||||
d = xmalloc(n_bytes);
|
||||
|
||||
// Put ones in a and b
|
||||
for(i = 0; i < n * n; ++i) {
|
||||
a[i] = 1;
|
||||
b[i] = 1;
|
||||
}
|
||||
|
||||
// Compute a DGEMM product in the parent process prior to forking to
|
||||
// ensure that the OpenBLAS thread pool is initialized.
|
||||
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n,
|
||||
1.0, a, n, b, n, 0.0, c, n);
|
||||
|
||||
fork_pid = fork();
|
||||
if (fork_pid == -1) {
|
||||
CU_FAIL("Failed to fork process.");
|
||||
} else if (fork_pid == 0) {
|
||||
// Compute a DGEMM product in the child process to check that the
|
||||
// thread pool as been properly been reinitialized after the fork.
|
||||
check_dgemm(a, b, d, c, n);
|
||||
|
||||
// Nested fork to check that the pthread_atfork protection can work
|
||||
// recursively
|
||||
fork_pid_nested = fork();
|
||||
if (fork_pid_nested == -1) {
|
||||
CU_FAIL("Failed to fork process.");
|
||||
exit(1);
|
||||
} else if (fork_pid_nested == 0) {
|
||||
check_dgemm(a, b, d, c, n);
|
||||
exit(0);
|
||||
} else {
|
||||
check_dgemm(a, b, d, c, n);
|
||||
int child_status = 0;
|
||||
pid_t wait_pid = wait(&child_status);
|
||||
CU_ASSERT(wait_pid == fork_pid_nested);
|
||||
CU_ASSERT(WEXITSTATUS (child_status) == 0);
|
||||
exit(0);
|
||||
}
|
||||
} else {
|
||||
check_dgemm(a, b, d, c, n);
|
||||
// Wait for the child to finish and check the exit code.
|
||||
int child_status = 0;
|
||||
pid_t wait_pid = wait(&child_status);
|
||||
CU_ASSERT(wait_pid == fork_pid);
|
||||
CU_ASSERT(WEXITSTATUS (child_status) == 0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
Reference in New Issue
Block a user