Compare commits

...

138 Commits

Author SHA1 Message Date
Zhang Xianyi
f773f492f3 Merge branch 'develop' 2014-06-10 21:55:47 +08:00
Zhang Xianyi
21a6b5f79e OpenBLAS 0.2.9 Version. 2014-06-10 21:55:19 +08:00
Zhang Xianyi
a40116de25 Fixed generating DLL bug. 2014-06-06 16:13:08 +08:00
Zhang Xianyi
b31ec99372 Fixed #374.
Merge branch 'TimothyGu-develop' into develop
2014-06-05 17:01:44 +08:00
Zhang Xianyi
0ac073fa94 Merge pull request #376 from wernsaar/develop
Merged some Lapack optimized functions
https://github.com/xianyi/OpenBLAS/wiki/Fixed-optimized-kernels-To-do-List
2014-05-26 04:46:06 -05:00
wernsaar
25e899b60b fixed function profile in zpotri.c 2014-05-25 09:15:22 +02:00
wernsaar
219bcb119d added lapack and lapacke timing libs by default 2014-05-24 15:53:25 +02:00
wernsaar
5664445543 changed threshold value for sep.in from 50.0 to 60.0 2014-05-23 17:26:50 +02:00
wernsaar
89da450800 enabled and tested optimized potri lapack functions 2014-05-23 12:14:30 +02:00
wernsaar
c26bbee489 enabled and tested optimized trtri lapack functions 2014-05-23 10:55:39 +02:00
Timothy Gu
ced13574a0 Random "walk (a)round" --> "work-around" typo fixes
Signed-off-by: Timothy Gu <timothygu99@gmail.com>
2014-05-22 18:11:52 -07:00
Timothy Gu
fe858873af Add NO_STATIC variable which disables static lib installation
Static library is still built for shared lib generation.

Signed-off-by: Timothy Gu <timothygu99@gmail.com>
2014-05-22 18:06:26 -07:00
Timothy Gu
a8d4d1c4d3 Build import library for mingw
Signed-off-by: Timothy Gu <timothygu99@gmail.com>
2014-05-22 18:06:26 -07:00
wernsaar
c4ccb3fbb2 removed lapack/getri because it was never used 2014-05-21 14:21:19 +02:00
wernsaar
a748d3a75d enabled optimized trti2 lapack functions again 2014-05-21 11:02:07 +02:00
wernsaar
a5ab231ad4 enabled optimized complex lauum lapack functions again 2014-05-21 10:35:28 +02:00
wernsaar
dbaeea7b59 enabled lauu2 and lauum lapack functions again 2014-05-21 09:49:18 +02:00
Zhang Xianyi
10a16bd690 Refs #372. Fixed a lot of bugs about LAPACK testing.
As a workaround, we rolled back some kernels.

Please check https://github.com/xianyi/OpenBLAS/wiki/Fixed-optimized-kernels-To-do-List

Merge branch 'wernsaar-develop' into develop
2014-05-21 11:36:46 +08:00
Zhang Xianyi
406f5bd22b Merge branch 'develop' of https://github.com/wernsaar/OpenBLAS into wernsaar-develop
Conflicts:
	kernel/arm/KERNEL.ARMV6
2014-05-21 11:24:39 +08:00
wernsaar
a0ae53966f removed debug flag from Makefile.rule 2014-05-19 15:57:18 +02:00
wernsaar
0d75f3b6a2 enabled and tested optimized gesv lapack functions 2014-05-19 14:44:53 +02:00
wernsaar
abad6f66d6 marked trti2.c and ztrti2.c as bad 2014-05-19 13:50:02 +02:00
wernsaar
2ff66e661d enabled and tested optimized laswp lapack function 2014-05-19 13:35:32 +02:00
wernsaar
5e55034922 marked zlauu2.c and zlauum.c as bad 2014-05-19 12:53:22 +02:00
wernsaar
9a9e810239 marked trtri.c and ztrtri as bad 2014-05-19 12:42:52 +02:00
wernsaar
45be9ac111 moved trtri.c and ztrtri.c to the directory lapack 2014-05-19 12:29:29 +02:00
wernsaar
9f201558c9 marked lauu2.c and lauum.c as bad 2014-05-19 12:00:16 +02:00
wernsaar
d4237cb7f3 marked larf.c as obsolete 2014-05-19 11:23:17 +02:00
Zhang Xianyi
d2a8ff4b04 Merge branch 'TimothyGu-develop' into develop 2014-05-19 10:37:20 +08:00
Timothy Gu
f331cb1a76 Remove code for downloading lapack tarball and the patches themselves
They are not used anymore since 3eb5af1.

Signed-off-by: Timothy Gu <timothygu99@gmail.com>
2014-05-18 19:09:26 -07:00
Timothy Gu
9ed981c5dc Remove unused dll2 target
Signed-off-by: Timothy Gu <timothygu99@gmail.com>
2014-05-18 18:54:38 -07:00
wernsaar
aaa9d7fbf8 marked potri functions as bad because a lot of errors 2014-05-18 23:41:13 +02:00
wernsaar
ebc95e6f11 enabled and tested optimized potf2 lapack functions 2014-05-18 22:41:43 +02:00
wernsaar
61a2c50e8e enabled and tested optimized getf2 lapack functions 2014-05-18 22:21:16 +02:00
wernsaar
4f98f8c9b3 enabled and tested optimized potrf lapack functions 2014-05-18 21:42:37 +02:00
wernsaar
536875d463 enabled and tested optimized getrs lapack functions 2014-05-18 21:13:56 +02:00
wernsaar
65f2fba4c3 enabled and tested optimized cgetrf lapack function 2014-05-18 20:32:27 +02:00
wernsaar
eea6f51df9 enabled and tested optimized sgetrf lapack function 2014-05-18 20:01:23 +02:00
wernsaar
6fc4646709 enabled and tested optimized zgetrf lapack function 2014-05-18 19:36:32 +02:00
wernsaar
ac029f81b3 enabled and tested optimized dgetrf function 2014-05-18 19:07:51 +02:00
wernsaar
c0cf875a82 added optimized lapack files from OpenBLAS 2014-05-18 14:09:22 +02:00
Timothy Gu
b6d904838e Remove routines for generating exports/symbol.S
Signed-off-by: Timothy Gu <timothygu99@gmail.com>
2014-05-17 16:02:36 -07:00
Timothy Gu
5379eff022 Remove routines for making exports/linux.def
Signed-off-by: Timothy Gu <timothygu99@gmail.com>
2014-05-17 16:01:30 -07:00
wernsaar
aaddb05411 bugfix for ARMV6 2014-05-17 13:00:36 +02:00
wernsaar
e52532a9fe enable debug for lapack testing 2014-05-17 11:18:26 +02:00
wernsaar
e826a5a6af some modifications regarding lapack test 2014-05-16 20:37:41 +02:00
wernsaar
165d5436b5 changed threshold to 50.0 2014-05-16 20:34:48 +02:00
wernsaar
409b52255c changed default optimization flag from O3 to O2 for ARM 2014-05-16 14:36:24 +02:00
wernsaar
5953972a5a changed threshold for 50.0 to 54.0 in svd.in 2014-05-16 14:32:10 +02:00
wernsaar
d751224ea4 changed YIELDING for BULLDOZER 2014-05-15 11:37:38 +02:00
wernsaar
4a5938b5cc Modified lapack-test, using lapack_testing.py to run tests 2014-05-14 15:16:21 +02:00
wernsaar
d18bc5468f added FCOMMON_OPT for lapack 2014-05-14 15:01:03 +02:00
wernsaar
8877c6db51 changed label lapack-test 2014-05-14 13:08:05 +02:00
wernsaar
c38379c9dd bugfixes for ARM regarding lapack tests 2014-05-14 13:03:45 +02:00
wernsaar
a0b07c1440 bugfixs for ARM regarding lapack tests 2014-05-14 12:59:20 +02:00
wernsaar
43fbdb7a5a added ARMV5 as reference platform 2014-05-13 17:25:19 +02:00
wernsaar
777cebc8c7 added ZERO check to zscal.c because bug in lapack-testing 2014-05-13 16:31:00 +02:00
wernsaar
aa5c73e20f added ZERO check to zscal.c because bug in lapack-test 2014-05-13 16:25:21 +02:00
wernsaar
5e5ef28ca0 added ZERO check because bug in lapack-test 2014-05-13 15:36:03 +02:00
wernsaar
650ed34336 added ZERO check because bug in lapack-test 2014-05-13 15:31:36 +02:00
wernsaar
189ca1bcee removed lapack objects from interface/Makefile 2014-05-11 12:09:34 +02:00
wernsaar
4c1caa7454 checked, that zhpr is OK 2014-05-11 11:21:23 +02:00
wernsaar
7bb19cf90e checked, that zhpr2 is OK 2014-05-11 11:11:05 +02:00
wernsaar
2a94aaaf2e checked, that zhpmv is OK 2014-05-11 10:46:48 +02:00
wernsaar
5e4b4f6712 checked, that zher is OK 2014-05-11 10:36:34 +02:00
wernsaar
47e8950e77 checked, that zher2 is OK 2014-05-11 10:26:05 +02:00
wernsaar
f45f2c8465 checked, that zhemv is OK 2014-05-11 10:15:06 +02:00
wernsaar
10780ae650 marked zhbmv as smp bug 2014-05-11 09:58:16 +02:00
wernsaar
9bae50f700 checked, that zscal and zswap are OK 2014-05-11 09:30:18 +02:00
wernsaar
0758c1a374 checked, that trtri is OK 2014-05-11 09:11:20 +02:00
wernsaar
564ff395f6 checked, that trsm is OK 2014-05-11 08:59:33 +02:00
wernsaar
7fb78a5f01 checked, that trmv is OK 2014-05-11 08:47:44 +02:00
wernsaar
8204ab4aa8 checked, that tpmv is OK 2014-05-11 08:35:34 +02:00
wernsaar
48d1325784 checked, that tbmv is OK 2014-05-11 08:22:00 +02:00
wernsaar
57bbc586ef checked, that syrk is OK 2014-05-11 08:10:25 +02:00
wernsaar
bfef3c5dd1 checked, that syr is OK 2014-05-11 07:46:22 +02:00
wernsaar
d972f4a60a check, that syr2k is OK 2014-05-11 01:04:46 +02:00
wernsaar
eebce01cf2 checked, that syr2 is OK 2014-05-11 00:48:49 +02:00
wernsaar
e2c39a4a8e checked, that symv is OK 2014-05-11 00:36:56 +02:00
wernsaar
1e8e6faa7e checked, that symm is OK 2014-05-11 00:22:40 +02:00
wernsaar
c7eb901496 checked, that spr is OK 2014-05-11 00:07:07 +02:00
wernsaar
2ed03ea0a2 checked, that spr2 is OK 2014-05-10 23:55:43 +02:00
wernsaar
de00e2937a marked as smp bug 2014-05-10 23:18:35 +02:00
wernsaar
e187b5e9d0 removed gesv.c from interface 2014-05-10 22:55:44 +02:00
wernsaar
0947fc1c89 checked, that ger is OK 2014-05-10 22:49:53 +02:00
wernsaar
4d61607c9e checked, that gbmv is OK 2014-05-10 22:38:09 +02:00
wernsaar
781bfb6e66 checked, that gemv is OK 2014-05-10 22:24:05 +02:00
wernsaar
79a82ba7f1 checked that axpy is OK 2014-05-10 22:09:49 +02:00
wernsaar
d63bd7fa5e checked that gemm.c is OK 2014-05-10 21:51:44 +02:00
wernsaar
e265c4ec86 added C files in interface 2014-05-10 21:27:47 +02:00
wernsaar
0732238213 removed all C files in interface 2014-05-10 21:25:17 +02:00
wernsaar
5f3b68b4d4 replaced sgemm and cgemm kernels because lapack bugs 2014-05-10 11:24:07 +02:00
wernsaar
2424af62fd replaced dgemm-kernel because bug in lapack 2014-05-10 10:52:37 +02:00
wernsaar
6b252033ae changed test ratio from 30.0 to 40.0 2014-05-09 13:17:47 +02:00
wernsaar
320c805905 fixed incorrect parameter 2 errors 2014-05-08 11:06:32 +02:00
wernsaar
e673848a9b added log file for lapack development 2014-05-07 14:36:49 +02:00
wernsaar
a35a1a9ae7 changed makefiles for lapack development 2014-05-07 11:33:02 +02:00
wernsaar
793509a3b5 replaced files for sdot, sgemv_n and sgemv_t for bug #348 2014-05-06 15:29:39 +02:00
Zhang Xianyi
020f36f970 Merge pull request #367 from xantares/patch-2
Makefile typo
2014-05-02 17:55:08 +08:00
Zhang Xianyi
9d0cc399ac Merge pull request #366 from xantares/patch-1
Install dll to prefix/bin instead of prefix/lib
2014-05-02 17:54:22 +08:00
wernsaar
025fc914cc fixed 2 bugs as reported by Brendan Tracey 2014-05-02 11:34:26 +02:00
xantares
43bb633096 Update Makefile 2014-05-02 08:54:22 +02:00
xantares
187237b622 Install dll to prefix/bin instead of prefix/lib 2014-05-01 21:48:26 +02:00
Zhang Xianyi
66198faab6 Refs #63. delete prefix for mingw64 toolchain. 2014-04-27 13:05:26 +08:00
wernsaar
47b22763f8 reduced stack usage on windows to 16K 2014-04-24 14:09:26 +02:00
Zhang Xianyi
4d42368214 Refs #355. Fixed ARM detection bug. 2014-03-22 15:08:18 +08:00
Zhang Xianyi
3e068e78e2 Merge branch 'release-0.2.9' 2014-03-06 17:45:31 +08:00
Zhang Xianyi
1140c489c9 #351. Release 0.2.9 rc2. 2014-03-06 17:44:03 +08:00
Zhang Xianyi
804a306313 Merge branch 'develop' of github.com:xianyi/OpenBLAS into develop 2014-03-06 12:53:10 +08:00
wernsaar
9db0fb8b02 bugfix for sdsdot 2014-02-28 14:59:36 +01:00
wernsaar
692b14cecd rewrote rotmg.c instead of modifying very old code 2014-02-28 14:43:28 +01:00
Zhang Xianyi
322a178430 Merge pull request #345 from ogrisel/fix-non-smp-server-pthread_atfork-reference
Do not reference pthread_atfork in non-SMP_SERVER mode
2014-02-26 00:54:01 +08:00
Zhang Xianyi
f80f29e256 Merge branch 'develop' of github.com:xianyi/OpenBLAS into develop 2014-02-26 00:38:29 +08:00
Olivier Grisel
2c556f093a Add cast to function pointer to remove warning 2014-02-25 11:08:32 +01:00
Olivier Grisel
3b027d2528 Do not reference pthread_atfork in non-SMP_SERVER mode 2014-02-25 11:08:32 +01:00
Zhang Xianyi
57526cae99 Merge pull request #346 from ogrisel/fix-openblas_config.h
More robust OPENBLAS_ prefixing of macros in openblas_config.h
2014-02-25 06:43:30 +08:00
Olivier Grisel
5de5ef118c More robust OPENBLAS_ prefixing of macros in openblas_config.h 2014-02-24 13:21:06 +01:00
Zhang Xianyi
b161ac29e3 Merge branch 'develop' of github.com:xianyi/OpenBLAS into develop 2014-02-21 07:15:35 +08:00
Zhang Xianyi
b20ee6924a Merge pull request #343 from ogrisel/fix-294-fork-safe-pthread
FIX #294: fork-safe pthread mode
2014-02-20 06:58:27 +08:00
Olivier Grisel
49bd98f410 Do not reference pthread_atfork under windows 2014-02-19 19:25:48 +01:00
Olivier Grisel
a14f98ca7c Make sure that fork_test.c is not built under windows 2014-02-19 19:14:13 +01:00
Olivier Grisel
138a841390 FIX #294: make OpenBLAS thread-pool resilient to fork via pthread_atfork 2014-02-19 19:01:15 +01:00
Olivier Grisel
046e4013cb Revert "Refs #294. Used pthread_atfork to avoid hang after a Unix fork."
This reverts commit 3617c22a56.
2014-02-19 18:32:54 +01:00
Zhang Xianyi
dd2d3e61ab Merge branch 'develop' of github.com:xianyi/OpenBLAS into develop 2014-02-18 15:53:57 +08:00
Zhang Xianyi
3617c22a56 Refs #294. Used pthread_atfork to avoid hang after a Unix fork.
The problem is the mutex we used in blas_server. Thus, we must clear
the mutex before the fork and re-init them at parent and child process.

If you use OpenMP, note that GOMP currently has the same problem. Please try another OpenMP
implementation.
2014-02-18 15:36:04 +08:00
wernsaar
f9daebba0a checked in bugfixes for ARM 2014-02-16 11:45:47 +01:00
Zhang Xianyi
9a557e90da Refs #340. Fixed SEGFAULT bug of dgemv_n on OSX. 2014-02-15 23:23:15 +08:00
wangqian
2d557eb1e0 Fixed computational error of dgemv_n. 2014-02-04 21:47:51 +08:00
Zhang Xianyi
a789b77b75 Used SwitchToThread for YIELDING on AMD piledriver with Windows. 2014-01-28 16:40:19 +08:00
Zhang Xianyi
75acf96d94 Refs #329 #287. Only disable -fopenmp for LAPACK Fortran codes on Windows. 2014-01-24 15:39:46 +08:00
Zhang Xianyi
8c7687b419 Refs #338. Added OPENBLAS_VERBOSE environment variable on runtime
By default, OpenBLAS doesn't output the warning message. You can set
OPENBLAS_VERBOSE (e.g. export OPENBLAS_VERBOSE=1) to enable the warning
message on runtime.
2014-01-24 02:05:59 +08:00
Zhang Xianyi
3e0a7b931c Refs #333. Detect the wrong parameter for zherk/zher2k. 2014-01-21 01:27:51 +08:00
Zhang Xianyi
306d9f2e35 Fixed #334 a makefile bug in lapacke. 2014-01-19 23:28:11 +08:00
Zhang Xianyi
7b8604ea29 Refs #335. Added the fallback of L2 size detection for some virtual machines. 2014-01-08 11:16:21 +08:00
Zhang Xianyi
ab69443bd4 Refs #332. Added additional Intel Ivy Bridge and Haswell CPU-id. 2014-01-05 23:44:29 +08:00
Zhang Xianyi
b263e096af Refs #307. Delete debug printf. 2013-12-31 15:53:13 +08:00
Zhang Xianyi
05bb391c3a Refs #330. Fixed the compatible issue with clang on Mac OSX. 2013-12-16 20:31:17 +08:00
Zhang Xianyi
0ab080987d Release 0.2.9 rc1 version. 2013-12-13 20:48:05 +08:00
97 changed files with 2043 additions and 7969 deletions

View File

@@ -10,13 +10,26 @@
* Optimize BLAS3 on ICT Loongson 3A.
* Optimize BLAS3 on Intel Sandy Bridge.
* Werner Saar <wernsaar@googlemail.com>
* [2013-03-04] Optimize AVX and FMA4 DGEMM on AMD Bulldozer
* [2013-04-27] Optimize AVX and FMA4 TRSM on AMD Bulldozer
* [2013-06-09] Optimize AVX and FMA4 SGEMM on AMD Bulldozer
* [2013-06-11] Optimize AVX and FMA4 ZGEMM on AMD Bulldozer
* [2013-06-12] Optimize AVX and FMA4 CGEMM on AMD Bulldozer
* [2013-06-16] Optimize dgemv_n kernel on AMD Bulldozer
* [2013-06-20] Optimize ddot, daxpy kernel on AMD Bulldozer
* [2013-06-21] Optimize dcopy kernel on AMD Bulldozer
* Porting and Optimization on ARM Cortex-A9
* Optimization on AMD Piledriver
* Optimization on Intel Haswell
## Previous Developers
* Zaheer Chothia <zaheer.chothia@gmail.com>
* Improve the compatibility about complex number
* Build LAPACKE: C interface to LAPACK
* Improve the windows build.
## Previous Developers
* Chen Shaohu <huhumartinwar@gmail.com>
* Optimize GEMV on the Loongson 3A processor.
@@ -52,16 +65,7 @@ In chronological order:
* Sébastien Villemot <sebastien@debian.org>
* [2012-11-14] Fix compilation with TARGET=GENERIC. Patch applied to Debian package.
* Werner Saar <wernsaar@googlemail.com>
* [2013-03-04] Optimize AVX and FMA4 DGEMM on AMD Bulldozer
* [2013-04-27] Optimize AVX and FMA4 TRSM on AMD Bulldozer
* [2013-06-09] Optimize AVX and FMA4 SGEMM on AMD Bulldozer
* [2013-06-11] Optimize AVX and FMA4 ZGEMM on AMD Bulldozer
* [2013-06-12] Optimize AVX and FMA4 CGEMM on AMD Bulldozer
* [2013-06-16] Optimize dgemv_n kernel on AMD Bulldozer
* [2013-06-20] Optimize ddot, daxpy kernel on AMD Bulldozer
* [2013-06-21] Optimize dcopy kernel on AMD Bulldozer
* [2013-08-28] Avoid failure on qemu guests declaring an Athlon CPU without 3dnow!
* Kang-Che Sung <Explorer09@gmail.com>
* [2013-05-17] Fix typo in the document. Re-order the architecture list in getarch.c.
@@ -79,10 +83,34 @@ In chronological order:
* [2013-07-11] create openblas_get_parallel to retrieve information which parallelization
model is used by OpenBLAS.
* Elliot Saba <staticfloat@gmail.com>
* [2013-07-22] Add in return value for `interface/trtri.c`
* Sébastien Fabbro <bicatali@gentoo.org>
* [2013-07-24] Modify makefile to respect user's LDFLAGS
* [2013-07-24] Add stack markings for GNU as arch-independent for assembler files
* Viral B. Shah <viral@mayin.org>
* [2013-08-21] Patch LAPACK XLASD4.f as discussed in JuliaLang/julia#2340
* Lars Buitinck <https://github.com/larsmans>
* [2013-08-28] get rid of the generated cblas_noconst.h file
* [2013-08-28] Missing threshold in gemm.c
* [2013-08-28] fix default prefix handling in makefiles
* yieldthought <https://github.com/yieldthought>
* [2013-10-08] Remove -Wl,--retain-symbols-file from dynamic link line to fix tool support
* Keno Fischer <https://github.com/loladiro>
* [2013-10-23] Use FC instead of CC to link the dynamic library on OS X
* Christopher Meng <cickumqt@gmail.com>
* [2013-12-09] Add DESTDIR support for easier building on RPM based distros.
Use install command instead of cp to install files with permissions control.
* Lucas Beyer <lucasb.eyer.be@gmail.com>
* [2013-12-10] Added support for NO_SHARED in make install.
* carlkl <https://github.com/carlkl>
* [2013-12-13] Fixed LAPACKE building bug on Windows

View File

@@ -1,4 +1,42 @@
OpenBLAS ChangeLog
====================================================================
Version 0.2.9
10-Jun-2014
common:
* Improved the result for LAPACK testing. (#372)
* Installed DLL to prefix/bin instead of prefix/lib. (#366)
* Build import library on Windows.(#374)
x86/x86-64:
* To improve LAPACK testing results, we rolled back some optimized kernels. (#372)
https://github.com/xianyi/OpenBLAS/wiki/Fixed-optimized-kernels-To-do-List
====================================================================
Version 0.2.9.rc2
06-Mar-2014
common:
* Added OPENBLAS_VERBOSE environment variable.(#338)
* Make OpenBLAS thread-pool resilient to fork via pthread_atfork.
(#294, Thank Olivier Grisel)
* Rewrote rotmg
* Fixed sdsdot bug.
x86/x86-64:
* Detect Intel Haswell for new Macbook.
====================================================================
Version 0.2.9.rc1
13-Dec-2013
common:
* Update LAPACK to 3.5.0 version
* Fixed compatibility issues with Clang and Pathscale compilers.
x86/x86-64:
* Optimization on Intel Haswell.
* Enable optimization kernels on AMD Bulldozer and Piledriver.
ARM:
* Support ARMv6 and ARMv7 ISA.
* Optimization on ARM Cortex-A9.
====================================================================
Version 0.2.8
01-Aug-2013
@@ -24,7 +62,7 @@ common:
parallelization model is used by OpenBLAS. (Thank grisuthedragon)
* Detect LLVM/Clang compiler. The default compiler is Clang on Mac OS X.
* Change LIBSUFFIX from .lib to .a on windows.
* A walk round for dtrti_U single thread bug. Replace it with LAPACK codes. (#191)
* A work-around for dtrti_U single thread bug. Replace it with LAPACK codes. (#191)
x86/x86-64:
* Optimize c/zgemm, trsm, dgemv_n, ddot, daxpy, dcopy on
@@ -257,7 +295,7 @@ x86/x86_64:
* Fixed #28 a wrong result of dsdot on x86_64.
* Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6.
* Fixed #33 ztrmm bug on Nehalem.
* Walk round #27 the low performance axpy issue with small imput size & multithreads.
* Work-around #27 the low performance axpy issue with small input size & multithreads.
MIPS64:
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64.
@@ -281,7 +319,7 @@ common:
x86/x86_64:
* On x86 32bits, fixed a bug in zdot_sse2.S line 191. This would cause
zdotu & zdotc failures.Instead,Walk around it. (Refs issue #8 #9 on github)
zdotu & zdotc failures. Instead, work-around it. (Refs issue #8 #9 on github)
* Modified ?axpy functions to return same netlib BLAS results
when incx==0 or incy==0 (Refs issue #7 on github)
* Modified ?swap functions to return same netlib BLAS results

View File

@@ -57,7 +57,7 @@ endif
ifeq ($(USE_OPENMP), 1)
@echo
@echo " Use OpenMP in the multithreading. Becasue of ignoring OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS flags, "
@echo " Use OpenMP in the multithreading. Because of ignoring OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS flags, "
@echo " you should use OMP_NUM_THREADS environment variable to control the number of threads."
@echo
endif
@@ -128,6 +128,11 @@ ifeq ($(CORE), UNKOWN)
endif
ifeq ($(NOFORTRAN), 1)
$(error OpenBLAS: Detecting fortran compiler failed. Please install fortran compiler, e.g. gfortran, ifort, openf90.)
endif
ifeq ($(NO_STATIC), 1)
ifeq ($(NO_SHARED), 1)
$(error OpenBLAS: neither static nor shared are enabled.)
endif
endif
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
@for d in $(SUBDIRS) ; \
@@ -207,6 +212,7 @@ else
netlib : lapack_prebuild
ifndef NOFORTRAN
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
@$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
endif
ifndef NO_LAPACKE
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib
@@ -230,45 +236,21 @@ ifndef NOFORTRAN
-@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LAPACKELIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
ifeq ($(F_COMPILER), GFORTRAN)
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
else
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
endif
lapack-3.4.2 : lapack-3.4.2.tgz
ifndef NOFORTRAN
ifndef NO_LAPACK
@if test `$(MD5SUM) $< | $(AWK) '{print $$1}'` = 61bf1a8a4469d4bdb7604f5897179478; then \
echo $(TAR) zxf $< ;\
$(TAR) zxf $< && (cd $(NETLIB_LAPACK_DIR); $(PATCH) -p1 < ../patch.for_lapack-3.4.2) ;\
rm -f $(NETLIB_LAPACK_DIR)/lapacke/make.inc ;\
else \
rm -rf $(NETLIB_LAPACK_DIR) ;\
echo " Cannot download lapack-3.4.2.tgz or the MD5 check sum is wrong (Please use orignal)."; \
exit 1; \
fi
endif
endif
LAPACK_URL=http://www.netlib.org/lapack/lapack-3.4.2.tgz
lapack-3.4.2.tgz :
ifndef NOFORTRAN
#http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or
ifeq ($(OSNAME), $(filter $(OSNAME),Darwin NetBSD))
curl -O $(LAPACK_URL);
else
ifeq ($(OSNAME), FreeBSD)
fetch $(LAPACK_URL);
else
wget -O $@ $(LAPACK_URL);
endif
endif
endif
large.tgz :
ifndef NOFORTRAN
if [ ! -a $< ]; then
@@ -287,17 +269,15 @@ lapack-timing : large.tgz timing.tgz
ifndef NOFORTRAN
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )
make -C $(NETLIB_LAPACK_DIR) tmglib
make -C $(NETLIB_LAPACK_DIR)/TIMING
endif
lapack-test :
$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintsts xlintstz xlintstzc
@rm -f $(NETLIB_LAPACK_DIR)/TESTING/*.out
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING
$(GREP) failed $(NETLIB_LAPACK_DIR)/TESTING/*.out
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
dummy :
@@ -323,4 +303,5 @@ endif
@$(MAKE) -C $(NETLIB_LAPACK_DIR) clean
@rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h
@rm -f *.grd Makefile.conf_last config_last.h
@(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out testing_results.txt)
@echo Done.

View File

@@ -10,3 +10,9 @@ FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
endif
ifeq ($(CORE), ARMV5)
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
endif

View File

@@ -7,6 +7,7 @@ PREFIX ?= /opt/OpenBLAS
OPENBLAS_INCLUDE_DIR := $(PREFIX)/include
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib
OPENBLAS_BINARY_DIR := $(PREFIX)/bin
OPENBLAS_BUILD_DIR := $(CURDIR)
.PHONY : install
@@ -19,11 +20,12 @@ install : lib.grd
@-mkdir -p $(DESTDIR)$(PREFIX)
@-mkdir -p $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@-mkdir -p $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@-mkdir -p $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
#for inc
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@awk '{print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@awk 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@@ -49,10 +51,12 @@ ifndef NO_LAPACKE
endif
#for install static library
ifndef NO_STATIC
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@install -pm644 $(LIBNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
endif
#for install shared library
ifndef NO_SHARED
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@@ -78,10 +82,11 @@ ifeq ($(OSNAME), Darwin)
@-ln -fs $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).dylib
endif
ifeq ($(OSNAME), WINNT)
@-cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
@-cp $(LIBPREFIX).lib $(OPENBLAS_LIBRARY_DIR)
endif
ifeq ($(OSNAME), CYGWIN_NT)
@-cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
endif
endif

View File

@@ -3,7 +3,7 @@
#
# This library's version
VERSION = 0.2.8
VERSION = 0.2.9
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@@ -48,6 +48,9 @@ VERSION = 0.2.8
# automatically detected by the script.
# NUM_THREADS = 24
# If you don't need to install the static library, uncomment the following line.
# NO_STATIC = 1
# If you don't need to generate the shared library, uncomment the following line.
# NO_SHARED = 1
@@ -76,10 +79,10 @@ VERSION = 0.2.8
# Unfortunately most of kernel won't give us high quality buffer.
# BLAS tries to find the best region before entering main function,
# but it will consume time. If you don't like it, you can disable one.
# NO_WARMUP = 1
NO_WARMUP = 1
# If you want to disable CPU/Memory affinity on Linux.
# NO_AFFINITY = 1
NO_AFFINITY = 1
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
# and OS. However, the performance is low.
@@ -129,6 +132,9 @@ VERSION = 0.2.8
# The default -O2 is enough.
# COMMON_OPT = -O2
# gfortran option for LAPACK
FCOMMON_OPT = -frecursive
# Profiling flags
COMMON_PROF = -pg

View File

@@ -158,6 +158,7 @@ endif
ifeq ($(OSNAME), Linux)
EXTRALIB += -lm
NO_EXPRECISION = 1
endif
ifeq ($(OSNAME), AIX)
@@ -846,19 +847,6 @@ ifeq ($(DEBUG), 1)
COMMON_OPT += -g
endif
ifndef COMMON_OPT
ifeq ($(ARCH), arm)
COMMON_OPT = -O3
endif
endif
ifndef COMMON_OPT
ifeq ($(ARCH), arm64)
COMMON_OPT = -O3
endif
endif
ifndef COMMON_OPT
COMMON_OPT = -O2
endif
@@ -872,8 +860,14 @@ override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
#MAKEOVERRIDES =
#For LAPACK Fortran codes.
#Disable -fopenmp for LAPACK Fortran codes on Windows.
ifdef OS_WINDOWS
LAPACK_FFLAGS := $(filter-out -fopenmp -mp -openmp -xopenmp=parallel,$(FFLAGS))
LAPACK_FPFLAGS := $(filter-out -fopenmp -mp -openmp -xopenmp=parallel,$(FPFLAGS))
else
LAPACK_FFLAGS := $(FFLAGS)
LAPACK_FPFLAGS := $(FPFLAGS)
endif
LAPACK_CFLAGS = $(CFLAGS)
LAPACK_CFLAGS += -DHAVE_LAPACK_CONFIG_H

View File

@@ -310,15 +310,23 @@ typedef int blasint;
#define YIELDING SwitchToThread()
#endif
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8)
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
#endif
#ifdef PILEDRIVER
#ifdef BULLDOZER
#ifndef YIELDING
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
#endif
#endif
#ifdef PILEDRIVER
#ifndef YIELDING
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
#endif
#endif
#ifndef YIELDING
#define YIELDING sched_yield()
#endif

View File

@@ -17,7 +17,7 @@
#define SCOPY_K scopy_k
#define SDOTU_K sdot_k
#define SDOTC_K sdot_k
#define SDSDOT_K sdot_k
#define SDSDOT_K dsdot_k
#define DSDOT_K dsdot_k
#define SNRM2_K snrm2_k
#define SSCAL_K sscal_k
@@ -162,7 +162,7 @@
#define SCOPY_K gotoblas -> scopy_k
#define SDOTU_K gotoblas -> sdot_k
#define SDOTC_K gotoblas -> sdot_k
#define SDSDOT_K gotoblas -> sdot_k
#define SDSDOT_K gotoblas -> dsdot_k
#define DSDOT_K gotoblas -> dsdot_k
#define SNRM2_K gotoblas -> snrm2_k
#define SSCAL_K gotoblas -> sscal_k

View File

@@ -1051,11 +1051,14 @@ int get_cpuname(void){
case 3:
switch (model) {
case 10:
case 14:
// Ivy Bridge
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
else
return CPUTYPE_NEHALEM;
case 12:
case 15:
if(support_avx())
return CPUTYPE_HASWELL;
else
@@ -1065,6 +1068,7 @@ int get_cpuname(void){
case 4:
switch (model) {
case 5:
case 6:
if(support_avx())
return CPUTYPE_HASWELL;
else
@@ -1457,11 +1461,13 @@ int get_coretype(void){
case 3:
switch (model) {
case 10:
case 14:
if(support_avx())
return CORE_SANDYBRIDGE;
else
return CORE_NEHALEM; //OS doesn't support AVX
case 12:
case 15:
if(support_avx())
return CORE_HASWELL;
else
@@ -1471,6 +1477,7 @@ int get_coretype(void){
case 4:
switch (model) {
case 5:
case 6:
if(support_avx())
return CORE_HASWELL;
else
@@ -1551,7 +1558,13 @@ void get_cpuconfig(void){
printf("#define L2_SIZE %d\n", info.size * 1024);
printf("#define L2_ASSOCIATIVE %d\n", info.associative);
printf("#define L2_LINESIZE %d\n", info.linesize);
} else {
//fall back for some virtual machines.
printf("#define L2_SIZE 1048576\n");
printf("#define L2_ASSOCIATIVE 6\n");
printf("#define L2_LINESIZE 64\n");
}
get_cacheinfo(CACHE_INFO_L3, &info);
if (info.size > 0) {

View File

@@ -125,7 +125,7 @@ ARCH_IA64
BINARY_64
#endif
#if defined(__ARM_ARCH) || defined(__ARM_ARCH_7A__)
#if defined(__ARM_ARCH) || defined(__ARM_ARCH_7A__) || defined(__arm__)
ARCH_ARM
#endif

View File

@@ -1,9 +1,9 @@
TOPDIR = ../..
include ../../Makefile.system
COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_config.$(SUFFIX) openblas_get_parallel.$(SUFFIX)
COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_config.$(SUFFIX) openblas_get_parallel.$(SUFFIX) openblas_error_handle.$(SUFFIX)
COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
#COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
ifdef SMP
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX)
@@ -109,6 +109,9 @@ openblas_get_config.$(SUFFIX) : openblas_get_config.c
openblas_get_parallel.$(SUFFIX) : openblas_get_parallel.c
$(CC) $(CFLAGS) -c $< -o $(@F)
openblas_error_handle.$(SUFFIX) : openblas_error_handle.c
$(CC) $(CFLAGS) -c $< -o $(@F)
blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h
$(CC) $(CFLAGS) -c $< -o $(@F)

View File

@@ -74,6 +74,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <sys/resource.h>
#endif
/* Branch-prediction hint: likely(x) marks x as expected to be true.
 * Defined only when no earlier definition of `likely` exists. */
#ifndef likely
#ifdef __GNUC__
/* !!(x) collapses any non-zero value to 1 so it can be compared with the
 * expected value 1 passed to __builtin_expect. */
#define likely(x) __builtin_expect(!!(x), 1)
#else
/* Without __GNUC__ the hint is dropped and x is evaluated unchanged
 * (note: not normalized to 0/1 on this path). */
#define likely(x) (x)
#endif
#endif
/* Branch-prediction hint: unlikely(x) marks x as expected to be false.
 * Defined only when no earlier definition of `unlikely` exists. */
#ifndef unlikely
#ifdef __GNUC__
/* Same normalization as likely(), but the expected value is 0. */
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
/* Without __GNUC__ the hint is dropped and x is evaluated unchanged. */
#define unlikely(x) (x)
#endif
#endif
#ifdef SMP_SERVER
#undef MONITOR
@@ -584,6 +599,10 @@ static BLASULONG exec_queue_lock = 0;
int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
#ifdef SMP_SERVER
// Handle lazy re-init of the thread-pool after a POSIX fork
if (unlikely(blas_server_avail == 0)) blas_thread_init();
#endif
BLASLONG i = 0;
blas_queue_t *current = queue;
#if defined(OS_LINUX) && !defined(NO_AFFINITY) && !defined(PARAMTEST)
@@ -708,7 +727,11 @@ int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){
/* Execute Threads */
int exec_blas(BLASLONG num, blas_queue_t *queue){
int (*routine)(blas_arg_t *, void *, void *, double *, double *, BLASLONG);
#ifdef SMP_SERVER
// Handle lazy re-init of the thread-pool after a POSIX fork
if (unlikely(blas_server_avail == 0)) blas_thread_init();
#endif
int (*routine)(blas_arg_t *, void *, void *, double *, double *, BLASLONG);
#ifdef TIMING_DEBUG
BLASULONG start, stop;

View File

@@ -441,7 +441,7 @@ int BLASFUNC(blas_thread_shutdown)(void){
if (blas_server_avail){
SetEvent(pool.killed);
printf("blas_num_threads=%d\n", blas_num_threads);
for(i = 0; i < blas_num_threads - 1; i++){
WaitForSingleObject(blas_threads[i], 5); //INFINITE);
TerminateThread(blas_threads[i],0);

View File

@@ -38,6 +38,7 @@
#include "common.h"
#ifdef ARCH_X86
#define EXTERN extern
#else
@@ -108,6 +109,11 @@ int support_avx(){
#endif
}
/* Prints msg on stderr when the OPENBLAS_VERBOSE level is >= verbose. */
extern void openblas_warning(int verbose, const char * msg);
/* Verbosity level passed to openblas_warning() for the kernel fallback
   notices below: they are shown only when OPENBLAS_VERBOSE is at least 1. */
#define FALLBACK_VERBOSE 1
/* Notice used when an AVX-capable Intel core falls back to Nehalem kernels. */
#define NEHALEM_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n"
/* Notice used when an AVX-capable AMD core falls back to Barcelona kernels. */
#define BARCELONA_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n"
static int get_vendor(void){
int eax, ebx, ecx, edx;
char vendor[13];
@@ -179,38 +185,38 @@ static gotoblas_t *get_coretype(void){
if(support_avx())
return &gotoblas_SANDYBRIDGE;
else{
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
return NULL;
case 3:
//Intel Sandy Bridge 22nm (Ivy Bridge?)
if (model == 10) {
if (model == 10 || model == 14) {
if(support_avx())
return &gotoblas_SANDYBRIDGE;
else{
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
//Intel Haswell
if (model == 12) {
if (model == 12 || model == 15) {
if(support_avx())
return &gotoblas_HASWELL;
else{
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
return NULL;
case 4:
//Intel Haswell
if (model == 5) {
if (model == 5 || model == 6) {
if(support_avx())
return &gotoblas_HASWELL;
else{
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
@@ -248,7 +254,7 @@ static gotoblas_t *get_coretype(void){
if(support_avx())
return &gotoblas_BULLDOZER;
else{
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n");
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
}
}else if(model == 2){
@@ -256,7 +262,7 @@ static gotoblas_t *get_coretype(void){
if(support_avx())
return &gotoblas_PILEDRIVER;
else{
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n");
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
}
}
@@ -351,7 +357,7 @@ void gotoblas_dynamic_init(void) {
if (gotoblas && gotoblas -> init) {
gotoblas -> init();
} else {
fprintf(stderr, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
exit(1);
}

View File

@@ -143,6 +143,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
gotoblas_t *gotoblas = NULL;
#endif
extern void openblas_warning(int verbose, const char * msg);
#ifndef SMP
#define blas_cpu_number 1
@@ -253,6 +255,23 @@ int goto_get_num_procs (void) {
return blas_cpu_number;
}
#if !defined(OS_WINDOWS) && defined(SMP_SERVER)
// Adapter with the exact void (*)(void) type required by pthread_atfork().
// blas_thread_shutdown does not have that type, and calling a function
// through a pointer cast to an incompatible function type is undefined
// behavior, so the shutdown routine is invoked from here instead of being
// cast and registered directly. Its return value is intentionally ignored.
static void openblas_fork_prepare(void)
{
(void) BLASFUNC(blas_thread_shutdown)();
}
#endif

// Registers a "prepare" fork handler that shuts the OpenBLAS thread pool down
// right before fork(). Emits a warning through openblas_warning() if the
// handler cannot be installed. Does nothing on Windows or when SMP_SERVER is
// not defined.
void openblas_fork_handler(void)
{
// This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is
// built with "make USE_OPENMP=0".
// Hanging can still happen when OpenBLAS is built against the libgomp
// implementation of OpenMP. The problem is tracked at:
// http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035
// In the mean time build with USE_OPENMP=0 or link against another
// implementation of OpenMP.
#if !defined(OS_WINDOWS) && defined(SMP_SERVER)
int err;
err = pthread_atfork (openblas_fork_prepare, NULL, NULL);
if(err != 0)
openblas_warning(0, "OpenBLAS Warning ... cannot install fork handler. You may meet hang after fork.\n");
#endif
}
int blas_get_cpu_number(void){
char *p;
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN)
@@ -1268,6 +1287,9 @@ void CONSTRUCTOR gotoblas_init(void) {
if (gotoblas_initialized) return;
#ifdef SMP
openblas_fork_handler();
#endif
#ifdef PROFILE
moncontrol (0);

View File

@@ -0,0 +1,51 @@
/***************************************************************************
Copyright (c) 2013, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <limits.h>
#include "common.h"
/*
 * Return the verbosity level requested through the OPENBLAS_VERBOSE
 * environment variable.
 *
 * The value is parsed as a base-10 integer. An unset variable, a value with
 * no leading digits, or a negative value yields 0. Values that do not fit in
 * an int are clamped to INT_MAX; the previous atoi() call had undefined
 * behavior for such input.
 */
int openblas_verbose(void) {
const char *p = getenv("OPENBLAS_VERBOSE");
long level;

if (p == NULL) return 0;

/* strtol() saturates at LONG_MIN / LONG_MAX on overflow, so the clamping
   below also covers out-of-range input. */
level = strtol(p, NULL, 10);
if (level < 0) return 0;
if (level > INT_MAX) return INT_MAX;
return (int)level;
}
/*
 * Write msg to stderr when the level returned by openblas_verbose() is at
 * least `verbose`; otherwise stay silent. msg is printed verbatim, so the
 * caller supplies any trailing newline.
 */
void openblas_warning(int verbose, const char * msg) {
if (openblas_verbose() < verbose) {
return;
}
fprintf(stderr, "%s", msg);
}

View File

@@ -75,9 +75,6 @@ zip : dll
zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME)
dll : ../$(LIBDLLNAME)
#libgoto2.dll
dll2 : libgoto2_shared.dll
# On Windows, we only generate a DLL without a version suffix. This is because
# applications which link against the dynamic library reference a fixed DLL name
@@ -86,36 +83,19 @@ dll2 : libgoto2_shared.dll
# For more details see: https://github.com/xianyi/OpenBLAS/issues/127.
../$(LIBDLLNAME) : ../$(LIBNAME) libopenblas.def dllinit.$(SUFFIX)
$(RANLIB) ../$(LIBNAME)
ifeq ($(BINARY32), 1)
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libopenblas.def \
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(EXTRALIB)
-lib /machine:i386 /def:libopenblas.def
else
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libopenblas.def \
--entry $(FU)dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(EXTRALIB)
-lib /machine:X64 /def:libopenblas.def
endif
libgoto2_shared.dll : ../$(LIBNAME) libgoto2_shared.def
$(CC) $(CFLAGS) $(LDFLAGS) libgoto2_shared.def -shared -o $(@F) \
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
-Wl,--out-implib,libgoto2_shared.lib $(FEXTRALIB)
$(CC) $(CFLAGS) $(LDFLAGS) libopenblas.def dllinit.$(SUFFIX) \
-shared -o ../$(LIBDLLNAME) -Wl,--out-implib,../$(LIBPREFIX).lib \
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB)
libopenblas.def : gensymbol
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
libgoto2_shared.def : gensymbol
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
libgoto_hpl.def : gensymbol
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
$(LIBDYNNAME) : ../$(LIBNAME) osx.def
$(FC) $(FFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
symbol.$(SUFFIX) : symbol.S
$(CC) $(CFLAGS) -c -o $(@F) $^
dllinit.$(SUFFIX) : dllinit.c
$(CC) $(CFLAGS) -c -o $(@F) -s $<
@@ -123,7 +103,7 @@ ifeq ($(OSNAME), Linux)
so : ../$(LIBSONAME)
../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c
../$(LIBSONAME) : ../$(LIBNAME) linktest.c
ifneq ($(C_COMPILER), LSB)
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
@@ -145,7 +125,7 @@ ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD NetBSD))
so : ../$(LIBSONAME)
../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c
../$(LIBSONAME) : ../$(LIBNAME) linktest.c
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
$(FEXTRALIB) $(EXTRALIB)
@@ -197,18 +177,12 @@ static : ../$(LIBNAME)
$(AR) -cq ../$(LIBNAME) goto.$(SUFFIX)
rm -f goto.$(SUFFIX)
linux.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
osx.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
aix.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
symbol.S : gensymbol
perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > symbol.S
test : linktest.c
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
rm -f linktest

View File

@@ -41,7 +41,7 @@
void gotoblas_init(void);
void gotoblas_quit(void);
BOOL APIENTRY dllinit(HINSTANCE hInst, DWORD reason, LPVOID reserved) {
BOOL APIENTRY DllMain(HINSTANCE hInst, DWORD reason, LPVOID reserved) {
if (reason == DLL_PROCESS_ATTACH) {
gotoblas_init();

View File

@@ -2667,34 +2667,34 @@
## @(MATGEN_OBJ) from `lapack-3.4.1/lapacke/src/Makefile`
## Not exported: requires LAPACKE_TESTING to be set and depends on libtmg
## (see `lapack-3.4.1/TESTING/MATGEN`).
#LAPACKE_clatms,
#LAPACKE_clatms_work,
#LAPACKE_dlatms,
#LAPACKE_dlatms_work,
#LAPACKE_slatms,
#LAPACKE_slatms_work,
#LAPACKE_zlatms,
#LAPACKE_zlatms_work,
#LAPACKE_clagge,
#LAPACKE_clagge_work,
#LAPACKE_dlagge,
#LAPACKE_dlagge_work,
#LAPACKE_slagge,
#LAPACKE_slagge_work,
#LAPACKE_zlagge,
#LAPACKE_zlagge_work,
#LAPACKE_claghe,
#LAPACKE_claghe_work,
#LAPACKE_zlaghe,
#LAPACKE_zlaghe_work,
#LAPACKE_clagsy,
#LAPACKE_clagsy_work,
#LAPACKE_dlagsy,
#LAPACKE_dlagsy_work,
#LAPACKE_slagsy,
#LAPACKE_slagsy_work,
#LAPACKE_zlagsy,
#LAPACKE_zlagsy_work,
LAPACKE_clatms,
LAPACKE_clatms_work,
LAPACKE_dlatms,
LAPACKE_dlatms_work,
LAPACKE_slatms,
LAPACKE_slatms_work,
LAPACKE_zlatms,
LAPACKE_zlatms_work,
LAPACKE_clagge,
LAPACKE_clagge_work,
LAPACKE_dlagge,
LAPACKE_dlagge_work,
LAPACKE_slagge,
LAPACKE_slagge_work,
LAPACKE_zlagge,
LAPACKE_zlagge_work,
LAPACKE_claghe,
LAPACKE_claghe_work,
LAPACKE_zlaghe,
LAPACKE_zlaghe_work,
LAPACKE_clagsy,
LAPACKE_clagsy_work,
LAPACKE_dlagsy,
LAPACKE_dlagsy_work,
LAPACKE_slagsy,
LAPACKE_slagsy_work,
LAPACKE_zlagsy,
LAPACKE_zlagsy_work,
);
#These functions may need 2 underscores.
@@ -2725,8 +2725,7 @@ if ($ARGV[8] == 1) {
} elsif ($ARGV[5] == 1) {
#NO_LAPACK=1
@underscore_objs = (@blasobjs, @misc_underscore_objs);
} elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1" ||
-d "../lapack-3.4.2" || -d "../lapack-netlib") {
} elsif (-d "../lapack-netlib") {
if ($ARGV[7] == 0){
# NEED2UNDERSCORES=0
@@ -2771,14 +2770,6 @@ if ($ARGV[6] == 1) {
@no_underscore_objs = (@no_underscore_objs, @lapackeobjs);
}
@linuxobjs = ('__strtol_internal', 'exit', 'free', 'getenv', 'malloc',
'mmap', 'printf', 'sqrt',
'pthread_cond_broadcast', 'pthread_cond_destroy',
'pthread_cond_init', 'pthread_cond_signal', 'pthread_cond_wait',
'pthread_create', 'pthread_exit', 'pthread_join',
'pthread_mutex_destroy', 'pthread_mutex_init',
'pthread_mutex_lock', 'pthread_mutex_unlock');
@hplobjs = (daxpy, dcopy, dscal, idamax, dgemv, dtrsv, dger, dgemm, dtrsm);
@hplobjs2 = (HPL_dlaswp00N, HPL_dlaswp01N, HPL_dlaswp01T);
@@ -2786,31 +2777,6 @@ $bu = $ARGV[2];
$bu = "" if (($bu eq "0") || ($bu eq "1"));
if ($ARGV[0] eq "linux"){
@underscore_objs = (@underscore_objs, @misc_common_objs);
@no_underscore_objs = (@no_underscore_objs, @misc_common_objs);
foreach $objs (@underscore_objs) {
print $objs, $bu, "\n";
}
foreach $objs (@need_2underscore_objs) {
print $objs, $bu, $bu, "\n";
}
# if ($ARGV[4] == 0) {
foreach $objs (@no_underscore_objs) {
print $objs, "\n";
}
# }
foreach $objs (@linuxobjs) {
print $objs, "\n";
}
exit(0);
}
if ($ARGV[0] eq "osx"){
@underscore_objs = (@underscore_objs, @misc_common_objs);
@@ -2956,32 +2922,6 @@ if ($ARGV[0] eq "microsoft"){
exit(0);
}
if ($ARGV[0] eq "win2kasm"){
@underscore_objs = (@underscore_objs, @misc_common_objs);
print "\t.text\n";
foreach $objs (@underscore_objs) {
$uppercase = $objs;
$uppercase =~ tr/[a-z]/[A-Z]/;
print "\t.align 16\n";
print "\t.globl _", $uppercase, "_\n";
print "_", $uppercase, "_:\n";
print "\tjmp\t_", $objs, "_\n";
}
foreach $objs (@need_2underscore_objs) {
$uppercase = $objs;
$uppercase =~ tr/[a-z]/[A-Z]/;
print "\t.align 16\n";
print "\t.globl _", $uppercase, "__\n";
print "_", $uppercase, "__:\n";
print "\tjmp\t_", $objs, "__\n";
}
exit(0);
}
if ($ARGV[0] eq "linktest"){
@underscore_objs = (@underscore_objs, @misc_common_objs);

View File

@@ -724,6 +724,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#endif
/* Fixed target description used when FORCE_ARMV5 is defined: sets FORCE and
   the architecture / core name strings for an ARMv5 build. */
#ifdef FORCE_ARMV5
#define FORCE
#define ARCHITECTURE "ARM"
#define SUBARCHITECTURE "ARMV5"
#define SUBDIRNAME "arm"
/* Space-separated -D flags describing the core (cache sizes, line sizes,
   TLB parameters, VFP availability).
   NOTE(review): L2_SIZE=512488 is not a power of two; 512 KiB would be
   524288 -- confirm whether this value is intentional. */
#define ARCHCONFIG "-DARMV5 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
"-DHAVE_VFP"
#define LIBNAME "armv5"
#define CORENAME "ARMV5"
#else
#endif
#ifdef FORCE_ARMV8
#define FORCE
#define ARCHITECTURE "ARM64"

View File

@@ -2,11 +2,11 @@ TOPDIR = ..
include $(TOPDIR)/Makefile.system
ifeq ($(ARCH), x86)
SUPPORT_GEMM3M = 1
SUPPORT_GEMM3M = 0
endif
ifeq ($(ARCH), x86_64)
SUPPORT_GEMM3M = 1
SUPPORT_GEMM3M = 0
endif
ifeq ($(ARCH), ia64)
@@ -342,30 +342,56 @@ CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS)
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)
#SLAPACKOBJS = \
# sgetf2.$(SUFFIX) sgetrf.$(SUFFIX) slauu2.$(SUFFIX) slauum.$(SUFFIX) \
# spotf2.$(SUFFIX) spotrf.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) \
# slaswp.$(SUFFIX) sgetrs.$(SUFFIX) sgesv.$(SUFFIX) spotri.$(SUFFIX) \
SLAPACKOBJS = \
sgetf2.$(SUFFIX) sgetrf.$(SUFFIX) slauu2.$(SUFFIX) slauum.$(SUFFIX) \
spotf2.$(SUFFIX) spotrf.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) \
slaswp.$(SUFFIX) sgetrs.$(SUFFIX) sgesv.$(SUFFIX) spotri.$(SUFFIX) \
sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \
slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) spotri.$(SUFFIX)
#DLAPACKOBJS = \
# dgetf2.$(SUFFIX) dgetrf.$(SUFFIX) dlauu2.$(SUFFIX) dlauum.$(SUFFIX) \
# dpotf2.$(SUFFIX) dpotrf.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) \
# dlaswp.$(SUFFIX) dgetrs.$(SUFFIX) dgesv.$(SUFFIX) dpotri.$(SUFFIX) \
DLAPACKOBJS = \
dgetf2.$(SUFFIX) dgetrf.$(SUFFIX) dlauu2.$(SUFFIX) dlauum.$(SUFFIX) \
dpotf2.$(SUFFIX) dpotrf.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) \
dlaswp.$(SUFFIX) dgetrs.$(SUFFIX) dgesv.$(SUFFIX) dpotri.$(SUFFIX) \
dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \
dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \
dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dpotri.$(SUFFIX)
QLAPACKOBJS = \
qgetf2.$(SUFFIX) qgetrf.$(SUFFIX) qlauu2.$(SUFFIX) qlauum.$(SUFFIX) \
qpotf2.$(SUFFIX) qpotrf.$(SUFFIX) qtrti2.$(SUFFIX) qtrtri.$(SUFFIX) \
qlaswp.$(SUFFIX) qgetrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \
#CLAPACKOBJS = \
# cgetf2.$(SUFFIX) cgetrf.$(SUFFIX) clauu2.$(SUFFIX) clauum.$(SUFFIX) \
# cpotf2.$(SUFFIX) cpotrf.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) \
# claswp.$(SUFFIX) cgetrs.$(SUFFIX) cgesv.$(SUFFIX) cpotri.$(SUFFIX) \
CLAPACKOBJS = \
cgetf2.$(SUFFIX) cgetrf.$(SUFFIX) clauu2.$(SUFFIX) clauum.$(SUFFIX) \
cpotf2.$(SUFFIX) cpotrf.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) \
claswp.$(SUFFIX) cgetrs.$(SUFFIX) cgesv.$(SUFFIX) cpotri.$(SUFFIX) \
cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \
cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \
clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX)
#ZLAPACKOBJS = \
# zgetf2.$(SUFFIX) zgetrf.$(SUFFIX) zlauu2.$(SUFFIX) zlauum.$(SUFFIX) \
# zpotf2.$(SUFFIX) zpotrf.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) \
# zlaswp.$(SUFFIX) zgetrs.$(SUFFIX) zgesv.$(SUFFIX) zpotri.$(SUFFIX) \
ZLAPACKOBJS = \
zgetf2.$(SUFFIX) zgetrf.$(SUFFIX) zlauu2.$(SUFFIX) zlauum.$(SUFFIX) \
zpotf2.$(SUFFIX) zpotrf.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) \
zlaswp.$(SUFFIX) zgetrs.$(SUFFIX) zgesv.$(SUFFIX) zpotri.$(SUFFIX) \
zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \
zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) zpotri.$(SUFFIX)
XLAPACKOBJS = \
xgetf2.$(SUFFIX) xgetrf.$(SUFFIX) xlauu2.$(SUFFIX) xlauum.$(SUFFIX) \
@@ -375,10 +401,10 @@ XLAPACKOBJS = \
ifneq ($(NO_LAPACK), 1)
SBLASOBJS += $(SLAPACKOBJS)
DBLASOBJS += $(DLAPACKOBJS)
QBLASOBJS += $(QLAPACKOBJS)
#QBLASOBJS += $(QLAPACKOBJS)
CBLASOBJS += $(CLAPACKOBJS)
ZBLASOBJS += $(ZLAPACKOBJS)
XBLASOBJS += $(XLAPACKOBJS)
#XBLASOBJS += $(XLAPACKOBJS)
endif
@@ -1731,217 +1757,217 @@ cblas_cher2k.$(SUFFIX) cblas_cher2k.$(PSUFFIX) : syr2k.c
cblas_zher2k.$(SUFFIX) cblas_zher2k.$(PSUFFIX) : syr2k.c
$(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F)
sgetf2.$(SUFFIX) sgetf2.$(PSUFFIX) : getf2.c
sgetf2.$(SUFFIX) sgetf2.$(PSUFFIX) : lapack/getf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dgetf2.$(SUFFIX) dgetf2.$(PSUFFIX) : getf2.c
dgetf2.$(SUFFIX) dgetf2.$(PSUFFIX) : lapack/getf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qgetf2.$(SUFFIX) qgetf2.$(PSUFFIX) : getf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
cgetf2.$(SUFFIX) cgetf2.$(PSUFFIX) : zgetf2.c
cgetf2.$(SUFFIX) cgetf2.$(PSUFFIX) : lapack/zgetf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
zgetf2.$(SUFFIX) zgetf2.$(PSUFFIX) : zgetf2.c
zgetf2.$(SUFFIX) zgetf2.$(PSUFFIX) : lapack/zgetf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xgetf2.$(SUFFIX) xgetf2.$(PSUFFIX) : zgetf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
sgetrf.$(SUFFIX) sgetrf.$(PSUFFIX) : getrf.c
sgetrf.$(SUFFIX) sgetrf.$(PSUFFIX) : lapack/getrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dgetrf.$(SUFFIX) dgetrf.$(PSUFFIX) : getrf.c
dgetrf.$(SUFFIX) dgetrf.$(PSUFFIX) : lapack/getrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qgetrf.$(SUFFIX) qgetrf.$(PSUFFIX) : getrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
cgetrf.$(SUFFIX) cgetrf.$(PSUFFIX) : zgetrf.c
cgetrf.$(SUFFIX) cgetrf.$(PSUFFIX) : lapack/zgetrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
zgetrf.$(SUFFIX) zgetrf.$(PSUFFIX) : zgetrf.c
zgetrf.$(SUFFIX) zgetrf.$(PSUFFIX) : lapack/zgetrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xgetrf.$(SUFFIX) xgetrf.$(PSUFFIX) : zgetrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
slauu2.$(SUFFIX) slauu2.$(PSUFFIX) : lauu2.c
slauu2.$(SUFFIX) slauu2.$(PSUFFIX) : lapack/lauu2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dlauu2.$(SUFFIX) dlauu2.$(PSUFFIX) : lauu2.c
dlauu2.$(SUFFIX) dlauu2.$(PSUFFIX) : lapack/lauu2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qlauu2.$(SUFFIX) qlauu2.$(PSUFFIX) : lauu2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
clauu2.$(SUFFIX) clauu2.$(PSUFFIX) : zlauu2.c
clauu2.$(SUFFIX) clauu2.$(PSUFFIX) : lapack/zlauu2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
zlauu2.$(SUFFIX) zlauu2.$(PSUFFIX) : zlauu2.c
zlauu2.$(SUFFIX) zlauu2.$(PSUFFIX) : lapack/zlauu2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xlauu2.$(SUFFIX) xlauu2.$(PSUFFIX) : zlauu2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
slauum.$(SUFFIX) slauum.$(PSUFFIX) : lauum.c
slauum.$(SUFFIX) slauum.$(PSUFFIX) : lapack/lauum.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dlauum.$(SUFFIX) dlauum.$(PSUFFIX) : lauum.c
dlauum.$(SUFFIX) dlauum.$(PSUFFIX) : lapack/lauum.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qlauum.$(SUFFIX) qlauum.$(PSUFFIX) : lauum.c
$(CC) -c $(CFLAGS) $< -o $(@F)
clauum.$(SUFFIX) clauum.$(PSUFFIX) : zlauum.c
clauum.$(SUFFIX) clauum.$(PSUFFIX) : lapack/zlauum.c
$(CC) -c $(CFLAGS) $< -o $(@F)
zlauum.$(SUFFIX) zlauum.$(PSUFFIX) : zlauum.c
zlauum.$(SUFFIX) zlauum.$(PSUFFIX) : lapack/zlauum.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xlauum.$(SUFFIX) xlauum.$(PSUFFIX) : zlauum.c
$(CC) -c $(CFLAGS) $< -o $(@F)
spotf2.$(SUFFIX) spotf2.$(PSUFFIX) : potf2.c
spotf2.$(SUFFIX) spotf2.$(PSUFFIX) : lapack/potf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dpotf2.$(SUFFIX) dpotf2.$(PSUFFIX) : potf2.c
dpotf2.$(SUFFIX) dpotf2.$(PSUFFIX) : lapack/potf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qpotf2.$(SUFFIX) qpotf2.$(PSUFFIX) : potf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
cpotf2.$(SUFFIX) cpotf2.$(PSUFFIX) : zpotf2.c
cpotf2.$(SUFFIX) cpotf2.$(PSUFFIX) : lapack/zpotf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
zpotf2.$(SUFFIX) zpotf2.$(PSUFFIX) : zpotf2.c
zpotf2.$(SUFFIX) zpotf2.$(PSUFFIX) : lapack/zpotf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xpotf2.$(SUFFIX) xpotf2.$(PSUFFIX) : zpotf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
spotrf.$(SUFFIX) spotrf.$(PSUFFIX) : potrf.c
spotrf.$(SUFFIX) spotrf.$(PSUFFIX) : lapack/potrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dpotrf.$(SUFFIX) dpotrf.$(PSUFFIX) : potrf.c
dpotrf.$(SUFFIX) dpotrf.$(PSUFFIX) : lapack/potrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qpotrf.$(SUFFIX) qpotrf.$(PSUFFIX) : potrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
cpotrf.$(SUFFIX) cpotrf.$(PSUFFIX) : zpotrf.c
cpotrf.$(SUFFIX) cpotrf.$(PSUFFIX) : lapack/zpotrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
zpotrf.$(SUFFIX) zpotrf.$(PSUFFIX) : zpotrf.c
zpotrf.$(SUFFIX) zpotrf.$(PSUFFIX) : lapack/zpotrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xpotrf.$(SUFFIX) xpotrf.$(PSUFFIX) : zpotrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
strti2.$(SUFFIX) strti2.$(PSUFFIX) : trti2.c
strti2.$(SUFFIX) strti2.$(PSUFFIX) : lapack/trti2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dtrti2.$(SUFFIX) dtrti2.$(PSUFFIX) : trti2.c
dtrti2.$(SUFFIX) dtrti2.$(PSUFFIX) : lapack/trti2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qtrti2.$(SUFFIX) qtrti2.$(PSUFFIX) : trti2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
ctrti2.$(SUFFIX) ctrti2.$(PSUFFIX) : ztrti2.c
ctrti2.$(SUFFIX) ctrti2.$(PSUFFIX) : lapack/ztrti2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
ztrti2.$(SUFFIX) ztrti2.$(PSUFFIX) : ztrti2.c
ztrti2.$(SUFFIX) ztrti2.$(PSUFFIX) : lapack/ztrti2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xtrti2.$(SUFFIX) xtrti2.$(PSUFFIX) : ztrti2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
strtri.$(SUFFIX) strtri.$(PSUFFIX) : trtri.c
strtri.$(SUFFIX) strtri.$(PSUFFIX) : lapack/trtri.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dtrtri.$(SUFFIX) dtrtri.$(PSUFFIX) : trtri.c
dtrtri.$(SUFFIX) dtrtri.$(PSUFFIX) : lapack/trtri.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qtrtri.$(SUFFIX) qtrtri.$(PSUFFIX) : trtri.c
$(CC) -c $(CFLAGS) $< -o $(@F)
ctrtri.$(SUFFIX) ctrtri.$(PSUFFIX) : ztrtri.c
ctrtri.$(SUFFIX) ctrtri.$(PSUFFIX) : lapack/ztrtri.c
$(CC) -c $(CFLAGS) $< -o $(@F)
ztrtri.$(SUFFIX) ztrtri.$(PSUFFIX) : ztrtri.c
ztrtri.$(SUFFIX) ztrtri.$(PSUFFIX) : lapack/ztrtri.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xtrtri.$(SUFFIX) xtrtri.$(PSUFFIX) : ztrtri.c
$(CC) -c $(CFLAGS) $< -o $(@F)
slaswp.$(SUFFIX) slaswp.$(PSUFFIX) : laswp.c
slaswp.$(SUFFIX) slaswp.$(PSUFFIX) : lapack/laswp.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dlaswp.$(SUFFIX) dlaswp.$(PSUFFIX) : laswp.c
dlaswp.$(SUFFIX) dlaswp.$(PSUFFIX) : lapack/laswp.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qlaswp.$(SUFFIX) qlaswp.$(PSUFFIX) : laswp.c
$(CC) -c $(CFLAGS) $< -o $(@F)
claswp.$(SUFFIX) claswp.$(PSUFFIX) : zlaswp.c
claswp.$(SUFFIX) claswp.$(PSUFFIX) : lapack/zlaswp.c
$(CC) -c $(CFLAGS) $< -o $(@F)
zlaswp.$(SUFFIX) zlaswp.$(PSUFFIX) : zlaswp.c
zlaswp.$(SUFFIX) zlaswp.$(PSUFFIX) : lapack/zlaswp.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xlaswp.$(SUFFIX) xlaswp.$(PSUFFIX) : zlaswp.c
$(CC) -c $(CFLAGS) $< -o $(@F)
sgetrs.$(SUFFIX) sgetrs.$(PSUFFIX) : getrs.c
sgetrs.$(SUFFIX) sgetrs.$(PSUFFIX) : lapack/getrs.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dgetrs.$(SUFFIX) dgetrs.$(PSUFFIX) : getrs.c
dgetrs.$(SUFFIX) dgetrs.$(PSUFFIX) : lapack/getrs.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qgetrs.$(SUFFIX) qgetrs.$(PSUFFIX) : getrs.c
$(CC) -c $(CFLAGS) $< -o $(@F)
cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : zgetrs.c
cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : lapack/zgetrs.c
$(CC) -c $(CFLAGS) $< -o $(@F)
zgetrs.$(SUFFIX) zgetrs.$(PSUFFIX) : zgetrs.c
zgetrs.$(SUFFIX) zgetrs.$(PSUFFIX) : lapack/zgetrs.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xgetrs.$(SUFFIX) xgetrs.$(PSUFFIX) : zgetrs.c
$(CC) -c $(CFLAGS) $< -o $(@F)
sgesv.$(SUFFIX) sgesv.$(PSUFFIX) : gesv.c
sgesv.$(SUFFIX) sgesv.$(PSUFFIX) : lapack/gesv.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dgesv.$(SUFFIX) dgesv.$(PSUFFIX) : gesv.c
dgesv.$(SUFFIX) dgesv.$(PSUFFIX) : lapack/gesv.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qgesv.$(SUFFIX) qgesv.$(PSUFFIX) : gesv.c
$(CC) -c $(CFLAGS) $< -o $(@F)
cgesv.$(SUFFIX) cgesv.$(PSUFFIX) : gesv.c
cgesv.$(SUFFIX) cgesv.$(PSUFFIX) : lapack/gesv.c
$(CC) -c $(CFLAGS) $< -o $(@F)
zgesv.$(SUFFIX) zgesv.$(PSUFFIX) : gesv.c
zgesv.$(SUFFIX) zgesv.$(PSUFFIX) : lapack/gesv.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xgesv.$(SUFFIX) xgesv.$(PSUFFIX) : gesv.c
$(CC) -c $(CFLAGS) $< -o $(@F)
spotri.$(SUFFIX) spotri.$(PSUFFIX) : potri.c
spotri.$(SUFFIX) spotri.$(PSUFFIX) : lapack/potri.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dpotri.$(SUFFIX) dpotri.$(PSUFFIX) : potri.c
dpotri.$(SUFFIX) dpotri.$(PSUFFIX) : lapack/potri.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qpotri.$(SUFFIX) qpotri.$(PSUFFIX) : potri.c
$(CC) -c $(CFLAGS) $< -o $(@F)
cpotri.$(SUFFIX) cpotri.$(PSUFFIX) : zpotri.c
cpotri.$(SUFFIX) cpotri.$(PSUFFIX) : lapack/zpotri.c
$(CC) -c $(CFLAGS) $< -o $(@F)
zpotri.$(SUFFIX) zpotri.$(PSUFFIX) : zpotri.c
zpotri.$(SUFFIX) zpotri.$(PSUFFIX) : lapack/zpotri.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xpotri.$(SUFFIX) xpotri.$(PSUFFIX) : zpotri.c

View File

@@ -86,7 +86,8 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
if (incx == 0 || incy == 0)
nthreads = 1;
//Temporarily walk around the low performance issue with small imput size & multithreads.
//Temporarily work around the low performance issue with small input size &
//multithreads.
if (n <= 10000)
nthreads = 1;

View File

@@ -149,7 +149,10 @@ int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
blas_memory_free(buffer);
#endif
FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.n, 2. / 3. * args.m * args.n * args.n);
FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, .5 * args.n * args.n,
args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.))
+ args.n * (1./3. + args.n * (-1./2. + args.n * 1./6.)));
IDEBUG_END;

View File

@@ -1,3 +1,38 @@
/***************************************************************************
Copyright (c) 2013, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
/**************************************************************************************
* 2014/05/02 Saar
* fixed two bugs as reported by Brendan Tracey
* Test with lapack-3.5.0 : OK
*
**************************************************************************************/
#include "common.h"
#ifdef FUNCTION_PROFILE
#include "functable.h"
@@ -7,6 +42,8 @@
#define GAMSQ 16777216.e0
#define RGAMSQ 5.9604645e-8
#define TWO 2.e0
#ifdef DOUBLE
#define ABS(x) fabs(x)
#else
@@ -25,181 +62,174 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
#endif
FLOAT du, dp1, dp2, dq2, dq1, dh11, dh21, dh12, dh22;
int igo, flag;
FLOAT dtemp;
FLOAT du, dp1, dp2, dq2, dq1, dh11, dh21, dh12, dh22, dflag, dtemp;
#ifndef CBLAS
PRINT_DEBUG_NAME;
#else
PRINT_DEBUG_CNAME;
#endif
if(*dd1 < ZERO)
{
dflag = -ONE;
dh11 = ZERO;
dh12 = ZERO;
dh21 = ZERO;
dh22 = ZERO;
dh11 = ZERO;
dh12 = ZERO;
dh21 = ZERO;
dh22 = ZERO;
*dd1 = ZERO;
*dd2 = ZERO;
*dx1 = ZERO;
}
else
{
dp2 = *dd2 * dy1;
if(dp2 == ZERO)
{
dflag = -TWO;
dparam[0] = dflag;
return;
}
dp1 = *dd1 * *dx1;
dq2 = dp2 * dy1;
dq1 = dp1 * *dx1;
if(ABS(dq1) > ABS(dq2))
{
dh21 = - dy1 / *dx1;
dh12 = dp2 / dp1;
if (*dd1 < ZERO) goto L60;
du = ONE - dh12 * dh21;
if(du > ZERO)
{
dflag = ZERO;
*dd1 = *dd1 / du;
*dd2 = *dd2 / du;
*dx1 = *dx1 * du;
dp2 = *dd2 * dy1;
}
}
else
{
if(dq2 < ZERO)
{
dflag = -ONE;
dh11 = ZERO;
dh12 = ZERO;
dh21 = ZERO;
dh22 = ZERO;
if (dp2 == ZERO) {
flag = -2;
goto L260;
}
*dd1 = ZERO;
*dd2 = ZERO;
*dx1 = ZERO;
}
else
{
dflag = ONE;
dp1 = *dd1 * *dx1;
dq2 = dp2 * dy1;
dq1 = dp1 * *dx1;
dh11 = dp1 / dp2;
dh22 = *dx1 / dy1;
du = ONE + dh11 * dh22;
dtemp = *dd2 / du;
if (! (ABS(dq1) > ABS(dq2))) goto L40;
dh21 = -(dy1) / *dx1;
dh12 = dp2 / dp1;
du = ONE - dh12 * dh21;
if (du <= ZERO) goto L60;
flag = 0;
*dd1 /= du;
*dd2 /= du;
*dx1 *= du;
goto L100;
L40:
if (dq2 < ZERO) goto L60;
flag = 1;
dh11 = dp1 / dp2;
dh22 = *dx1 / dy1;
du = ONE + dh11 * dh22;
dtemp = *dd2 / du;
*dd2 = *dd1 / du;
*dd1 = dtemp;
*dx1 = dy1 * du;
goto L100;
L60:
flag = -1;
dh11 = ZERO;
dh12 = ZERO;
dh21 = ZERO;
dh22 = ZERO;
*dd1 = ZERO;
*dd2 = ZERO;
*dx1 = ZERO;
goto L220;
*dd2 = *dd1 / du;
*dd1 = dtemp;
*dx1 = dy1 * du;
}
}
L70:
if (flag < 0) goto L90;
if (flag > 0) goto L80;
dh11 = ONE;
dh22 = ONE;
flag = -1;
goto L90;
if(*dd1 != ZERO)
{
while( (*dd1 <= RGAMSQ) || (*dd1 >= GAMSQ) )
{
if(dflag == ZERO)
{
dh11 = ONE;
dh22 = ONE;
dflag = -ONE;
}
else
{
if(dflag == ONE)
{
dh21 = -ONE;
dh12 = ONE;
dflag = -ONE;
}
}
if( *dd1 <= RGAMSQ )
{
*dd1 = *dd1 * (GAM * GAM);
*dx1 = *dx1 / GAM;
dh11 = dh11 / GAM;
dh12 = dh12 / GAM;
}
else
{
*dd1 = *dd1 / (GAM * GAM);
*dx1 = *dx1 * GAM;
dh11 = dh11 * GAM;
dh12 = dh12 * GAM;
}
}
}
if(*dd2 != ZERO)
{
while( (ABS(*dd2) <= RGAMSQ) || (ABS(*dd2) >= GAMSQ) )
{
if(dflag == ZERO)
{
dh11 = ONE;
dh22 = ONE;
dflag = -ONE;
}
else
{
if(dflag == ONE)
{
dh21 = -ONE;
dh12 = ONE;
dflag = -ONE;
}
}
if( ABS(*dd2) <= RGAMSQ )
{
*dd2 = *dd2 * (GAM * GAM);
dh21 = dh21 / GAM;
dh22 = dh22 / GAM;
}
else
{
*dd2 = *dd2 / (GAM * GAM);
dh21 = dh21 * GAM;
dh22 = dh22 * GAM;
}
}
}
}
L80:
dh21 = -ONE;
dh12 = ONE;
flag = -1;
if(dflag < ZERO)
{
dparam[1] = dh11;
dparam[2] = dh21;
dparam[3] = dh12;
dparam[4] = dh22;
}
else
{
if(dflag == ZERO)
{
dparam[2] = dh21;
dparam[3] = dh12;
}
else
{
dparam[1] = dh11;
dparam[4] = dh22;
}
}
L90:
switch (igo) {
case 0: goto L120;
case 1: goto L150;
case 2: goto L180;
case 3: goto L210;
}
L100:
if (!(*dd1 <= RGAMSQ)) goto L130;
if (*dd1 == ZERO) goto L160;
igo = 0;
goto L70;
L120:
*dd1 *= GAM * GAM;
*dx1 /= GAM;
dh11 /= GAM;
dh12 /= GAM;
goto L100;
L130:
if (! (*dd1 >= GAMSQ)) {
goto L160;
}
igo = 1;
goto L70;
L150:
*dd1 /= GAM * GAM;
*dx1 *= GAM;
dh11 *= GAM;
dh12 *= GAM;
goto L130;
L160:
if (! (ABS(*dd2) <= RGAMSQ)) {
goto L190;
}
if (*dd2 == ZERO) {
goto L220;
}
igo = 2;
goto L70;
L180:
/* Computing 2nd power */
*dd2 *= GAM * GAM;
dh21 /= GAM;
dh22 /= GAM;
goto L160;
L190:
if (! (ABS(*dd2) >= GAMSQ)) {
goto L220;
}
igo = 3;
goto L70;
L210:
/* Computing 2nd power */
*dd2 /= GAM * GAM;
dh21 *= GAM;
dh22 *= GAM;
goto L190;
L220:
if (flag < 0) {
goto L250;
} else if (flag == 0) {
goto L230;
} else {
goto L240;
}
L230:
dparam[2] = dh21;
dparam[3] = dh12;
goto L260;
L240:
dparam[2] = dh11;
dparam[4] = dh22;
goto L260;
L250:
dparam[1] = dh11;
dparam[2] = dh21;
dparam[3] = dh12;
dparam[4] = dh22;
L260:
dparam[0] = (FLOAT) flag;
return;
dparam[0] = dflag;
return;
}

View File

@@ -61,7 +61,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLA
#endif
};
#ifdef SMP
#ifdef SMPBUG
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
qsbmv_thread_U, qsbmv_thread_L,
@@ -90,7 +90,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
blasint info;
int uplo;
FLOAT *buffer;
#ifdef SMP
#ifdef SMPBUG
int nthreads;
#endif
@@ -130,7 +130,7 @@ void CNAME(enum CBLAS_ORDER order,
FLOAT *buffer;
int uplo;
blasint info;
#ifdef SMP
#ifdef SMPBUG
int nthreads;
#endif
@@ -189,7 +189,7 @@ void CNAME(enum CBLAS_ORDER order,
buffer = (FLOAT *)blas_memory_alloc(1);
#ifdef SMP
#ifdef SMPBUG
nthreads = num_cpu_avail(2);
if (nthreads == 1) {
@@ -197,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order,
(sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer);
#ifdef SMP
#ifdef SMPBUG
} else {
(sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads);

View File

@@ -52,8 +52,8 @@ FLOATRET NAME(blasint *N, FLOAT *a, FLOAT *x, blasint *INCX, FLOAT *y, blasint *
FLOATRET ret;
PRINT_DEBUG_NAME;
if (n <= 0) return 0.;
if (n <= 0) return(*a) ;
IDEBUG_START;
@@ -80,7 +80,7 @@ FLOAT CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint in
PRINT_DEBUG_CNAME;
if (n <= 0) return 0.;
if (n <= 0) return (alpha);
IDEBUG_START;

View File

@@ -61,7 +61,7 @@ static int (*spmv[])(BLASLONG, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLAS
#endif
};
#ifdef SMP
#ifdef SMPTEST
static int (*spmv_thread[])(BLASLONG, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
qspmv_thread_U, qspmv_thread_L,
@@ -88,7 +88,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
blasint info;
int uplo;
FLOAT *buffer;
#ifdef SMP
#ifdef SMPTEST
int nthreads;
#endif
@@ -126,7 +126,7 @@ void CNAME(enum CBLAS_ORDER order,
FLOAT *buffer;
int uplo;
blasint info;
#ifdef SMP
#ifdef SMPTEST
int nthreads;
#endif
@@ -181,7 +181,7 @@ void CNAME(enum CBLAS_ORDER order,
buffer = (FLOAT *)blas_memory_alloc(1);
#ifdef SMP
#ifdef SMPTEST
nthreads = num_cpu_avail(2);
if (nthreads == 1) {
@@ -189,7 +189,7 @@ void CNAME(enum CBLAS_ORDER order,
(spmv[uplo])(n, alpha, a, x, incx, y, incy, buffer);
#ifdef SMP
#ifdef SMPTEST
} else {
(spmv_thread[uplo])(n, alpha, a, x, incx, y, incy, buffer, nthreads);

View File

@@ -145,10 +145,21 @@ void NAME(char *UPLO, char *TRANS,
if (uplo_arg == 'U') uplo = 0;
if (uplo_arg == 'L') uplo = 1;
#ifndef COMPLEX
if (trans_arg == 'N') trans = 0;
if (trans_arg == 'T') trans = 1;
if (trans_arg == 'R') trans = 0;
if (trans_arg == 'C') trans = 1;
#else
#ifdef HEMM
if (trans_arg == 'N') trans = 0;
if (trans_arg == 'C') trans = 1;
#else
if (trans_arg == 'N') trans = 0;
if (trans_arg == 'T') trans = 1;
#endif
#endif
nrowa = args.n;
if (trans & 1) nrowa = args.k;

View File

@@ -148,10 +148,21 @@ void NAME(char *UPLO, char *TRANS,
if (uplo_arg == 'U') uplo = 0;
if (uplo_arg == 'L') uplo = 1;
#ifndef COMPLEX
if (trans_arg == 'N') trans = 0;
if (trans_arg == 'T') trans = 1;
if (trans_arg == 'R') trans = 0;
if (trans_arg == 'C') trans = 1;
#else
#ifdef HEMM
if (trans_arg == 'N') trans = 0;
if (trans_arg == 'C') trans = 1;
#else
if (trans_arg == 'N') trans = 0;
if (trans_arg == 'T') trans = 1;
#endif
#endif
nrowa = args.n;
if (trans & 1) nrowa = args.k;

View File

@@ -61,7 +61,7 @@ static int (*hbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT
#endif
};
#ifdef SMP
#ifdef SMPBUG
static int (*hbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
xhbmv_thread_U, xhbmv_thread_L, xhbmv_thread_V, xhbmv_thread_M,
@@ -92,7 +92,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
blasint info;
int uplo;
FLOAT *buffer;
#ifdef SMP
#ifdef SMPBUG
int nthreads;
#endif
@@ -138,7 +138,7 @@ void CNAME(enum CBLAS_ORDER order,
FLOAT *buffer;
int uplo;
blasint info;
#ifdef SMP
#ifdef SMPBUG
int nthreads;
#endif
@@ -197,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order,
buffer = (FLOAT *)blas_memory_alloc(1);
#ifdef SMP
#ifdef SMPBUG
nthreads = num_cpu_avail(2);
if (nthreads == 1) {
@@ -205,7 +205,7 @@ void CNAME(enum CBLAS_ORDER order,
(hbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, x, incx, y, incy, buffer);
#ifdef SMP
#ifdef SMPBUG
} else {
(hbmv_thread[uplo])(n, k, ALPHA, a, lda, x, incx, y, incy, buffer, nthreads);

View File

@@ -61,7 +61,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT
#endif
};
#ifdef SMP
#ifdef SMPBUG
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
xsbmv_thread_U, xsbmv_thread_L,
@@ -90,7 +90,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
blasint info;
int uplo;
FLOAT *buffer;
#ifdef SMP
#ifdef SMPBUG
int nthreads;
#endif
@@ -131,7 +131,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
buffer = (FLOAT *)blas_memory_alloc(1);
#ifdef SMP
#ifdef SMPBUG
nthreads = num_cpu_avail(2);
if (nthreads == 1) {
@@ -139,7 +139,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
(sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer);
#ifdef SMP
#ifdef SMPBUG
} else {
(sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads);

View File

@@ -61,7 +61,7 @@ static int (*spmv[])(BLASLONG, FLOAT, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT
#endif
};
#ifdef SMP
#ifdef SMPTEST
static int (*spmv_thread[])(BLASLONG, FLOAT *, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
xspmv_thread_U, xspmv_thread_L,
@@ -88,7 +88,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
blasint info;
int uplo;
FLOAT *buffer;
#ifdef SMP
#ifdef SMPTEST
int nthreads;
#endif
@@ -127,7 +127,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
buffer = (FLOAT *)blas_memory_alloc(1);
#ifdef SMP
#ifdef SMPTEST
nthreads = num_cpu_avail(2);
if (nthreads == 1) {
@@ -135,7 +135,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
(spmv[uplo])(n, alpha_r, alpha_i, a, b, incx, c, incy, buffer);
#ifdef SMP
#ifdef SMPTEST
} else {

View File

@@ -674,7 +674,7 @@ $(KDIR)sdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@
$(KDIR)sdsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@
$(KDIR)zdotu_k$(TSUFFIX).$(SUFFIX) $(KDIR)zdotu_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZDOTKERNEL)
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UCONJ $< -o $@

134
kernel/arm/KERNEL.ARMV5 Normal file
View File

@@ -0,0 +1,134 @@
SAMAXKERNEL = ../arm/amax.c
DAMAXKERNEL = ../arm/amax.c
CAMAXKERNEL = ../arm/zamax.c
ZAMAXKERNEL = ../arm/zamax.c
SAMINKERNEL = ../arm/amin.c
DAMINKERNEL = ../arm/amin.c
CAMINKERNEL = ../arm/zamin.c
ZAMINKERNEL = ../arm/zamin.c
SMAXKERNEL = ../arm/max.c
DMAXKERNEL = ../arm/max.c
SMINKERNEL = ../arm/min.c
DMINKERNEL = ../arm/min.c
ISAMAXKERNEL = ../arm/iamax.c
IDAMAXKERNEL = ../arm/iamax.c
ICAMAXKERNEL = ../arm/izamax.c
IZAMAXKERNEL = ../arm/izamax.c
ISAMINKERNEL = ../arm/iamin.c
IDAMINKERNEL = ../arm/iamin.c
ICAMINKERNEL = ../arm/izamin.c
IZAMINKERNEL = ../arm/izamin.c
ISMAXKERNEL = ../arm/imax.c
IDMAXKERNEL = ../arm/imax.c
ISMINKERNEL = ../arm/imin.c
IDMINKERNEL = ../arm/imin.c
SASUMKERNEL = ../arm/asum.c
DASUMKERNEL = ../arm/asum.c
CASUMKERNEL = ../arm/zasum.c
ZASUMKERNEL = ../arm/zasum.c
SAXPYKERNEL = ../arm/axpy.c
DAXPYKERNEL = ../arm/axpy.c
CAXPYKERNEL = ../arm/zaxpy.c
ZAXPYKERNEL = ../arm/zaxpy.c
SCOPYKERNEL = ../arm/copy.c
DCOPYKERNEL = ../arm/copy.c
CCOPYKERNEL = ../arm/zcopy.c
ZCOPYKERNEL = ../arm/zcopy.c
SDOTKERNEL = ../arm/dot.c
DDOTKERNEL = ../arm/dot.c
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
SNRM2KERNEL = ../arm/nrm2.c
DNRM2KERNEL = ../arm/nrm2.c
CNRM2KERNEL = ../arm/znrm2.c
ZNRM2KERNEL = ../arm/znrm2.c
SROTKERNEL = ../arm/rot.c
DROTKERNEL = ../arm/rot.c
CROTKERNEL = ../arm/zrot.c
ZROTKERNEL = ../arm/zrot.c
SSCALKERNEL = ../arm/scal.c
DSCALKERNEL = ../arm/scal.c
CSCALKERNEL = ../arm/zscal.c
ZSCALKERNEL = ../arm/zscal.c
SSWAPKERNEL = ../arm/swap.c
DSWAPKERNEL = ../arm/swap.c
CSWAPKERNEL = ../arm/zswap.c
ZSWAPKERNEL = ../arm/zswap.c
SGEMVNKERNEL = ../arm/gemv_n.c
DGEMVNKERNEL = ../arm/gemv_n.c
CGEMVNKERNEL = ../arm/zgemv_n.c
ZGEMVNKERNEL = ../arm/zgemv_n.c
SGEMVTKERNEL = ../arm/gemv_t.c
DGEMVTKERNEL = ../arm/gemv_t.c
CGEMVTKERNEL = ../arm/zgemv_t.c
ZGEMVTKERNEL = ../arm/zgemv_t.c
STRMMKERNEL = ../generic/trmmkernel_2x2.c
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

View File

@@ -1,3 +1,27 @@
SGEMVNKERNEL = ../arm/gemv_n.c
SGEMVTKERNEL = ../arm/gemv_t.c
DGEMVNKERNEL = ../arm/gemv_n.c
DGEMVTKERNEL = ../arm/gemv_t.c
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
#ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
#ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
#ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
#ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
#STRMMKERNEL = ../generic/trmmkernel_2x2.c
#SGEMMKERNEL = ../generic/gemmkernel_2x2.c
#SGEMMONCOPY = ../generic/gemm_ncopy_2.c
#SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
###############################################################################
SAMAXKERNEL = iamax_vfp.S
DAMAXKERNEL = iamax_vfp.S
CAMAXKERNEL = iamax_vfp.S
@@ -40,10 +64,10 @@ DAXPYKERNEL = axpy_vfp.S
CAXPYKERNEL = axpy_vfp.S
ZAXPYKERNEL = axpy_vfp.S
SCOPYKERNEL = scopy_vfp.S
DCOPYKERNEL = dcopy_vfp.S
CCOPYKERNEL = ccopy_vfp.S
ZCOPYKERNEL = zcopy_vfp.S
SCOPYKERNEL = copy.c
DCOPYKERNEL = copy.c
CCOPYKERNEL = zcopy.c
ZCOPYKERNEL = zcopy.c
SDOTKERNEL = sdot_vfp.S
DDOTKERNEL = ddot_vfp.S
@@ -60,29 +84,29 @@ DROTKERNEL = rot_vfp.S
CROTKERNEL = rot_vfp.S
ZROTKERNEL = rot_vfp.S
SSCALKERNEL = scal_vfp.S
DSCALKERNEL = scal_vfp.S
CSCALKERNEL = scal_vfp.S
ZSCALKERNEL = scal_vfp.S
SSCALKERNEL = scal.c
DSCALKERNEL = scal.c
CSCALKERNEL = zscal.c
ZSCALKERNEL = zscal.c
SSWAPKERNEL = swap_vfp.S
DSWAPKERNEL = swap_vfp.S
CSWAPKERNEL = swap_vfp.S
ZSWAPKERNEL = swap_vfp.S
SGEMVNKERNEL = gemv_n_vfp.S
DGEMVNKERNEL = gemv_n_vfp.S
# BAD SGEMVNKERNEL = gemv_n_vfp.S
# BAD DGEMVNKERNEL = gemv_n_vfp.S
CGEMVNKERNEL = cgemv_n_vfp.S
ZGEMVNKERNEL = zgemv_n_vfp.S
SGEMVTKERNEL = gemv_t_vfp.S
DGEMVTKERNEL = gemv_t_vfp.S
# BAD SGEMVTKERNEL = gemv_t_vfp.S
# BAD DGEMVTKERNEL = gemv_t_vfp.S
CGEMVTKERNEL = cgemv_t_vfp.S
ZGEMVTKERNEL = zgemv_t_vfp.S
STRMMKERNEL = strmm_kernel_4x2_vfp.S
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
#CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S
SGEMMKERNEL = sgemm_kernel_4x2_vfp.S
@@ -105,9 +129,9 @@ DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
CGEMMKERNEL = cgemm_kernel_2x2_vfp.S
CGEMMONCOPY = cgemm_ncopy_2_vfp.S
CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
#CGEMMKERNEL = cgemm_kernel_2x2_vfp.S
#CGEMMONCOPY = cgemm_ncopy_2_vfp.S
#CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o

View File

@@ -1,3 +1,8 @@
SGEMVNKERNEL = ../arm/gemv_n.c
SGEMVTKERNEL = ../arm/gemv_t.c
#################################################################################
SAMAXKERNEL = iamax_vfp.S
DAMAXKERNEL = iamax_vfp.S
CAMAXKERNEL = iamax_vfp.S
@@ -45,10 +50,10 @@ DAXPYKERNEL = axpy_vfp.S
CAXPYKERNEL = axpy_vfp.S
ZAXPYKERNEL = axpy_vfp.S
SCOPYKERNEL = scopy_vfp.S
DCOPYKERNEL = dcopy_vfp.S
CCOPYKERNEL = ccopy_vfp.S
ZCOPYKERNEL = zcopy_vfp.S
SCOPYKERNEL = copy.c
DCOPYKERNEL = copy.c
CCOPYKERNEL = zcopy.c
ZCOPYKERNEL = zcopy.c
SDOTKERNEL = sdot_vfp.S
DDOTKERNEL = ddot_vfp.S
@@ -65,17 +70,17 @@ DROTKERNEL = rot_vfp.S
CROTKERNEL = rot_vfp.S
ZROTKERNEL = rot_vfp.S
SSCALKERNEL = scal_vfp.S
DSCALKERNEL = scal_vfp.S
CSCALKERNEL = scal_vfp.S
ZSCALKERNEL = scal_vfp.S
SSCALKERNEL = scal.c
DSCALKERNEL = scal.c
CSCALKERNEL = zscal.c
ZSCALKERNEL = zscal.c
SGEMVNKERNEL = gemv_n_vfp.S
# BAD SGEMVNKERNEL = gemv_n_vfp.S
DGEMVNKERNEL = gemv_n_vfp.S
CGEMVNKERNEL = cgemv_n_vfp.S
ZGEMVNKERNEL = zgemv_n_vfp.S
SGEMVTKERNEL = gemv_t_vfp.S
# BAD SGEMVTKERNEL = gemv_t_vfp.S
DGEMVTKERNEL = gemv_t_vfp.S
CGEMVTKERNEL = cgemv_t_vfp.S
ZGEMVTKERNEL = zgemv_t_vfp.S

View File

@@ -38,20 +38,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
{
BLASLONG i=0;
BLASLONG i=0,j=0;
if ( n < 0 || inc_x < 1 ) return(0);
if ( da == 1.0 ) return(0);
n *= inc_x;
while(i < n)
while(j < n)
{
x[i] = da * x[i] ;
if ( da == 0.0 )
x[i]=0.0;
else
x[i] = da * x[i] ;
i += inc_x ;
j++;
}
return(0);
return;
}

View File

@@ -43,19 +43,39 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, F
BLASLONG ip = 0;
FLOAT temp;
if ( n < 0 || inc_x < 1 ) return(0);
inc_x2 = 2 * inc_x;
for ( i=0; i<n; i++ )
{
temp = da_r * x[ip] - da_i * x[ip+1] ;
x[ip+1] = da_r * x[ip+1] + da_i * x[ip] ;
if ( da_r == 0.0 )
{
if ( da_i == 0.0 )
{
temp = 0.0;
x[ip+1] = 0.0 ;
}
else
{
temp = - da_i * x[ip+1] ;
x[ip+1] = da_i * x[ip] ;
}
}
else
{
if ( da_i == 0.0 )
{
temp = da_r * x[ip] ;
x[ip+1] = da_r * x[ip+1];
}
else
{
temp = da_r * x[ip] - da_i * x[ip+1] ;
x[ip+1] = da_r * x[ip+1] + da_i * x[ip] ;
}
}
x[ip] = temp;
ip += inc_x2;
}
return(0);

View File

@@ -59,7 +59,7 @@ CASUMKERNEL = zasum_sse.S
endif
ifndef SDOTKERNEL
SDOTKERNEL = dot_sse.S
SDOTKERNEL = ../arm/dot.c
endif
ifndef CDOTKERNEL
@@ -107,11 +107,11 @@ CSWAPKERNEL = zswap_sse.S
endif
ifndef SGEMVNKERNEL
SGEMVNKERNEL = gemv_n_sse.S
SGEMVNKERNEL = ../arm/gemv_n.c
endif
ifndef SGEMVTKERNEL
SGEMVTKERNEL = gemv_t_sse.S
SGEMVTKERNEL = ../arm/gemv_t.c
endif
ifndef CGEMVNKERNEL

View File

@@ -119,7 +119,7 @@ XCOPYKERNEL = zcopy.S
endif
ifndef SDOTKERNEL
SDOTKERNEL = dot_sse.S
SDOTKERNEL = ../arm/dot.c
endif
ifndef DDOTKERNEL
@@ -369,11 +369,11 @@ endif
GEMVDEP = ../l2param.h
ifndef SGEMVNKERNEL
SGEMVNKERNEL = sgemv_n.S
SGEMVNKERNEL = ../arm/gemv_n.c
endif
ifndef SGEMVTKERNEL
SGEMVTKERNEL = sgemv_t.S
SGEMVTKERNEL = ../arm/gemv_t.c
endif
ifndef DGEMVNKERNEL

View File

@@ -7,15 +7,19 @@ SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = gemm_kernel_2x8_nehalem.S
DGEMMINCOPY = dgemm_ncopy_2.S
DGEMMITCOPY = dgemm_tcopy_2.S
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
DGEMMOTCOPY = dgemm_tcopy_8.S
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = gemm_kernel_4x4_core2.S
DGEMMINCOPY =
DGEMMITCOPY =
DGEMMONCOPY = gemm_ncopy_4.S
DGEMMOTCOPY = gemm_tcopy_4.S
DGEMMINCOPYOBJ =
DGEMMITCOPYOBJ =
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
CGEMMINCOPY = zgemm_ncopy_2.S
CGEMMITCOPY = zgemm_tcopy_2.S
@@ -40,10 +44,11 @@ STRSMKERNEL_LT = trsm_kernel_LT_4x8_nehalem.S
STRSMKERNEL_RN = trsm_kernel_LT_4x8_nehalem.S
STRSMKERNEL_RT = trsm_kernel_RT_4x8_nehalem.S
DTRSMKERNEL_LN = trsm_kernel_LN_2x8_nehalem.S
DTRSMKERNEL_LT = trsm_kernel_LT_2x8_nehalem.S
DTRSMKERNEL_RN = trsm_kernel_LT_2x8_nehalem.S
DTRSMKERNEL_RT = trsm_kernel_RT_2x8_nehalem.S
DTRSMKERNEL_LN = trsm_kernel_LN_4x4_core2.S
DTRSMKERNEL_LT = trsm_kernel_LT_4x4_core2.S
DTRSMKERNEL_RN = trsm_kernel_LT_4x4_core2.S
DTRSMKERNEL_RT = trsm_kernel_RT_4x4_core2.S
CTRSMKERNEL_LN = ztrsm_kernel_LN_2x4_nehalem.S
CTRSMKERNEL_LT = ztrsm_kernel_LT_2x4_nehalem.S

View File

@@ -1,34 +1,35 @@
SGEMMKERNEL = sgemm_kernel_8x8_sandy.S
SGEMMINCOPY =
SGEMMITCOPY =
SGEMMKERNEL = gemm_kernel_4x8_nehalem.S
SGEMMINCOPY = gemm_ncopy_4.S
SGEMMITCOPY = gemm_tcopy_4.S
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
SGEMMINCOPYOBJ =
SGEMMITCOPYOBJ =
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = dgemm_kernel_4x8_sandy.S
DGEMMINCOPY = ../generic/gemm_ncopy_8.c
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
#DGEMMONCOPY = gemm_ncopy_4.S
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
#DGEMMOTCOPY = gemm_tcopy_4.S
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
#CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
CGEMMKERNEL = cgemm_kernel_4x8_sandy.S
CGEMMINCOPY = ../generic/zgemm_ncopy_8_sandy.c
CGEMMITCOPY = ../generic/zgemm_tcopy_8_sandy.c
CGEMMONCOPY = ../generic/zgemm_ncopy_4_sandy.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4_sandy.c
CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
CGEMMINCOPY = zgemm_ncopy_2.S
CGEMMITCOPY = zgemm_tcopy_2.S
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
#ZGEMMKERNEL = zgemm_kernel_1x4_nehalem.S
ZGEMMKERNEL = zgemm_kernel_4x4_sandy.S
ZGEMMINCOPY =
ZGEMMITCOPY =
@@ -58,6 +59,7 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
#ZTRSMKERNEL_LT = ztrsm_kernel_LT_1x4_nehalem.S
#ZTRSMKERNEL_RN = ztrsm_kernel_LT_1x4_nehalem.S
#ZTRSMKERNEL_RT = ztrsm_kernel_RT_1x4_nehalem.S
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c

File diff suppressed because it is too large Load Diff

View File

@@ -192,8 +192,8 @@
VFMADD231PD_ %ymm13,%ymm1,%ymm0
VFMADD231PD_ %ymm14,%ymm2,%ymm0
VFMADD231PD_ %ymm15,%ymm3,%ymm0
addq $3*SIZE , BO
addq $16*SIZE, AO
addq $ 3*SIZE , BO
addq $ 16*SIZE, AO
.endm
@@ -212,8 +212,8 @@
VFMADD231PD_ %ymm8,%ymm2,%ymm0
VFMADD231PD_ %ymm9,%ymm3,%ymm0
prefetcht0 B_PR1(BO)
addq $3*SIZE , BO
addq $8*SIZE, AO
addq $ 3*SIZE , BO
addq $ 8*SIZE, AO
.endm
.macro KERNEL4x3_SUBN
@@ -224,8 +224,8 @@
VFMADD231PD_ %ymm5,%ymm2,%ymm0
vbroadcastsd -10 * SIZE(BO), %ymm3
VFMADD231PD_ %ymm6,%ymm3,%ymm0
addq $3*SIZE , BO
addq $4*SIZE, AO
addq $ 3*SIZE , BO
addq $ 4*SIZE, AO
.endm
.macro KERNEL2x3_SUBN
@@ -240,8 +240,8 @@
VFMADD231SD_ %xmm8,%xmm1,%xmm0
VFMADD231SD_ %xmm10,%xmm2,%xmm0
VFMADD231SD_ %xmm12,%xmm3,%xmm0
addq $3*SIZE , BO
addq $2*SIZE, AO
addq $ 3*SIZE , BO
addq $ 2*SIZE, AO
.endm
.macro KERNEL1x3_SUBN
@@ -252,8 +252,8 @@
VFMADD231SD_ %xmm5,%xmm2,%xmm0
vmovsd -10 * SIZE(BO), %xmm3
VFMADD231SD_ %xmm6,%xmm3,%xmm0
addq $3*SIZE , BO
addq $1*SIZE, AO
addq $ 3*SIZE , BO
addq $ 1*SIZE, AO
.endm
@@ -1602,16 +1602,16 @@
vmovsd 1 * SIZE(BO, BI, SIZE), %xmm1
vmovsd -29 * SIZE(AO, %rax, SIZE), %xmm0
VFMADD231SD_ %xmm4,%xmm1,%xmm0
addq $4, BI
addq $4, %rax
addq $ 4, BI
addq $ 4, %rax
.endm
.macro KERNEL1x1_SUB
vmovsd -2 * SIZE(BO, BI, SIZE), %xmm1
vmovsd -32 * SIZE(AO, %rax, SIZE), %xmm0
VFMADD231SD_ %xmm4,%xmm1,%xmm0
addq $1, BI
addq $1 , %rax
addq $ 1, BI
addq $ 1 , %rax
.endm
.macro SAVE1x1
@@ -1749,9 +1749,9 @@
vmovsd %xmm5, 8*SIZE(BO)
vmovups %xmm6, 9*SIZE(BO)
vmovsd %xmm7,11*SIZE(BO)
addq $8*SIZE,BO1
addq $8*SIZE,BO2
addq $12*SIZE,BO
addq $ 8*SIZE,BO1
addq $ 8*SIZE,BO2
addq $ 12*SIZE,BO
vmovups 0 * SIZE(BO1), %xmm0
vmovups 2 * SIZE(BO1), %xmm2
@@ -1769,9 +1769,9 @@
vmovsd %xmm5, 8*SIZE(BO)
vmovups %xmm6, 9*SIZE(BO)
vmovsd %xmm7,11*SIZE(BO)
addq $8*SIZE,BO1
addq $8*SIZE,BO2
addq $12*SIZE,BO
addq $ 8*SIZE,BO1
addq $ 8*SIZE,BO2
addq $ 12*SIZE,BO
decq %rax
jnz .L6_01a_1
@@ -1792,9 +1792,9 @@
vmovsd 0 * SIZE(BO2), %xmm2
vmovups %xmm0, 0*SIZE(BO)
vmovsd %xmm2, 2*SIZE(BO)
addq $2*SIZE,BO1
addq $2*SIZE,BO2
addq $3*SIZE,BO
addq $ 2*SIZE,BO1
addq $ 2*SIZE,BO2
addq $ 3*SIZE,BO
decq %rax
jnz .L6_02b

View File

@@ -80,10 +80,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef WINDOWS_ABI
#define STACKSIZE 96
#define L_BUFFER_SIZE 256*8*12+4096
#else
#define STACKSIZE 256
#define L_BUFFER_SIZE 128*8*12+4096
#define OLD_A 40 + STACKSIZE(%rsp)
#define OLD_B 48 + STACKSIZE(%rsp)
@@ -93,7 +95,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#define L_BUFFER_SIZE 512*8*12
#define Ndiv12 24(%rsp)
#define Nmod12 32(%rsp)
@@ -107,22 +108,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(OS_WINDOWS)
#if L_BUFFER_SIZE > 16384
#define STACK_TOUCH \
movl $0, 4096 * 4(%rsp);\
movl $0, 4096 * 3(%rsp);\
movl $0, 4096 * 2(%rsp);\
movl $0, 4096 * 1(%rsp);
movl $ 0, 4096 * 4(%rsp);\
movl $ 0, 4096 * 3(%rsp);\
movl $ 0, 4096 * 2(%rsp);\
movl $ 0, 4096 * 1(%rsp);
#elif L_BUFFER_SIZE > 12288
#define STACK_TOUCH \
movl $0, 4096 * 3(%rsp);\
movl $0, 4096 * 2(%rsp);\
movl $0, 4096 * 1(%rsp);
movl $ 0, 4096 * 3(%rsp);\
movl $ 0, 4096 * 2(%rsp);\
movl $ 0, 4096 * 1(%rsp);
#elif L_BUFFER_SIZE > 8192
#define STACK_TOUCH \
movl $0, 4096 * 2(%rsp);\
movl $0, 4096 * 1(%rsp);
movl $ 0, 4096 * 2(%rsp);\
movl $ 0, 4096 * 1(%rsp);
#elif L_BUFFER_SIZE > 4096
#define STACK_TOUCH \
movl $0, 4096 * 1(%rsp);
movl $ 0, 4096 * 1(%rsp);
#else
#define STACK_TOUCH
#endif
@@ -168,17 +169,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmulpd %ymm0 ,%ymm2 , %ymm8
vmulpd %ymm0 ,%ymm3 , %ymm12
prefetcht0 B_PR1+256(BO)
vpermpd $0xb1, %ymm0 , %ymm0
vpermpd $ 0xb1, %ymm0 , %ymm0
vmulpd %ymm0 ,%ymm1 , %ymm5
vmulpd %ymm0 ,%ymm2 , %ymm9
vmulpd %ymm0 ,%ymm3 , %ymm13
vpermpd $0x1b, %ymm0 , %ymm0
vpermpd $ 0x1b, %ymm0 , %ymm0
vmulpd %ymm0 ,%ymm1 , %ymm6
vmulpd %ymm0 ,%ymm2 , %ymm10
addq $12*SIZE, BO
addq $ 12*SIZE, BO
vmulpd %ymm0 ,%ymm3 , %ymm14
vpermpd $0xb1, %ymm0 , %ymm0
vpermpd $ 0xb1, %ymm0 , %ymm0
vmulpd %ymm0 ,%ymm1 , %ymm7
vmovups -12 * SIZE(BO), %ymm1
vmulpd %ymm0 ,%ymm2 , %ymm11
@@ -197,16 +198,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vfmadd231pd %ymm0 ,%ymm2 , %ymm8
prefetcht0 B_PR1+128(BO)
vfmadd231pd %ymm0 ,%ymm3 , %ymm12
vpermpd $0xb1, %ymm0 , %ymm0
vpermpd $ 0xb1, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm5
vfmadd231pd %ymm0 ,%ymm2 , %ymm9
vfmadd231pd %ymm0 ,%ymm3 , %ymm13
vpermpd $0x1b, %ymm0 , %ymm0
vpermpd $ 0x1b, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm6
vfmadd231pd %ymm0 ,%ymm2 , %ymm10
vfmadd231pd %ymm0 ,%ymm3 , %ymm14
vpermpd $0xb1, %ymm0 , %ymm0
vpermpd $ 0xb1, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm7
vmovups -12 * SIZE(BO), %ymm1
vfmadd231pd %ymm0 ,%ymm2 , %ymm11
@@ -221,24 +222,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vfmadd231pd %ymm0 ,%ymm1 , %ymm4
vfmadd231pd %ymm0 ,%ymm2 , %ymm8
vfmadd231pd %ymm0 ,%ymm3 , %ymm12
vpermpd $0xb1, %ymm0 , %ymm0
vpermpd $ 0xb1, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm5
vfmadd231pd %ymm0 ,%ymm2 , %ymm9
vfmadd231pd %ymm0 ,%ymm3 , %ymm13
vpermpd $0x1b, %ymm0 , %ymm0
vpermpd $ 0x1b, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm6
vfmadd231pd %ymm0 ,%ymm2 , %ymm10
addq $8*SIZE, AO
addq $ 8*SIZE, AO
vfmadd231pd %ymm0 ,%ymm3 , %ymm14
vpermpd $0xb1, %ymm0 , %ymm0
vpermpd $ 0xb1, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm7
vmovups 0 * SIZE(BO), %ymm1
vfmadd231pd %ymm0 ,%ymm2 , %ymm11
vmovups 4 * SIZE(BO), %ymm2
vfmadd231pd %ymm0 ,%ymm3 , %ymm15
vmovups 8 * SIZE(BO), %ymm3
addq $24*SIZE, BO
addq $ 24*SIZE, BO
.endm
@@ -247,21 +248,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vfmadd231pd %ymm0 ,%ymm1 , %ymm4
vfmadd231pd %ymm0 ,%ymm2 , %ymm8
vfmadd231pd %ymm0 ,%ymm3 , %ymm12
vpermpd $0xb1, %ymm0 , %ymm0
vpermpd $ 0xb1, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm5
vfmadd231pd %ymm0 ,%ymm2 , %ymm9
vfmadd231pd %ymm0 ,%ymm3 , %ymm13
vpermpd $0x1b, %ymm0 , %ymm0
vpermpd $ 0x1b, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm6
vfmadd231pd %ymm0 ,%ymm2 , %ymm10
addq $8*SIZE, AO
addq $ 8*SIZE, AO
vfmadd231pd %ymm0 ,%ymm3 , %ymm14
vpermpd $0xb1, %ymm0 , %ymm0
vpermpd $ 0xb1, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm7
vfmadd231pd %ymm0 ,%ymm2 , %ymm11
vfmadd231pd %ymm0 ,%ymm3 , %ymm15
addq $12*SIZE, BO
addq $ 12*SIZE, BO
.endm
.macro KERNEL4x12_SUB
@@ -272,17 +273,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vfmadd231pd %ymm0 ,%ymm2 , %ymm8
vmovups -4 * SIZE(BO), %ymm3
vfmadd231pd %ymm0 ,%ymm3 , %ymm12
vpermpd $0xb1, %ymm0 , %ymm0
vpermpd $ 0xb1, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm5
vfmadd231pd %ymm0 ,%ymm2 , %ymm9
addq $12*SIZE, BO
addq $ 12*SIZE, BO
vfmadd231pd %ymm0 ,%ymm3 , %ymm13
vpermpd $0x1b, %ymm0 , %ymm0
vpermpd $ 0x1b, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm6
vfmadd231pd %ymm0 ,%ymm2 , %ymm10
addq $4*SIZE, AO
addq $ 4*SIZE, AO
vfmadd231pd %ymm0 ,%ymm3 , %ymm14
vpermpd $0xb1, %ymm0 , %ymm0
vpermpd $ 0xb1, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm7
vfmadd231pd %ymm0 ,%ymm2 , %ymm11
vfmadd231pd %ymm0 ,%ymm3 , %ymm15
@@ -309,23 +310,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmulpd %ymm0 , %ymm14, %ymm14
vmulpd %ymm0 , %ymm15, %ymm15
vpermpd $0xb1 , %ymm5, %ymm5
vpermpd $0xb1 , %ymm7, %ymm7
vpermpd $ 0xb1 , %ymm5, %ymm5
vpermpd $ 0xb1 , %ymm7, %ymm7
vblendpd $0x0a, %ymm5, %ymm4, %ymm0
vblendpd $0x05, %ymm5, %ymm4, %ymm1
vblendpd $0x0a, %ymm7, %ymm6, %ymm2
vblendpd $0x05, %ymm7, %ymm6, %ymm3
vblendpd $ 0x0a, %ymm5, %ymm4, %ymm0
vblendpd $ 0x05, %ymm5, %ymm4, %ymm1
vblendpd $ 0x0a, %ymm7, %ymm6, %ymm2
vblendpd $ 0x05, %ymm7, %ymm6, %ymm3
vpermpd $0x1b , %ymm2, %ymm2
vpermpd $0x1b , %ymm3, %ymm3
vpermpd $0xb1 , %ymm2, %ymm2
vpermpd $0xb1 , %ymm3, %ymm3
vpermpd $ 0x1b , %ymm2, %ymm2
vpermpd $ 0x1b , %ymm3, %ymm3
vpermpd $ 0xb1 , %ymm2, %ymm2
vpermpd $ 0xb1 , %ymm3, %ymm3
vblendpd $0x03, %ymm0, %ymm2 , %ymm4
vblendpd $0x03, %ymm1, %ymm3 , %ymm5
vblendpd $0x03, %ymm2, %ymm0 , %ymm6
vblendpd $0x03, %ymm3, %ymm1 , %ymm7
vblendpd $ 0x03, %ymm0, %ymm2 , %ymm4
vblendpd $ 0x03, %ymm1, %ymm3 , %ymm5
vblendpd $ 0x03, %ymm2, %ymm0 , %ymm6
vblendpd $ 0x03, %ymm3, %ymm1 , %ymm7
leaq (CO1, LDC, 2), %rax
@@ -349,23 +350,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
prefetcht0 32(%rax)
prefetcht0 32(%rax,LDC)
vpermpd $0xb1 , %ymm9 , %ymm9
vpermpd $0xb1 , %ymm11, %ymm11
vpermpd $ 0xb1 , %ymm9 , %ymm9
vpermpd $ 0xb1 , %ymm11, %ymm11
vblendpd $0x0a, %ymm9 , %ymm8 , %ymm0
vblendpd $0x05, %ymm9 , %ymm8 , %ymm1
vblendpd $0x0a, %ymm11, %ymm10, %ymm2
vblendpd $0x05, %ymm11, %ymm10, %ymm3
vblendpd $ 0x0a, %ymm9 , %ymm8 , %ymm0
vblendpd $ 0x05, %ymm9 , %ymm8 , %ymm1
vblendpd $ 0x0a, %ymm11, %ymm10, %ymm2
vblendpd $ 0x05, %ymm11, %ymm10, %ymm3
vpermpd $0x1b , %ymm2, %ymm2
vpermpd $0x1b , %ymm3, %ymm3
vpermpd $0xb1 , %ymm2, %ymm2
vpermpd $0xb1 , %ymm3, %ymm3
vpermpd $ 0x1b , %ymm2, %ymm2
vpermpd $ 0x1b , %ymm3, %ymm3
vpermpd $ 0xb1 , %ymm2, %ymm2
vpermpd $ 0xb1 , %ymm3, %ymm3
vblendpd $0x03, %ymm0, %ymm2 , %ymm4
vblendpd $0x03, %ymm1, %ymm3 , %ymm5
vblendpd $0x03, %ymm2, %ymm0 , %ymm6
vblendpd $0x03, %ymm3, %ymm1 , %ymm7
vblendpd $ 0x03, %ymm0, %ymm2 , %ymm4
vblendpd $ 0x03, %ymm1, %ymm3 , %ymm5
vblendpd $ 0x03, %ymm2, %ymm0 , %ymm6
vblendpd $ 0x03, %ymm3, %ymm1 , %ymm7
leaq (%rax, LDC, 2), %rax
@@ -390,23 +391,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
prefetcht0 32(%rbp)
prefetcht0 32(%rbp,LDC)
vpermpd $0xb1 , %ymm13, %ymm13
vpermpd $0xb1 , %ymm15, %ymm15
vpermpd $ 0xb1 , %ymm13, %ymm13
vpermpd $ 0xb1 , %ymm15, %ymm15
vblendpd $0x0a, %ymm13, %ymm12, %ymm0
vblendpd $0x05, %ymm13, %ymm12, %ymm1
vblendpd $0x0a, %ymm15, %ymm14, %ymm2
vblendpd $0x05, %ymm15, %ymm14, %ymm3
vblendpd $ 0x0a, %ymm13, %ymm12, %ymm0
vblendpd $ 0x05, %ymm13, %ymm12, %ymm1
vblendpd $ 0x0a, %ymm15, %ymm14, %ymm2
vblendpd $ 0x05, %ymm15, %ymm14, %ymm3
vpermpd $0x1b , %ymm2, %ymm2
vpermpd $0x1b , %ymm3, %ymm3
vpermpd $0xb1 , %ymm2, %ymm2
vpermpd $0xb1 , %ymm3, %ymm3
vpermpd $ 0x1b , %ymm2, %ymm2
vpermpd $ 0x1b , %ymm3, %ymm3
vpermpd $ 0xb1 , %ymm2, %ymm2
vpermpd $ 0xb1 , %ymm3, %ymm3
vblendpd $0x03, %ymm0, %ymm2 , %ymm4
vblendpd $0x03, %ymm1, %ymm3 , %ymm5
vblendpd $0x03, %ymm2, %ymm0 , %ymm6
vblendpd $0x03, %ymm3, %ymm1 , %ymm7
vblendpd $ 0x03, %ymm0, %ymm2 , %ymm4
vblendpd $ 0x03, %ymm1, %ymm3 , %ymm5
vblendpd $ 0x03, %ymm2, %ymm0 , %ymm6
vblendpd $ 0x03, %ymm3, %ymm1 , %ymm7
leaq (%rax, LDC, 4), %rax
@@ -431,7 +432,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
prefetcht0 32(%rbp)
prefetcht0 32(%rbp,LDC)
addq $4*SIZE, CO1
addq $ 4*SIZE, CO1
.endm
/******************************************************************************************/
@@ -477,9 +478,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vfmadd231pd %xmm0 ,%xmm3 , %xmm12
vmovddup -1 * SIZE(BO), %xmm3
vfmadd231pd %xmm0 ,%xmm1 , %xmm13
addq $12*SIZE, BO
addq $ 12*SIZE, BO
vfmadd231pd %xmm0 ,%xmm2 , %xmm14
addq $2*SIZE, AO
addq $ 2*SIZE, AO
vfmadd231pd %xmm0 ,%xmm3 , %xmm15
.endm
@@ -557,7 +558,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups %xmm6 , (%rbp)
vmovups %xmm7 , (%rbp, LDC)
addq $2*SIZE, CO1
addq $ 2*SIZE, CO1
.endm
@@ -604,9 +605,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vfmadd231sd %xmm0 ,%xmm3 , %xmm12
vmovsd -1 * SIZE(BO), %xmm3
vfmadd231sd %xmm0 ,%xmm1 , %xmm13
addq $12*SIZE, BO
addq $ 12*SIZE, BO
vfmadd231sd %xmm0 ,%xmm2 , %xmm14
addq $1*SIZE, AO
addq $ 1*SIZE, AO
vfmadd231sd %xmm0 ,%xmm3 , %xmm15
.endm
@@ -684,7 +685,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovsd %xmm6 , (%rbp)
vmovsd %xmm7 , (%rbp, LDC)
addq $1*SIZE, CO1
addq $ 1*SIZE, CO1
.endm
@@ -707,13 +708,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups -12 * SIZE(BO), %ymm1
vmovups -16 * SIZE(AO), %ymm0
vmulpd %ymm0 ,%ymm1 , %ymm4
vpermpd $0xb1, %ymm0 , %ymm0
vpermpd $ 0xb1, %ymm0 , %ymm0
vmulpd %ymm0 ,%ymm1 , %ymm5
vpermpd $0x1b, %ymm0 , %ymm0
vpermpd $ 0x1b, %ymm0 , %ymm0
vmulpd %ymm0 ,%ymm1 , %ymm6
addq $4*SIZE, BO
vpermpd $0xb1, %ymm0 , %ymm0
addq $ 4*SIZE, BO
vpermpd $ 0xb1, %ymm0 , %ymm0
vmulpd %ymm0 ,%ymm1 , %ymm7
vmovups -12 * SIZE(BO), %ymm1
@@ -723,12 +724,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
prefetcht0 A_PR1(AO)
vmovups -16 * SIZE(AO), %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm4
vpermpd $0xb1, %ymm0 , %ymm0
vpermpd $ 0xb1, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm5
vpermpd $0x1b, %ymm0 , %ymm0
vpermpd $ 0x1b, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm6
vpermpd $0xb1, %ymm0 , %ymm0
vpermpd $ 0xb1, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm7
vmovups -12 * SIZE(BO), %ymm1
@@ -737,44 +738,44 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*
 * KERNEL4x4_M2: one multiply-accumulate step of the 4x4 double-precision kernel.
 * Loads four doubles from -12*SIZE(AO) into ymm0 and accumulates ymm0*ymm1 into
 * ymm4..ymm7, permuting ymm0 between the FMAs. ymm1 is used as found on entry
 * and is reloaded from -8*SIZE(BO) near the end; AO and BO are each advanced
 * by 8*SIZE.
 *
 * NOTE(review): this text is a diff hunk, so every instruction with an
 * immediate operand appears twice (old "$imm" spelling followed by the new
 * "$ imm" spelling). Presumably only one line of each pair exists in the real
 * file -- confirm against the actual kernel source before assembling.
 */
.macro KERNEL4x4_M2
vmovups -12 * SIZE(AO), %ymm0
// ymm4 += ymm1 * ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm4
// 0xb1 swaps the two doubles inside each 128-bit half of ymm0
vpermpd $0xb1, %ymm0 , %ymm0
vpermpd $ 0xb1, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm5
// 0x1b reverses the order of the four doubles in ymm0
vpermpd $0x1b, %ymm0 , %ymm0
vpermpd $ 0x1b, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm6
// pointer bump for AO, placed between the FMAs
addq $8*SIZE, AO
vpermpd $0xb1, %ymm0 , %ymm0
addq $ 8*SIZE, AO
vpermpd $ 0xb1, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm7
// reload ymm1 from BO before BO itself is advanced
vmovups -8 * SIZE(BO), %ymm1
addq $8*SIZE, BO
addq $ 8*SIZE, BO
.endm
/*
 * KERNEL4x4_E: same FMA/permute sequence as KERNEL4x4_M2 (load ymm0 from
 * -12*SIZE(AO), accumulate ymm0*ymm1 into ymm4..ymm7), but it does not reload
 * ymm1 and advances BO by 4*SIZE instead of 8*SIZE. AO is advanced by 8*SIZE.
 *
 * NOTE(review): this text is a diff hunk, so every instruction with an
 * immediate operand appears twice (old "$imm" spelling followed by the new
 * "$ imm" spelling). Presumably only one line of each pair exists in the real
 * file -- confirm against the actual kernel source before assembling.
 */
.macro KERNEL4x4_E
vmovups -12 * SIZE(AO), %ymm0
// ymm4 += ymm1 * ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm4
// 0xb1 swaps the two doubles inside each 128-bit half of ymm0
vpermpd $0xb1, %ymm0 , %ymm0
vpermpd $ 0xb1, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm5
// 0x1b reverses the order of the four doubles in ymm0
vpermpd $0x1b, %ymm0 , %ymm0
vpermpd $ 0x1b, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm6
// pointer bump for AO, placed between the FMAs
addq $8*SIZE, AO
vpermpd $0xb1, %ymm0 , %ymm0
addq $ 8*SIZE, AO
vpermpd $ 0xb1, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm7
// no ymm1 reload here; BO moves by 4*SIZE only
addq $4*SIZE, BO
addq $ 4*SIZE, BO
.endm
/*
 * KERNEL4x4_SUB: self-contained single step of the 4x4 double-precision
 * kernel. Loads ymm1 from -12*SIZE(BO) and ymm0 from -16*SIZE(AO), accumulates
 * ymm0*ymm1 into ymm4..ymm7 with ymm0 permuted between the FMAs, and advances
 * both BO and AO by 4*SIZE.
 *
 * NOTE(review): this text is a diff hunk, so every instruction with an
 * immediate operand appears twice (old "$imm" spelling followed by the new
 * "$ imm" spelling). Presumably only one line of each pair exists in the real
 * file -- confirm against the actual kernel source before assembling.
 */
.macro KERNEL4x4_SUB
vmovups -12 * SIZE(BO), %ymm1
vmovups -16 * SIZE(AO), %ymm0
// ymm4 += ymm1 * ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm4
// 0xb1 swaps the two doubles inside each 128-bit half of ymm0
vpermpd $0xb1, %ymm0 , %ymm0
vpermpd $ 0xb1, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm5
// pointer bump for BO, placed between the FMAs
addq $4*SIZE, BO
// 0x1b reverses the order of the four doubles in ymm0
vpermpd $0x1b, %ymm0 , %ymm0
addq $ 4*SIZE, BO
vpermpd $ 0x1b, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm6
// pointer bump for AO
addq $4*SIZE, AO
vpermpd $0xb1, %ymm0 , %ymm0
addq $ 4*SIZE, AO
vpermpd $ 0xb1, %ymm0 , %ymm0
vfmadd231pd %ymm0 ,%ymm1 , %ymm7
.endm
@@ -788,23 +789,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmulpd %ymm0 , %ymm5 , %ymm5
vmulpd %ymm0 , %ymm6 , %ymm6
vpermpd $0xb1 , %ymm5, %ymm5
vpermpd $0xb1 , %ymm7, %ymm7
vpermpd $ 0xb1 , %ymm5, %ymm5
vpermpd $ 0xb1 , %ymm7, %ymm7
vblendpd $0x0a, %ymm5, %ymm4, %ymm0
vblendpd $0x05, %ymm5, %ymm4, %ymm1
vblendpd $0x0a, %ymm7, %ymm6, %ymm2
vblendpd $0x05, %ymm7, %ymm6, %ymm3
vblendpd $ 0x0a, %ymm5, %ymm4, %ymm0
vblendpd $ 0x05, %ymm5, %ymm4, %ymm1
vblendpd $ 0x0a, %ymm7, %ymm6, %ymm2
vblendpd $ 0x05, %ymm7, %ymm6, %ymm3
vpermpd $0x1b , %ymm2, %ymm2
vpermpd $0x1b , %ymm3, %ymm3
vpermpd $0xb1 , %ymm2, %ymm2
vpermpd $0xb1 , %ymm3, %ymm3
vpermpd $ 0x1b , %ymm2, %ymm2
vpermpd $ 0x1b , %ymm3, %ymm3
vpermpd $ 0xb1 , %ymm2, %ymm2
vpermpd $ 0xb1 , %ymm3, %ymm3
vblendpd $0x03, %ymm0, %ymm2 , %ymm4
vblendpd $0x03, %ymm1, %ymm3 , %ymm5
vblendpd $0x03, %ymm2, %ymm0 , %ymm6
vblendpd $0x03, %ymm3, %ymm1 , %ymm7
vblendpd $ 0x03, %ymm0, %ymm2 , %ymm4
vblendpd $ 0x03, %ymm1, %ymm3 , %ymm5
vblendpd $ 0x03, %ymm2, %ymm0 , %ymm6
vblendpd $ 0x03, %ymm3, %ymm1 , %ymm7
leaq (CO1, LDC, 2), %rax
@@ -823,7 +824,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups %ymm6 , (%rax)
vmovups %ymm7 , (%rax, LDC)
addq $4*SIZE, CO1
addq $ 4*SIZE, CO1
.endm
/******************************************************************************************/
@@ -848,9 +849,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vfmadd231pd %xmm0 ,%xmm2 , %xmm5
vmovddup -9 * SIZE(BO), %xmm8
vfmadd231pd %xmm0 ,%xmm3 , %xmm6
addq $4*SIZE, BO
addq $ 4*SIZE, BO
vfmadd231pd %xmm0 ,%xmm8 , %xmm7
addq $2*SIZE, AO
addq $ 2*SIZE, AO
.endm
@@ -880,7 +881,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups %xmm6 , (%rax)
vmovups %xmm7 , (%rax, LDC)
addq $2*SIZE, CO1
addq $ 2*SIZE, CO1
.endm
/******************************************************************************************/
@@ -905,9 +906,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vfmadd231sd %xmm0 ,%xmm2 , %xmm5
vmovsd -9 * SIZE(BO), %xmm8
vfmadd231sd %xmm0 ,%xmm3 , %xmm6
addq $4*SIZE, BO
addq $ 4*SIZE, BO
vfmadd231sd %xmm0 ,%xmm8 , %xmm7
addq $1*SIZE, AO
addq $ 1*SIZE, AO
.endm
@@ -937,7 +938,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovsd %xmm6 , (%rax)
vmovsd %xmm7 , (%rax, LDC)
addq $1*SIZE, CO1
addq $ 1*SIZE, CO1
.endm
@@ -963,8 +964,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vfmadd231pd %xmm1 ,%xmm2 , %xmm5
vfmadd231pd %xmm0 ,%xmm3 , %xmm6
vfmadd231pd %xmm1 ,%xmm3 , %xmm7
addq $2*SIZE, BO
addq $4*SIZE, AO
addq $ 2*SIZE, BO
addq $ 4*SIZE, AO
.endm
@@ -993,7 +994,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups %xmm6 , (CO1, LDC)
vmovups %xmm7 , 2 * SIZE(CO1, LDC)
addq $4*SIZE, CO1
addq $ 4*SIZE, CO1
.endm
@@ -1014,8 +1015,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovddup -11 * SIZE(BO), %xmm3
vfmadd231pd %xmm0 ,%xmm2 , %xmm4
vfmadd231pd %xmm0 ,%xmm3 , %xmm6
addq $2*SIZE, BO
addq $2*SIZE, AO
addq $ 2*SIZE, BO
addq $ 2*SIZE, AO
.endm
@@ -1038,7 +1039,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups %xmm4 , (CO1)
vmovups %xmm6 , (CO1, LDC)
addq $2*SIZE, CO1
addq $ 2*SIZE, CO1
.endm
/******************************************************************************************/
@@ -1058,8 +1059,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovsd -11 * SIZE(BO), %xmm2
vfmadd231sd %xmm0 ,%xmm1 , %xmm4
vfmadd231sd %xmm0 ,%xmm2 , %xmm5
addq $2*SIZE, BO
addq $1*SIZE, AO
addq $ 2*SIZE, BO
addq $ 1*SIZE, AO
.endm
@@ -1082,7 +1083,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovsd %xmm4 , (CO1)
vmovsd %xmm5 , (CO1, LDC)
addq $1*SIZE, CO1
addq $ 1*SIZE, CO1
.endm
@@ -1103,8 +1104,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups -14 * SIZE(AO), %xmm1
vfmadd231pd %xmm0 ,%xmm2 , %xmm4
vfmadd231pd %xmm1 ,%xmm2 , %xmm5
addq $1*SIZE, BO
addq $4*SIZE, AO
addq $ 1*SIZE, BO
addq $ 4*SIZE, AO
.endm
@@ -1127,7 +1128,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups %xmm4 , (CO1)
vmovups %xmm5 , 2 * SIZE(CO1)
addq $4*SIZE, CO1
addq $ 4*SIZE, CO1
.endm
@@ -1145,8 +1146,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovddup -12 * SIZE(BO), %xmm2
vmovups -16 * SIZE(AO), %xmm0
vfmadd231pd %xmm0 ,%xmm2 , %xmm4
addq $1*SIZE, BO
addq $2*SIZE, AO
addq $ 1*SIZE, BO
addq $ 2*SIZE, AO
.endm
@@ -1166,7 +1167,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups %xmm4 , (CO1)
addq $2*SIZE, CO1
addq $ 2*SIZE, CO1
.endm
@@ -1184,8 +1185,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovsd -12 * SIZE(BO), %xmm1
vmovsd -16 * SIZE(AO), %xmm0
vfmadd231sd %xmm0 ,%xmm1 , %xmm4
addq $1*SIZE, BO
addq $1*SIZE, AO
addq $ 1*SIZE, BO
addq $ 1*SIZE, AO
.endm
@@ -1205,7 +1206,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovsd %xmm4 , (CO1)
addq $1*SIZE, CO1
addq $ 1*SIZE, CO1
.endm
@@ -1262,13 +1263,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
STACK_TOUCH
cmpq $0, OLD_M
cmpq $ 0, OLD_M
je .L999
cmpq $0, OLD_N
cmpq $ 0, OLD_N
je .L999
cmpq $0, OLD_K
cmpq $ 0, OLD_K
je .L999
movq OLD_M, M
@@ -1288,7 +1289,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
movq Ndiv12, J
cmpq $0, J
cmpq $ 0, J
je .L4_0
ALIGN_4
@@ -1330,10 +1331,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups %ymm6, 16 * SIZE(BO)
vmovups %ymm7, 20 * SIZE(BO)
addq $8 * SIZE ,BO1
addq $8 * SIZE ,BO2
addq $8 * SIZE ,BO3
addq $24 *SIZE ,BO
addq $ 8 * SIZE ,BO1
addq $ 8 * SIZE ,BO2
addq $ 8 * SIZE ,BO3
addq $ 24 *SIZE ,BO
decq %rax
jnz .L12_01a_1
@@ -1356,10 +1357,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups %ymm1, 0 * SIZE(BO)
vmovups %ymm2, 4 * SIZE(BO)
vmovups %ymm3, 8 * SIZE(BO)
addq $4*SIZE,BO1
addq $4*SIZE,BO2
addq $4*SIZE,BO3
addq $12*SIZE,BO
addq $ 4*SIZE,BO1
addq $ 4*SIZE,BO2
addq $ 4*SIZE,BO3
addq $ 12*SIZE,BO
decq %rax
jnz .L12_02b
@@ -1407,8 +1408,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
subq $2, %rax
je .L12_12a
.align 32
ALIGN_5
.L12_12:
KERNEL4x12_M1
@@ -1621,7 +1621,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.L4_0:
cmpq $0, Nmod12 // N % 12 == 0
cmpq $ 0, Nmod12 // N % 12 == 0
je .L999
movq Nmod12, J
@@ -1666,7 +1666,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
subq $2, %rax
je .L4_12a
.align 32
ALIGN_5
.L4_12:
@@ -1912,7 +1912,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
je .L2_16
.align 32
ALIGN_5
.L2_12:
@@ -2108,7 +2108,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sarq $3, %rax // K / 8
je .L1_16
.align 32
ALIGN_5
.L1_12:
@@ -2362,13 +2362,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
STACK_TOUCH
cmpq $0, OLD_M
cmpq $ 0, OLD_M
je .L999
cmpq $0, OLD_N
cmpq $ 0, OLD_N
je .L999
cmpq $0, OLD_K
cmpq $ 0, OLD_K
je .L999
movq OLD_M, M
@@ -2397,7 +2397,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
movq Ndiv12, J
cmpq $0, J
cmpq $ 0, J
je .L2_0
ALIGN_4
@@ -2471,7 +2471,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
subq $2, %rax
je .L4_12a
.align 32
ALIGN_5
.L4_12:
@@ -2848,7 +2848,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
je .L2_16
.align 32
ALIGN_5
.L2_12:
@@ -3176,7 +3176,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sarq $3, %rax // K / 8
je .L1_16
.align 32
ALIGN_5
.L1_12:

View File

@@ -196,7 +196,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups -12 * SIZE(AO), %xmm0
vmulpd %xmm1,%xmm0,%xmm10
vmulpd %xmm2,%xmm0,%xmm11
addq $3*SIZE, BO
addq $ 3 * SIZE, BO
vmulpd %xmm3,%xmm0,%xmm12
vmovups -10 * SIZE(AO), %xmm0
vmulpd %xmm1,%xmm0,%xmm13
@@ -294,7 +294,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups 14 * SIZE(AO), %xmm0
VFMADD231PD_( %xmm1,%xmm0,%xmm13 )
vmovddup -3 * SIZE(BO), %xmm1
addq $32 * SIZE, AO
addq $ 32 * SIZE, AO
VFMADD231PD_( %xmm2,%xmm0,%xmm14 )
vmovddup -2 * SIZE(BO), %xmm2
VFMADD231PD_( %xmm3,%xmm0,%xmm15 )
@@ -392,8 +392,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovddup 10 * SIZE(BO), %xmm2
VFMADD231PD_( %xmm3,%xmm0,%xmm15 )
vmovddup 11 * SIZE(BO), %xmm3
addq $32 * SIZE, AO
addq $24 * SIZE, BO
addq $ 32 * SIZE, AO
addq $ 24 * SIZE, BO
.endm
@@ -414,9 +414,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
VFMADD231PD_( %xmm3,%xmm0,%xmm12 )
vmovups 14 * SIZE(AO), %xmm0
VFMADD231PD_( %xmm1,%xmm0,%xmm13 )
addq $32*SIZE, AO
addq $ 32 * SIZE, AO
VFMADD231PD_( %xmm2,%xmm0,%xmm14 )
addq $21*SIZE, BO
addq $ 21 * SIZE, BO
VFMADD231PD_( %xmm3,%xmm0,%xmm15 )
.endm
@@ -438,9 +438,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
VFMADD231PD_( %xmm3,%xmm0,%xmm12 )
vmovups -10 * SIZE(AO), %xmm0
VFMADD231PD_( %xmm1,%xmm0,%xmm13 )
addq $3*SIZE, BO
addq $ 3 * SIZE, BO
VFMADD231PD_( %xmm2,%xmm0,%xmm14 )
addq $8*SIZE, AO
addq $ 8 * SIZE, AO
VFMADD231PD_( %xmm3,%xmm0,%xmm15 )
.endm
@@ -483,7 +483,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
prefetcht0 C_PR1(CO1,LDC)
prefetcht0 C_PR1(CO1,LDC,2)
addq $8 * SIZE, CO1 # coffset += 8
addq $ 8 * SIZE, CO1 # coffset += 8
.endm
@@ -1165,9 +1165,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovsd %xmm5, 8*SIZE(BO)
vmovups %xmm6, 9*SIZE(BO)
vmovsd %xmm7,11*SIZE(BO)
addq $8*SIZE,BO1
addq $8*SIZE,BO2
addq $12*SIZE,BO
addq $ 8*SIZE,BO1
addq $ 8*SIZE,BO2
addq $ 12*SIZE,BO
decq %rax
jnz .L6_02
@@ -1184,9 +1184,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovsd (BO2), %xmm1
vmovups %xmm0, (BO)
vmovsd %xmm1, 2*SIZE(BO)
addq $2*SIZE,BO1
addq $2*SIZE,BO2
addq $3*SIZE,BO
addq $ 2*SIZE,BO1
addq $ 2*SIZE,BO2
addq $ 3*SIZE,BO
decq %rax
jnz .L6_02b
@@ -1223,9 +1223,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups %xmm4, 7*SIZE(BO)
vmovsd %xmm7, 9*SIZE(BO)
vmovups %xmm6,10*SIZE(BO)
addq $8*SIZE,BO1
addq $8*SIZE,BO2
addq $12*SIZE,BO
addq $ 8*SIZE,BO1
addq $ 8*SIZE,BO2
addq $ 12*SIZE,BO
decq %rax
jnz .L6_03
@@ -1243,9 +1243,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups (BO2), %xmm1
vmovsd %xmm0, (BO)
vmovups %xmm1, 1*SIZE(BO)
addq $2*SIZE,BO1
addq $2*SIZE,BO2
addq $3*SIZE,BO
addq $ 2*SIZE,BO1
addq $ 2*SIZE,BO2
addq $ 3*SIZE,BO
decq %rax
jnz .L6_03b

View File

@@ -111,6 +111,9 @@
#define MM M
#endif
#define TMP_M %r15
#define Y2 %rbx
PROLOGUE
PROFCODE
@@ -170,8 +173,9 @@
jge .L00t
movq MMM,M
addq I,M
addq M, I
jle .L999x
movq I, M
.L00t:
movq XX,X
@@ -2463,21 +2467,23 @@
cmpq Y, BUFFER
je .L999
#endif
movq M, TMP_M
movq Y, Y1
cmpq $SIZE, INCY
jne .L950
testq $SIZE, Y
testq $SIZE, Y1
je .L910
movsd (Y), %xmm0
movsd (Y1), %xmm0
addsd (BUFFER), %xmm0
movsd %xmm0, (Y)
movsd %xmm0, (Y1)
addq $SIZE, Y
addq $SIZE, Y1
addq $SIZE, BUFFER
decq M
decq TMP_M
jle .L999
ALIGN_4
@@ -2485,20 +2491,20 @@
testq $SIZE, BUFFER
jne .L920
movq M, %rax
movq TMP_M, %rax
sarq $3, %rax
jle .L914
ALIGN_3
.L912:
#ifdef PREFETCHW
PREFETCHW (PREFETCHSIZE) * 4 + PREOFFSET(Y)
PREFETCHW (PREFETCHSIZE) * 4 + PREOFFSET(Y1)
#endif
movapd 0 * SIZE(Y), %xmm0
movapd 2 * SIZE(Y), %xmm1
movapd 4 * SIZE(Y), %xmm2
movapd 6 * SIZE(Y), %xmm3
movapd 0 * SIZE(Y1), %xmm0
movapd 2 * SIZE(Y1), %xmm1
movapd 4 * SIZE(Y1), %xmm2
movapd 6 * SIZE(Y1), %xmm3
movapd 0 * SIZE(BUFFER), %xmm4
movapd 2 * SIZE(BUFFER), %xmm5
@@ -2514,12 +2520,12 @@
addpd %xmm6, %xmm2
addpd %xmm7, %xmm3
movapd %xmm0, 0 * SIZE(Y)
movapd %xmm1, 2 * SIZE(Y)
movapd %xmm2, 4 * SIZE(Y)
movapd %xmm3, 6 * SIZE(Y)
movapd %xmm0, 0 * SIZE(Y1)
movapd %xmm1, 2 * SIZE(Y1)
movapd %xmm2, 4 * SIZE(Y1)
movapd %xmm3, 6 * SIZE(Y1)
addq $8 * SIZE, Y
addq $8 * SIZE, Y1
addq $8 * SIZE, BUFFER
decq %rax
@@ -2527,14 +2533,14 @@
ALIGN_3
.L914:
testq $7, M
testq $7, TMP_M
jle .L999
testq $4, M
testq $4, TMP_M
jle .L915
movapd 0 * SIZE(Y), %xmm0
movapd 2 * SIZE(Y), %xmm1
movapd 0 * SIZE(Y1), %xmm0
movapd 2 * SIZE(Y1), %xmm1
movapd 0 * SIZE(BUFFER), %xmm4
movapd 2 * SIZE(BUFFER), %xmm5
@@ -2542,40 +2548,40 @@
addpd %xmm4, %xmm0
addpd %xmm5, %xmm1
movapd %xmm0, 0 * SIZE(Y)
movapd %xmm1, 2 * SIZE(Y)
movapd %xmm0, 0 * SIZE(Y1)
movapd %xmm1, 2 * SIZE(Y1)
addq $4 * SIZE, Y
addq $4 * SIZE, Y1
addq $4 * SIZE, BUFFER
ALIGN_3
.L915:
testq $2, M
testq $2, TMP_M
jle .L916
movapd (Y), %xmm0
movapd (Y1), %xmm0
movapd (BUFFER), %xmm4
addpd %xmm4, %xmm0
movapd %xmm0, (Y)
movapd %xmm0, (Y1)
addq $2 * SIZE, Y
addq $2 * SIZE, Y1
addq $2 * SIZE, BUFFER
ALIGN_3
.L916:
testq $1, M
testq $1, TMP_M
jle .L999
movsd (Y), %xmm0
movsd (Y1), %xmm0
movsd 0 * SIZE(BUFFER), %xmm4
addsd %xmm4, %xmm0
movlpd %xmm0, (Y)
movlpd %xmm0, (Y1)
ALIGN_3
jmp .L999
@@ -2584,20 +2590,20 @@
.L920:
movapd -1 * SIZE(BUFFER), %xmm4
movq M, %rax
movq TMP_M, %rax
sarq $3, %rax
jle .L924
ALIGN_3
.L922:
#ifdef PREFETCHW
PREFETCHW (PREFETCHSIZE) * 4 + PREOFFSET(Y)
PREFETCHW (PREFETCHSIZE) * 4 + PREOFFSET(Y1)
#endif
movapd 0 * SIZE(Y), %xmm0
movapd 2 * SIZE(Y), %xmm1
movapd 4 * SIZE(Y), %xmm2
movapd 6 * SIZE(Y), %xmm3
movapd 0 * SIZE(Y1), %xmm0
movapd 2 * SIZE(Y1), %xmm1
movapd 4 * SIZE(Y1), %xmm2
movapd 6 * SIZE(Y1), %xmm3
movapd 1 * SIZE(BUFFER), %xmm5
movapd 3 * SIZE(BUFFER), %xmm6
@@ -2618,14 +2624,14 @@
addpd %xmm6, %xmm2
addpd %xmm7, %xmm3
movapd %xmm0, 0 * SIZE(Y)
movapd %xmm1, 2 * SIZE(Y)
movapd %xmm2, 4 * SIZE(Y)
movapd %xmm3, 6 * SIZE(Y)
movapd %xmm0, 0 * SIZE(Y1)
movapd %xmm1, 2 * SIZE(Y1)
movapd %xmm2, 4 * SIZE(Y1)
movapd %xmm3, 6 * SIZE(Y1)
movapd %xmm8, %xmm4
addq $8 * SIZE, Y
addq $8 * SIZE, Y1
addq $8 * SIZE, BUFFER
decq %rax
@@ -2633,14 +2639,14 @@
ALIGN_3
.L924:
testq $7, M
testq $7, TMP_M
jle .L999
testq $4, M
testq $4, TMP_M
jle .L925
movapd 0 * SIZE(Y), %xmm0
movapd 2 * SIZE(Y), %xmm1
movapd 0 * SIZE(Y1), %xmm0
movapd 2 * SIZE(Y1), %xmm1
movapd 1 * SIZE(BUFFER), %xmm5
movapd 3 * SIZE(BUFFER), %xmm6
@@ -2651,20 +2657,20 @@
addpd %xmm4, %xmm0
addpd %xmm5, %xmm1
movapd %xmm0, 0 * SIZE(Y)
movapd %xmm1, 2 * SIZE(Y)
movapd %xmm0, 0 * SIZE(Y1)
movapd %xmm1, 2 * SIZE(Y1)
movapd %xmm6, %xmm4
addq $4 * SIZE, Y
addq $4 * SIZE, Y1
addq $4 * SIZE, BUFFER
ALIGN_3
.L925:
testq $2, M
testq $2, TMP_M
jle .L926
movapd (Y), %xmm0
movapd (Y1), %xmm0
movapd 1 * SIZE(BUFFER), %xmm5
@@ -2672,25 +2678,25 @@
addpd %xmm4, %xmm0
movapd %xmm0, (Y)
movapd %xmm0, (Y1)
movaps %xmm5, %xmm4
addq $2 * SIZE, Y
addq $2 * SIZE, Y1
addq $2 * SIZE, BUFFER
ALIGN_3
.L926:
testq $1, M
testq $1, TMP_M
jle .L999
movsd (Y), %xmm0
movsd (Y1), %xmm0
shufpd $1, %xmm4, %xmm4
addsd %xmm4, %xmm0
movlpd %xmm0, (Y)
movlpd %xmm0, (Y1)
ALIGN_3
jmp .L999
@@ -2700,53 +2706,53 @@
testq $SIZE, BUFFER
je .L960
movsd (Y), %xmm0
movsd (Y1), %xmm0
addsd (BUFFER), %xmm0
movsd %xmm0, (Y)
movsd %xmm0, (Y1)
addq INCY, Y
addq INCY, Y1
addq $SIZE, BUFFER
decq M
decq TMP_M
jle .L999
ALIGN_4
.L960:
movq Y, Y1
movq Y1, Y2
movq M, %rax
movq TMP_M, %rax
sarq $3, %rax
jle .L964
ALIGN_3
.L962:
movsd (Y), %xmm0
addq INCY, Y
movhpd (Y), %xmm0
addq INCY, Y
movsd (Y2), %xmm0
addq INCY, Y2
movhpd (Y2), %xmm0
addq INCY, Y2
movapd 0 * SIZE(BUFFER), %xmm4
movsd (Y), %xmm1
addq INCY, Y
movhpd (Y), %xmm1
addq INCY, Y
movsd (Y2), %xmm1
addq INCY, Y2
movhpd (Y2), %xmm1
addq INCY, Y2
movapd 2 * SIZE(BUFFER), %xmm5
movsd (Y), %xmm2
addq INCY, Y
movhpd (Y), %xmm2
addq INCY, Y
movsd (Y2), %xmm2
addq INCY, Y2
movhpd (Y2), %xmm2
addq INCY, Y2
movapd 4 * SIZE(BUFFER), %xmm6
addpd %xmm4, %xmm0
movsd (Y), %xmm3
addq INCY, Y
movhpd (Y), %xmm3
addq INCY, Y
movsd (Y2), %xmm3
addq INCY, Y2
movhpd (Y2), %xmm3
addq INCY, Y2
movapd 6 * SIZE(BUFFER), %xmm7
@@ -2781,23 +2787,23 @@
ALIGN_3
.L964:
testq $7, M
testq $7, TMP_M
jle .L999
testq $4, M
testq $4, TMP_M
jle .L965
movsd (Y), %xmm0
addq INCY, Y
movhpd (Y), %xmm0
addq INCY, Y
movsd (Y2), %xmm0
addq INCY, Y2
movhpd (Y2), %xmm0
addq INCY, Y2
movapd 0 * SIZE(BUFFER), %xmm4
movsd (Y), %xmm1
addq INCY, Y
movhpd (Y), %xmm1
addq INCY, Y
movsd (Y2), %xmm1
addq INCY, Y2
movhpd (Y2), %xmm1
addq INCY, Y2
movapd 2 * SIZE(BUFFER), %xmm5
@@ -2817,13 +2823,13 @@
ALIGN_3
.L965:
testq $2, M
testq $2, TMP_M
jle .L966
movsd (Y), %xmm0
addq INCY, Y
movhpd (Y), %xmm0
addq INCY, Y
movsd (Y2), %xmm0
addq INCY, Y2
movhpd (Y2), %xmm0
addq INCY, Y2
movapd 0 * SIZE(BUFFER), %xmm4
@@ -2838,10 +2844,10 @@
ALIGN_3
.L966:
testq $1, M
testq $1, TMP_M
jle .L999
movsd (Y), %xmm0
movsd (Y2), %xmm0
movsd 0 * SIZE(BUFFER), %xmm4
@@ -2853,6 +2859,9 @@
.L999:
leaq (, M, SIZE), %rax
addq %rax,AA
movq STACK_INCY, INCY
imulq INCY, %rax
addq %rax, Y
jmp .L0t
ALIGN_4

View File

@@ -166,8 +166,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
VFMADD231PS_( %ymm9,%ymm2,%ymm1 )
VFMADD231PS_( %ymm10,%ymm3,%ymm0 )
VFMADD231PS_( %ymm11,%ymm3,%ymm1 )
addq $4 , BI
addq $16, %rax
addq $ 4 , BI
addq $ 16, %rax
.endm
.macro SAVE16x4
@@ -233,8 +233,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vbroadcastss -1 * SIZE(BO, BI, SIZE), %ymm3
VFMADD231PS_( %ymm8,%ymm2,%ymm0 )
VFMADD231PS_( %ymm10,%ymm3,%ymm0 )
addq $4 , BI
addq $8 , %rax
addq $ 4 , BI
addq $ 8 , %rax
.endm
.macro SAVE8x4
@@ -277,8 +277,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vbroadcastss -1 * SIZE(BO, BI, SIZE), %xmm3
VFMADD231PS_( %xmm8,%xmm2,%xmm0 )
VFMADD231PS_( %xmm10,%xmm3,%xmm0 )
addq $4 , BI
addq $4 , %rax
addq $ 4 , BI
addq $ 4 , %rax
.endm
.macro SAVE4x4
@@ -325,8 +325,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
VFMADD231SS_( %xmm9,%xmm2,%xmm1 )
VFMADD231SS_( %xmm10,%xmm3,%xmm0 )
VFMADD231SS_( %xmm11,%xmm3,%xmm1 )
addq $4 , BI
addq $2, %rax
addq $ 4 , BI
addq $ 2, %rax
.endm
.macro SAVE2x4
@@ -386,8 +386,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovss -1 * SIZE(BO, BI, SIZE), %xmm3
VFMADD231SS_( %xmm8,%xmm2,%xmm0 )
VFMADD231SS_( %xmm10,%xmm3,%xmm0 )
addq $4 , BI
addq $1, %rax
addq $ 4 , BI
addq $ 1, %rax
.endm
.macro SAVE1x4
@@ -432,8 +432,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
VFMADD231PS_( %ymm5,%ymm2,%ymm1 )
VFMADD231PS_( %ymm6,%ymm3,%ymm0 )
VFMADD231PS_( %ymm7,%ymm3,%ymm1 )
addq $2 , BI
addq $16, %rax
addq $ 2 , BI
addq $ 16, %rax
.endm
.macro SAVE16x2
@@ -474,8 +474,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vbroadcastss -3 * SIZE(BO, BI, SIZE), %ymm3
VFMADD231PS_( %ymm4,%ymm2,%ymm0 )
VFMADD231PS_( %ymm6,%ymm3,%ymm0 )
addq $2 , BI
addq $8 , %rax
addq $ 2 , BI
addq $ 8 , %rax
.endm
.macro SAVE8x2
@@ -507,8 +507,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vbroadcastss -3 * SIZE(BO, BI, SIZE), %xmm3
VFMADD231PS_( %xmm4,%xmm2,%xmm0 )
VFMADD231PS_( %xmm6,%xmm3,%xmm0 )
addq $2 , BI
addq $4 , %rax
addq $ 2 , BI
addq $ 4 , %rax
.endm
.macro SAVE4x2
@@ -542,8 +542,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
VFMADD231SS_( %xmm5,%xmm2,%xmm1 )
VFMADD231SS_( %xmm6,%xmm3,%xmm0 )
VFMADD231SS_( %xmm7,%xmm3,%xmm1 )
addq $2 , BI
addq $2, %rax
addq $ 2 , BI
addq $ 2, %rax
.endm
.macro SAVE2x2
@@ -583,8 +583,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovss -3 * SIZE(BO, BI, SIZE), %xmm3
VFMADD231SS_( %xmm4,%xmm2,%xmm0 )
VFMADD231SS_( %xmm6,%xmm3,%xmm0 )
addq $2 , BI
addq $1, %rax
addq $ 2 , BI
addq $ 1, %rax
.endm
.macro SAVE1x2
@@ -619,8 +619,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vbroadcastss -4 * SIZE(BO, BI, SIZE), %ymm2
VFMADD231PS_( %ymm4,%ymm2,%ymm0 )
VFMADD231PS_( %ymm5,%ymm2,%ymm1 )
addq $1 , BI
addq $16, %rax
addq $ 1 , BI
addq $ 16, %rax
.endm
.macro SAVE16x1
@@ -649,8 +649,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups -16 * SIZE(AO, %rax, SIZE), %ymm0
vbroadcastss -4 * SIZE(BO, BI, SIZE), %ymm2
VFMADD231PS_( %ymm4,%ymm2,%ymm0 )
addq $1 , BI
addq $8 , %rax
addq $ 1 , BI
addq $ 8 , %rax
.endm
.macro SAVE8x1
@@ -677,8 +677,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups -16 * SIZE(AO, %rax, SIZE), %xmm0
vbroadcastss -4 * SIZE(BO, BI, SIZE), %xmm2
VFMADD231PS_( %xmm4,%xmm2,%xmm0 )
addq $1 , BI
addq $4 , %rax
addq $ 1 , BI
addq $ 4 , %rax
.endm
.macro SAVE4x1
@@ -706,8 +706,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovss -4 * SIZE(BO, BI, SIZE), %xmm2
VFMADD231SS_( %xmm4,%xmm2,%xmm0 )
VFMADD231SS_( %xmm5,%xmm2,%xmm1 )
addq $1 , BI
addq $2, %rax
addq $ 1 , BI
addq $ 2 , %rax
.endm
.macro SAVE2x1
@@ -736,8 +736,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovss -16 * SIZE(AO, %rax, SIZE), %xmm0
vmovss -4 * SIZE(BO, BI, SIZE), %xmm2
VFMADD231SS_( %xmm4,%xmm2,%xmm0 )
addq $1 , BI
addq $1, %rax
addq $ 1 , BI
addq $ 1 , %rax
.endm
.macro SAVE1x1
@@ -882,8 +882,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups %xmm2, 8*SIZE(BO)
vmovups %xmm3,12*SIZE(BO)
addq $16*SIZE,BO1
addq $16*SIZE,BO
addq $ 16*SIZE,BO1
addq $ 16*SIZE,BO
decq %rax
jnz .L4_01a
@@ -899,8 +899,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups (BO1), %xmm0
vmovups %xmm0, (BO)
addq $4*SIZE,BO1
addq $4*SIZE,BO
addq $ 4*SIZE,BO1
addq $ 4*SIZE,BO
decq %rax
jnz .L4_02c
@@ -919,7 +919,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
movq A, AO // aoffset = a
addq $16 * SIZE, AO
addq $ 16 * SIZE, AO
movq M, I
sarq $4, I // i = (m >> 4)

View File

@@ -109,22 +109,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(OS_WINDOWS)
/*
 * STACK_TOUCH: selected by L_BUFFER_SIZE. For every 4096-byte step that
 * L_BUFFER_SIZE exceeds (up to four), the macro stores a zero dword at
 * 4096*k(%rsp), highest offset first; when L_BUFFER_SIZE <= 4096 it expands
 * to nothing.
 *
 * NOTE(review): presumably this pre-touches the stack pages of the local
 * buffer on Windows (the chain sits under an OS_WINDOWS guard) -- confirm.
 * NOTE(review): this text is a diff hunk, so each "movl $0" group is followed
 * by its re-spelled "movl $ 0" replacement; presumably only one group per
 * branch exists in the real file -- confirm against the actual source.
 */
#if L_BUFFER_SIZE > 16384
#define STACK_TOUCH \
movl $0, 4096 * 4(%rsp);\
movl $0, 4096 * 3(%rsp);\
movl $0, 4096 * 2(%rsp);\
movl $0, 4096 * 1(%rsp);
movl $ 0, 4096 * 4(%rsp);\
movl $ 0, 4096 * 3(%rsp);\
movl $ 0, 4096 * 2(%rsp);\
movl $ 0, 4096 * 1(%rsp);
#elif L_BUFFER_SIZE > 12288
#define STACK_TOUCH \
movl $0, 4096 * 3(%rsp);\
movl $0, 4096 * 2(%rsp);\
movl $0, 4096 * 1(%rsp);
movl $ 0, 4096 * 3(%rsp);\
movl $ 0, 4096 * 2(%rsp);\
movl $ 0, 4096 * 1(%rsp);
#elif L_BUFFER_SIZE > 8192
#define STACK_TOUCH \
movl $0, 4096 * 2(%rsp);\
movl $0, 4096 * 1(%rsp);
movl $ 0, 4096 * 2(%rsp);\
movl $ 0, 4096 * 1(%rsp);
#elif L_BUFFER_SIZE > 4096
#define STACK_TOUCH \
movl $0, 4096 * 1(%rsp);
movl $ 0, 4096 * 1(%rsp);
#else
#define STACK_TOUCH
#endif
@@ -212,8 +212,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
VFMADDPD_I( %ymm11,%ymm7,%ymm0 )
VFMADDPD_I( %ymm15,%ymm7,%ymm1 )
addq $4, BI
addq $8, %rax
addq $ 4, BI
addq $ 8, %rax
.endm
.macro SAVE4x2
@@ -222,10 +222,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vbroadcastsd ALPHA_I, %ymm1
// swap high and low 8 bytes
vshufpd $0x05, %ymm9 , %ymm9, %ymm9
vshufpd $0x05, %ymm11, %ymm11, %ymm11
vshufpd $0x05, %ymm13, %ymm13, %ymm13
vshufpd $0x05, %ymm15, %ymm15, %ymm15
vshufpd $ 0x05, %ymm9 , %ymm9, %ymm9
vshufpd $ 0x05, %ymm11, %ymm11, %ymm11
vshufpd $ 0x05, %ymm13, %ymm13, %ymm13
vshufpd $ 0x05, %ymm15, %ymm15, %ymm15
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
defined(NR) || defined(NC) || defined(TR) || defined(TC)
@@ -235,10 +235,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vaddsubpd %ymm13,%ymm12, %ymm12
vaddsubpd %ymm15,%ymm14, %ymm14
vshufpd $0x05, %ymm8 , %ymm8, %ymm9
vshufpd $0x05, %ymm10, %ymm10, %ymm11
vshufpd $0x05, %ymm12, %ymm12, %ymm13
vshufpd $0x05, %ymm14, %ymm14, %ymm15
vshufpd $ 0x05, %ymm8 , %ymm8, %ymm9
vshufpd $ 0x05, %ymm10, %ymm10, %ymm11
vshufpd $ 0x05, %ymm12, %ymm12, %ymm13
vshufpd $ 0x05, %ymm14, %ymm14, %ymm15
#else
vaddsubpd %ymm8, %ymm9 ,%ymm9
@@ -252,10 +252,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovapd %ymm15, %ymm14
// swap high and low 8 bytes
vshufpd $0x05, %ymm9 , %ymm9, %ymm9
vshufpd $0x05, %ymm11, %ymm11, %ymm11
vshufpd $0x05, %ymm13, %ymm13, %ymm13
vshufpd $0x05, %ymm15, %ymm15, %ymm15
vshufpd $ 0x05, %ymm9 , %ymm9, %ymm9
vshufpd $ 0x05, %ymm11, %ymm11, %ymm11
vshufpd $ 0x05, %ymm13, %ymm13, %ymm13
vshufpd $ 0x05, %ymm15, %ymm15, %ymm15
#endif
@@ -316,8 +316,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovddup -5 * SIZE(BO, BI, SIZE), %xmm7
VFMADDPD_I( %xmm11,%xmm7,%xmm0 )
VFMADDPD_I( %xmm15,%xmm7,%xmm1 )
addq $4, BI
addq $4, %rax
addq $ 4, BI
addq $ 4, %rax
.endm
.macro SAVE2x2
@@ -326,10 +326,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovddup ALPHA_I, %xmm1
// swap high and low 64 bits
vshufpd $0x01, %xmm9 , %xmm9, %xmm9
vshufpd $0x01, %xmm11, %xmm11, %xmm11
vshufpd $0x01, %xmm13, %xmm13, %xmm13
vshufpd $0x01, %xmm15, %xmm15, %xmm15
vshufpd $ 0x01, %xmm9 , %xmm9, %xmm9
vshufpd $ 0x01, %xmm11, %xmm11, %xmm11
vshufpd $ 0x01, %xmm13, %xmm13, %xmm13
vshufpd $ 0x01, %xmm15, %xmm15, %xmm15
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
defined(NR) || defined(NC) || defined(TR) || defined(TC)
@@ -339,10 +339,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vaddsubpd %xmm13,%xmm12, %xmm12
vaddsubpd %xmm15,%xmm14, %xmm14
vshufpd $0x01, %xmm8 , %xmm8, %xmm9
vshufpd $0x01, %xmm10, %xmm10, %xmm11
vshufpd $0x01, %xmm12, %xmm12, %xmm13
vshufpd $0x01, %xmm14, %xmm14, %xmm15
vshufpd $ 0x01, %xmm8 , %xmm8, %xmm9
vshufpd $ 0x01, %xmm10, %xmm10, %xmm11
vshufpd $ 0x01, %xmm12, %xmm12, %xmm13
vshufpd $ 0x01, %xmm14, %xmm14, %xmm15
#else
vaddsubpd %xmm8, %xmm9 ,%xmm9
@@ -356,10 +356,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovapd %xmm15, %xmm14
// swap high and low 64 bits
vshufpd $0x01, %xmm9 , %xmm9, %xmm9
vshufpd $0x01, %xmm11, %xmm11, %xmm11
vshufpd $0x01, %xmm13, %xmm13, %xmm13
vshufpd $0x01, %xmm15, %xmm15, %xmm15
vshufpd $ 0x01, %xmm9 , %xmm9, %xmm9
vshufpd $ 0x01, %xmm11, %xmm11, %xmm11
vshufpd $ 0x01, %xmm13, %xmm13, %xmm13
vshufpd $ 0x01, %xmm15, %xmm15, %xmm15
#endif
@@ -415,8 +415,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovddup -5 * SIZE(BO, BI, SIZE), %xmm7
VFMADDPD_R( %xmm10,%xmm6,%xmm0 )
VFMADDPD_I( %xmm11,%xmm7,%xmm0 )
addq $4, BI
addq $2, %rax
addq $ 4, BI
addq $ 2, %rax
.endm
.macro SAVE1x2
@@ -425,8 +425,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovddup ALPHA_I, %xmm1
// swap high and low 64 bits
vshufpd $0x01, %xmm9 , %xmm9, %xmm9
vshufpd $0x01, %xmm11, %xmm11, %xmm11
vshufpd $ 0x01, %xmm9 , %xmm9, %xmm9
vshufpd $ 0x01, %xmm11, %xmm11, %xmm11
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
defined(NR) || defined(NC) || defined(TR) || defined(TC)
@@ -434,8 +434,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vaddsubpd %xmm9, %xmm8 , %xmm8
vaddsubpd %xmm11,%xmm10, %xmm10
vshufpd $0x01, %xmm8 , %xmm8, %xmm9
vshufpd $0x01, %xmm10, %xmm10, %xmm11
vshufpd $ 0x01, %xmm8 , %xmm8, %xmm9
vshufpd $ 0x01, %xmm10, %xmm10, %xmm11
#else
vaddsubpd %xmm8, %xmm9, %xmm9
@@ -445,8 +445,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovapd %xmm11, %xmm10
// swap high and low 64 bits
vshufpd $0x01, %xmm9 , %xmm9, %xmm9
vshufpd $0x01, %xmm11, %xmm11, %xmm11
vshufpd $ 0x01, %xmm9 , %xmm9, %xmm9
vshufpd $ 0x01, %xmm11, %xmm11, %xmm11
#endif
@@ -486,8 +486,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
VFMADDPD_I( %ymm9 ,%ymm5,%ymm0 )
VFMADDPD_I( %ymm13,%ymm5,%ymm1 )
addq $2, BI
addq $8, %rax
addq $ 2, BI
addq $ 8, %rax
.endm
.macro SAVE4x1
@@ -496,8 +496,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vbroadcastsd ALPHA_I, %ymm1
// swap high and low 8 bytes
vshufpd $0x05, %ymm9 , %ymm9, %ymm9
vshufpd $0x05, %ymm13, %ymm13, %ymm13
vshufpd $ 0x05, %ymm9 , %ymm9, %ymm9
vshufpd $ 0x05, %ymm13, %ymm13, %ymm13
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
defined(NR) || defined(NC) || defined(TR) || defined(TC)
@@ -505,8 +505,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vaddsubpd %ymm9, %ymm8 , %ymm8
vaddsubpd %ymm13,%ymm12 , %ymm12
vshufpd $0x05, %ymm8 , %ymm8, %ymm9
vshufpd $0x05, %ymm12, %ymm12, %ymm13
vshufpd $ 0x05, %ymm8 , %ymm8, %ymm9
vshufpd $ 0x05, %ymm12, %ymm12, %ymm13
#else
vaddsubpd %ymm8, %ymm9 , %ymm9
@@ -516,8 +516,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovapd %ymm13, %ymm12
// swap high and low 8 bytes
vshufpd $0x05, %ymm9 , %ymm9, %ymm9
vshufpd $0x05, %ymm13, %ymm13, %ymm13
vshufpd $ 0x05, %ymm9 , %ymm9, %ymm9
vshufpd $ 0x05, %ymm13, %ymm13, %ymm13
#endif
@@ -559,8 +559,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovddup -3 * SIZE(BO, BI, SIZE), %xmm5
VFMADDPD_I( %xmm9,%xmm5,%xmm0 )
VFMADDPD_I( %xmm13,%xmm5,%xmm1 )
addq $2, BI
addq $4, %rax
addq $ 2, BI
addq $ 4, %rax
.endm
.macro SAVE2x1
@@ -569,8 +569,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovddup ALPHA_I, %xmm1
// swap high and low 64 bits
vshufpd $0x01, %xmm9 , %xmm9, %xmm9
vshufpd $0x01, %xmm13, %xmm13, %xmm13
vshufpd $ 0x01, %xmm9 , %xmm9, %xmm9
vshufpd $ 0x01, %xmm13, %xmm13, %xmm13
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
defined(NR) || defined(NC) || defined(TR) || defined(TC)
@@ -578,8 +578,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vaddsubpd %xmm9, %xmm8 , %xmm8
vaddsubpd %xmm13,%xmm12 , %xmm12
vshufpd $0x01, %xmm8 , %xmm8, %xmm9
vshufpd $0x01, %xmm12, %xmm12, %xmm13
vshufpd $ 0x01, %xmm8 , %xmm8, %xmm9
vshufpd $ 0x01, %xmm12, %xmm12, %xmm13
#else
vaddsubpd %xmm8, %xmm9 , %xmm9
@@ -589,8 +589,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovapd %xmm13, %xmm12
// swap high and low 64 bits
vshufpd $0x01, %xmm9 , %xmm9, %xmm9
vshufpd $0x01, %xmm13, %xmm13, %xmm13
vshufpd $ 0x01, %xmm9 , %xmm9, %xmm9
vshufpd $ 0x01, %xmm13, %xmm13, %xmm13
#endif
@@ -626,8 +626,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
VFMADDPD_R( %xmm8,%xmm4,%xmm0 )
vmovddup -3 * SIZE(BO, BI, SIZE), %xmm5
VFMADDPD_I( %xmm9,%xmm5,%xmm0 )
addq $2, BI
addq $2, %rax
addq $ 2, BI
addq $ 2, %rax
.endm
.macro SAVE1x1
@@ -636,14 +636,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovddup ALPHA_I, %xmm1
// swap high and low 64 bits
vshufpd $0x01, %xmm9 , %xmm9, %xmm9
vshufpd $ 0x01, %xmm9 , %xmm9, %xmm9
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
defined(NR) || defined(NC) || defined(TR) || defined(TC)
vaddsubpd %xmm9, %xmm8, %xmm8
vshufpd $0x01, %xmm8 , %xmm8, %xmm9
vshufpd $ 0x01, %xmm8 , %xmm8, %xmm9
#else
vaddsubpd %xmm8, %xmm9, %xmm9
@@ -651,7 +651,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovapd %xmm9, %xmm8
// swap high and low 64 bits
vshufpd $0x01, %xmm9 , %xmm9, %xmm9
vshufpd $ 0x01, %xmm9 , %xmm9, %xmm9
#endif
@@ -682,7 +682,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
PROLOGUE
PROFCODE
subq $STACKSIZE, %rsp
subq $ STACKSIZE, %rsp
movq %rbx, (%rsp)
movq %rbp, 8(%rsp)
movq %r12, 16(%rsp)
@@ -727,18 +727,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
movq %rsp, SP # save old stack
subq $128 + L_BUFFER_SIZE, %rsp
andq $-4096, %rsp # align stack
subq $ 128 + L_BUFFER_SIZE, %rsp
andq $ -4096, %rsp # align stack
STACK_TOUCH
cmpq $0, OLD_M
cmpq $ 0, OLD_M
je .L999
cmpq $0, OLD_N
cmpq $ 0, OLD_N
je .L999
cmpq $0, OLD_K
cmpq $ 0, OLD_K
je .L999
movq OLD_M, M
@@ -748,11 +748,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovsd %xmm0, ALPHA_R
vmovsd %xmm1, ALPHA_I
salq $ZBASE_SHIFT, LDC
salq $ ZBASE_SHIFT, LDC
movq N, %rax
xorq %rdx, %rdx
movq $2, %rdi
movq $ 2, %rdi
divq %rdi // N / 2
movq %rax, Ndiv6 // N / 2
movq %rdx, Nmod6 // N % 2
@@ -770,7 +770,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.L2_00_0:
movq Ndiv6, J
cmpq $0, J
cmpq $ 0, J
je .L1_2_0
ALIGN_4
@@ -789,8 +789,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups 2 * SIZE(BO1), %xmm1
vmovups %xmm0, (BO)
vmovups %xmm1, 2 * SIZE(BO)
addq $4*SIZE,BO1
addq $4*SIZE,BO
addq $ 4*SIZE,BO1
addq $ 4*SIZE,BO
decq %rax
jnz .L2_00_02b
@@ -809,10 +809,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
movq A, AO // aoffset = a
addq $8 * SIZE, AO
addq $ 8 * SIZE, AO
movq M, I
sarq $2, I // i = (m >> 2)
sarq $ 2, I // i = (m >> 2)
je .L2_2_10
ALIGN_4
@@ -825,15 +825,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
leaq BUFFER1, BO // first buffer to BO
addq $8 * SIZE, BO
addq $ 8 * SIZE, BO
#else
movq KK, %rax
leaq BUFFER1, BO // first buffer to BO
addq $8 * SIZE, BO
addq $ 8 * SIZE, BO
movq %rax, BI // Index for BO
leaq (,BI,4), BI // BI = BI * 4 ; number of values
leaq (BO, BI, SIZE), BO
salq $3, %rax // rax = rax * 8 ; number of values
salq $ 3, %rax // rax = rax * 8 ; number of values
leaq (AO, %rax, SIZE), AO
#endif
@@ -848,20 +848,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
movq KK, %rax
#ifdef LEFT
addq $4, %rax // number of values in AO
addq $ 4, %rax // number of values in AO
#else
addq $2, %rax // number of values in BO
addq $ 2, %rax // number of values in BO
#endif
movq %rax, KKK
#endif
andq $-8, %rax // K = K - ( K % 8 )
andq $ -8, %rax // K = K - ( K % 8 )
je .L2_4_16
movq %rax, BI // Index for BO
leaq ( ,BI,4), BI // BI = BI * 4 ; number of values
salq $3, %rax // rax = rax * 8 ; number of values
salq $ 3, %rax // rax = rax * 8 ; number of values
leaq (AO, %rax, SIZE), AO
leaq (BO, BI, SIZE), BO
negq BI
@@ -928,13 +928,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
movq KKK, %rax
#endif
andq $7, %rax # if (k & 1)
andq $ 7, %rax # if (k & 1)
je .L2_4_19
movq %rax, BI // Index for BO
leaq ( ,BI,4), BI // BI = BI * 4 ; number of values
salq $3, %rax // rax = rax * 8 ; number of values
salq $ 3, %rax // rax = rax * 8 ; number of values
leaq (AO, %rax, SIZE), AO
leaq (BO, BI, SIZE), BO
negq BI
@@ -960,16 +960,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
movq %rax, BI // Index for BO
leaq ( ,BI,4), BI // BI = BI * 4 ; number of values
leaq (BO, BI, SIZE), BO
salq $3, %rax // rax = rax * 8 ; number of values
salq $ 3, %rax // rax = rax * 8 ; number of values
leaq (AO, %rax, SIZE), AO
#endif
#if defined(TRMMKERNEL) && defined(LEFT)
addq $4, KK
addq $ 4, KK
#endif
addq $8 * SIZE, CO1 # coffset += 8
addq $ 8 * SIZE, CO1 # coffset += 8
decq I # i --
jg .L2_4_11
ALIGN_4
@@ -982,7 +982,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/******************************************************************************************************************/
.L2_2_10:
testq $2, M
testq $ 2, M
jz .L2_2_40 // to next 2 lines of N
.L2_2_11:
@@ -991,15 +991,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
leaq BUFFER1, BO // first buffer to BO
addq $8 * SIZE, BO
addq $ 8 * SIZE, BO
#else
movq KK, %rax
leaq BUFFER1, BO // first buffer to BO
addq $8 * SIZE, BO
addq $ 8 * SIZE, BO
movq %rax, BI // Index for BO
leaq (,BI,4), BI // BI = BI * 4 ; number of values
leaq (BO, BI, SIZE), BO
salq $2, %rax // rax = rax * 4 ; number of values
salq $ 2, %rax // rax = rax * 4 ; number of values
leaq (AO, %rax, SIZE), AO
#endif
@@ -1014,20 +1014,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
movq KK, %rax
#ifdef LEFT
addq $2, %rax // number of values in AO
addq $ 2, %rax // number of values in AO
#else
addq $2, %rax // number of values in BO
addq $ 2, %rax // number of values in BO
#endif
movq %rax, KKK
#endif
andq $-8, %rax // K = K - ( K % 8 )
andq $ -8, %rax // K = K - ( K % 8 )
je .L2_2_16
movq %rax, BI // Index for BO
leaq ( ,BI,4), BI // BI = BI * 4 ; number of values
salq $2, %rax // rax = rax * 4 ; number of values
salq $ 2, %rax // rax = rax * 4 ; number of values
leaq (AO, %rax, SIZE), AO
leaq (BO, BI, SIZE), BO
negq BI
@@ -1086,13 +1086,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
movq KKK, %rax
#endif
andq $7, %rax # if (k & 1)
andq $ 7, %rax # if (k & 1)
je .L2_2_19
movq %rax, BI // Index for BO
leaq ( ,BI,4), BI // BI = BI * 4 ; number of values
salq $2, %rax // rax = rax * 4 ; number of values
salq $ 2, %rax // rax = rax * 4 ; number of values
leaq (AO, %rax, SIZE), AO
leaq (BO, BI, SIZE), BO
negq BI
@@ -1118,16 +1118,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
movq %rax, BI // Index for BO
leaq ( ,BI,4), BI // BI = BI * 4 ; number of values
leaq (BO, BI, SIZE), BO
salq $2, %rax // rax = rax * 4 ; number of values
salq $ 2, %rax // rax = rax * 4 ; number of values
leaq (AO, %rax, SIZE), AO
#endif
#if defined(TRMMKERNEL) && defined(LEFT)
addq $2, KK
addq $ 2, KK
#endif
addq $4 * SIZE, CO1 # coffset += 4
addq $ 4 * SIZE, CO1 # coffset += 4
ALIGN_4
@@ -1135,7 +1135,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* Rest of M
***************************************************************************/
.L2_2_40:
testq $1, M
testq $ 1, M
jz .L2_2_60 // to next 2 lines of N
ALIGN_4
@@ -1146,15 +1146,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
leaq BUFFER1, BO // first buffer to BO
addq $8 * SIZE, BO
addq $ 8 * SIZE, BO
#else
movq KK, %rax
leaq BUFFER1, BO // first buffer to BO
addq $8 * SIZE, BO
addq $ 8 * SIZE, BO
movq %rax, BI // Index for BO
leaq (,BI,4), BI // BI = BI * 4 ; number of values
leaq (BO, BI, SIZE), BO
salq $1, %rax // rax = rax * 2 ; number of values
salq $ 1, %rax // rax = rax * 2 ; number of values
leaq (AO, %rax, SIZE), AO
#endif
@@ -1169,20 +1169,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
movq KK, %rax
#ifdef LEFT
addq $1, %rax // number of values in AO
addq $ 1, %rax // number of values in AO
#else
addq $2, %rax // number of values in BO
addq $ 2, %rax // number of values in BO
#endif
movq %rax, KKK
#endif
andq $-8, %rax // K = K - ( K % 8 )
andq $ -8, %rax // K = K - ( K % 8 )
je .L2_2_46
movq %rax, BI // Index for BO
leaq ( ,BI,4), BI // BI = BI * 4 ; number of values
salq $1, %rax // rax = rax * 2 ; number of values
salq $ 1, %rax // rax = rax * 2 ; number of values
leaq (AO, %rax, SIZE), AO
leaq (BO, BI, SIZE), BO
negq BI
@@ -1237,13 +1237,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
movq KKK, %rax
#endif
andq $7, %rax # if (k & 1)
andq $ 7, %rax # if (k & 1)
je .L2_2_49
movq %rax, BI // Index for BO
leaq ( ,BI,4), BI // BI = BI * 4 ; number of values
salq $1, %rax // rax = rax * 2 ; number of values
salq $ 1, %rax // rax = rax * 2 ; number of values
leaq (AO, %rax, SIZE), AO
leaq (BO, BI, SIZE), BO
negq BI
@@ -1269,16 +1269,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
movq %rax, BI // Index for BO
leaq ( ,BI,4), BI // BI = BI * 4 ; number of values
leaq (BO, BI, SIZE), BO
salq $1, %rax // rax = rax * 2 ; number of values
salq $ 1, %rax // rax = rax * 2 ; number of values
leaq (AO, %rax, SIZE), AO
#endif
#if defined(TRMMKERNEL) && defined(LEFT)
addq $1, KK
addq $ 1, KK
#endif
addq $2 * SIZE, CO1 # coffset += 2
addq $ 2 * SIZE, CO1 # coffset += 2
decq I # i --
jg .L2_2_41
ALIGN_4
@@ -1288,7 +1288,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.L2_2_60:
#if defined(TRMMKERNEL) && !defined(LEFT)
addq $2, KK
addq $ 2, KK
#endif
decq J // j --
@@ -1303,7 +1303,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*************************************************************************************************/
movq Nmod6, J
andq $1, J // j % 2
andq $ 1, J // j % 2
je .L999
ALIGN_4
@@ -1318,8 +1318,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmovups (BO1), %xmm0
vmovups %xmm0, (BO)
addq $2*SIZE,BO1
addq $2*SIZE,BO
addq $ 2*SIZE,BO1
addq $ 2*SIZE,BO
decq %rax
jnz .L1_00_02b
@@ -1337,10 +1337,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
movq A, AO // aoffset = a
addq $8 * SIZE, AO
addq $ 8 * SIZE, AO
movq M, I
sarq $2, I // i = (m >> 2)
sarq $ 2, I // i = (m >> 2)
je .L1_2_10
ALIGN_4
@@ -1354,15 +1354,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
leaq BUFFER1, BO // first buffer to BO
addq $4 * SIZE, BO
addq $ 4 * SIZE, BO
#else
movq KK, %rax
leaq BUFFER1, BO // first buffer to BO
addq $4 * SIZE, BO
addq $ 4 * SIZE, BO
movq %rax, BI // Index for BO
leaq (,BI,2), BI // BI = BI * 2 ; number of values
leaq (BO, BI, SIZE), BO
salq $3, %rax // rax = rax * 8 ; number of values
salq $ 3, %rax // rax = rax * 8 ; number of values
leaq (AO, %rax, SIZE), AO
#endif
@@ -1377,20 +1377,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
movq KK, %rax
#ifdef LEFT
addq $4, %rax // number of values in AO
addq $ 4, %rax // number of values in AO
#else
addq $1, %rax // number of values in BO
addq $ 1, %rax // number of values in BO
#endif
movq %rax, KKK
#endif
andq $-8, %rax // K = K - ( K % 8 )
andq $ -8, %rax // K = K - ( K % 8 )
je .L1_4_16
movq %rax, BI // Index for BO
leaq ( ,BI,2), BI // BI = BI * 2 ; number of values
salq $3, %rax // rax = rax * 8 ; number of values
salq $ 3, %rax // rax = rax * 8 ; number of values
leaq (AO, %rax, SIZE), AO
leaq (BO, BI, SIZE), BO
negq BI
@@ -1433,13 +1433,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
movq KKK, %rax
#endif
andq $7, %rax # if (k & 1)
andq $ 7, %rax # if (k & 1)
je .L1_4_19
movq %rax, BI // Index for BO
leaq ( ,BI,2), BI // BI = BI * 2 ; number of values
salq $3, %rax // rax = rax * 8 ; number of values
salq $ 3, %rax // rax = rax * 8 ; number of values
leaq (AO, %rax, SIZE), AO
leaq (BO, BI, SIZE), BO
negq BI
@@ -1466,16 +1466,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
movq %rax, BI // Index for BO
leaq ( ,BI,2), BI // BI = BI * 2 ; number of values
leaq (BO, BI, SIZE), BO
salq $3, %rax // rax = rax * 8 ; number of values
salq $ 3, %rax // rax = rax * 8 ; number of values
leaq (AO, %rax, SIZE), AO
#endif
#if defined(TRMMKERNEL) && defined(LEFT)
addq $4, KK
addq $ 4, KK
#endif
addq $8 * SIZE, CO1 # coffset += 8
addq $ 8 * SIZE, CO1 # coffset += 8
decq I # i --
jg .L1_4_11
ALIGN_4
@@ -1485,7 +1485,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*******************************************************************************************************/
.L1_2_10:
testq $2, M
testq $ 2, M
jz .L1_2_40
@@ -1495,15 +1495,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
leaq BUFFER1, BO // first buffer to BO
addq $4 * SIZE, BO
addq $ 4 * SIZE, BO
#else
movq KK, %rax
leaq BUFFER1, BO // first buffer to BO
addq $4 * SIZE, BO
addq $ 4 * SIZE, BO
movq %rax, BI // Index for BO
leaq (,BI,2), BI // BI = BI * 2 ; number of values
leaq (BO, BI, SIZE), BO
salq $2, %rax // rax = rax * 4 ; number of values
salq $ 2, %rax // rax = rax * 4 ; number of values
leaq (AO, %rax, SIZE), AO
#endif
@@ -1518,20 +1518,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
movq KK, %rax
#ifdef LEFT
addq $2, %rax // number of values in AO
addq $ 2, %rax // number of values in AO
#else
addq $1, %rax // number of values in BO
addq $ 1, %rax // number of values in BO
#endif
movq %rax, KKK
#endif
andq $-8, %rax // K = K - ( K % 8 )
andq $ -8, %rax // K = K - ( K % 8 )
je .L1_2_16
movq %rax, BI // Index for BO
leaq ( ,BI,2), BI // BI = BI * 2 ; number of values
salq $2, %rax // rax = rax * 4 ; number of values
salq $ 2, %rax // rax = rax * 4 ; number of values
leaq (AO, %rax, SIZE), AO
leaq (BO, BI, SIZE), BO
negq BI
@@ -1583,13 +1583,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
movq KKK, %rax
#endif
andq $7, %rax # if (k & 1)
andq $ 7, %rax # if (k & 1)
je .L1_2_19
movq %rax, BI // Index for BO
leaq ( ,BI,2), BI // BI = BI * 2 ; number of values
salq $2, %rax // rax = rax * 4 ; number of values
salq $ 2, %rax // rax = rax * 4 ; number of values
leaq (AO, %rax, SIZE), AO
leaq (BO, BI, SIZE), BO
negq BI
@@ -1615,16 +1615,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
movq %rax, BI // Index for BO
leaq ( ,BI,2), BI // BI = BI * 2 ; number of values
leaq (BO, BI, SIZE), BO
salq $2, %rax // rax = rax * 4 ; number of values
salq $ 2, %rax // rax = rax * 4 ; number of values
leaq (AO, %rax, SIZE), AO
#endif
#if defined(TRMMKERNEL) && defined(LEFT)
addq $2, KK
addq $ 2, KK
#endif
addq $4 * SIZE, CO1 # coffset += 4
addq $ 4 * SIZE, CO1 # coffset += 4
ALIGN_4
@@ -1633,7 +1633,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* Rest of M
***************************************************************************/
.L1_2_40:
testq $1, M
testq $ 1, M
jz .L999
ALIGN_4
@@ -1644,15 +1644,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
leaq BUFFER1, BO // first buffer to BO
addq $4 * SIZE, BO
addq $ 4 * SIZE, BO
#else
movq KK, %rax
leaq BUFFER1, BO // first buffer to BO
addq $4 * SIZE, BO
addq $ 4 * SIZE, BO
movq %rax, BI // Index for BO
leaq (,BI,2), BI // BI = BI * 2 ; number of values
leaq (BO, BI, SIZE), BO
salq $1, %rax // rax = rax * 2 ; number of values
salq $ 1, %rax // rax = rax * 2 ; number of values
leaq (AO, %rax, SIZE), AO
#endif
@@ -1667,20 +1667,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
movq KK, %rax
#ifdef LEFT
addq $1, %rax // number of values in AO
addq $ 1, %rax // number of values in AO
#else
addq $1, %rax // number of values in BO
addq $ 1, %rax // number of values in BO
#endif
movq %rax, KKK
#endif
andq $-8, %rax // K = K - ( K % 8 )
andq $ -8, %rax // K = K - ( K % 8 )
je .L1_2_46
movq %rax, BI // Index for BO
leaq ( ,BI,2), BI // BI = BI * 2 ; number of values
salq $1, %rax // rax = rax * 2 ; number of values
salq $ 1, %rax // rax = rax * 2 ; number of values
leaq (AO, %rax, SIZE), AO
leaq (BO, BI, SIZE), BO
negq BI
@@ -1731,13 +1731,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
movq KKK, %rax
#endif
andq $7, %rax # if (k & 1)
andq $ 7, %rax # if (k & 1)
je .L1_2_49
movq %rax, BI // Index for BO
leaq ( ,BI,2), BI // BI = BI * 2 ; number of values
salq $1, %rax // rax = rax * 2 ; number of values
salq $ 1, %rax // rax = rax * 2 ; number of values
leaq (AO, %rax, SIZE), AO
leaq (BO, BI, SIZE), BO
negq BI
@@ -1763,16 +1763,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
movq %rax, BI // Index for BO
leaq ( ,BI,2), BI // BI = BI * 2 ; number of values
leaq (BO, BI, SIZE), BO
salq $1, %rax // rax = rax * 2 ; number of values
salq $ 1, %rax // rax = rax * 2 ; number of values
leaq (AO, %rax, SIZE), AO
#endif
#if defined(TRMMKERNEL) && defined(LEFT)
addq $1, KK
addq $ 1, KK
#endif
addq $2 * SIZE, CO1 # coffset += 2
addq $ 2 * SIZE, CO1 # coffset += 2
decq I # i --
jg .L1_2_41
ALIGN_4
@@ -1806,7 +1806,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
movups 208(%rsp), %xmm15
#endif
addq $STACKSIZE, %rsp
addq $ STACKSIZE, %rsp
ret
EPILOGUE

19
lapack-devel.log Normal file
View File

@@ -0,0 +1,19 @@
========================================================================================
2014/05/07 Saar
Platform: BULLDOZER single thread
--> LAPACK TESTING SUMMARY <--
Processing LAPACK Testing output found in the TESTING directory
SUMMARY nb test run numerical error other error
================ =========== ================= ================
REAL 1079349 0 (0.000%) 0 (0.000%)
DOUBLE PRECISION 1080161 0 (0.000%) 0 (0.000%)
COMPLEX 556022 0 (0.000%) 0 (0.000%)
COMPLEX16 556834 0 (0.000%) 0 (0.000%)
--> ALL PRECISIONS 3272366 0 (0.000%) 0 (0.000%)
========================================================================================

View File

@@ -56,7 +56,7 @@ include ../make.inc
ALLAUX = ilaenv.o ieeeck.o lsamen.o xerbla_array.o iparmq.o \
ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \
../INSTALL/ilaver.o
../INSTALL/ilaver.o ../INSTALL/slamch.o
SCLAUX = \
sbdsdc.o \
@@ -92,7 +92,7 @@ DZLAUX = \
dlasr.o dlasrt.o dlassq.o dlasv2.o dpttrf.o dstebz.o dstedc.o \
dsteqr.o dsterf.o dlaisnan.o disnan.o \
dlartgp.o dlartgs.o \
../INSTALL/dsecnd_$(TIMER).o
../INSTALL/dlamch.o ../INSTALL/dsecnd_$(TIMER).o
SLASRC = \
sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \
@@ -101,7 +101,7 @@ SLASRC = \
sgegs.o sgegv.o sgehd2.o sgehrd.o sgelq2.o sgelqf.o \
sgels.o sgelsd.o sgelss.o sgelsx.o sgelsy.o sgeql2.o sgeqlf.o \
sgeqp3.o sgeqpf.o sgeqr2.o sgeqr2p.o sgeqrf.o sgeqrfp.o sgerfs.o \
sgerq2.o sgerqf.o sgesc2.o sgesdd.o sgesv.o sgesvd.o sgesvx.o \
sgerq2.o sgerqf.o sgesc2.o sgesdd.o sgesvd.o sgesvx.o \
sgetc2.o sgetri.o \
sggbak.o sggbal.o sgges.o sggesx.o sggev.o sggevx.o \
sggglm.o sgghrd.o sgglse.o sggqrf.o \
@@ -127,7 +127,7 @@ SLASRC = \
sormr3.o sormrq.o sormrz.o sormtr.o spbcon.o spbequ.o spbrfs.o \
spbstf.o spbsv.o spbsvx.o \
spbtf2.o spbtrf.o spbtrs.o spocon.o spoequ.o sporfs.o sposv.o \
sposvx.o spotri.o spstrf.o spstf2.o \
sposvx.o spstrf.o spstf2.o \
sppcon.o sppequ.o \
spprfs.o sppsv.o sppsvx.o spptrf.o spptri.o spptrs.o sptcon.o \
spteqr.o sptrfs.o sptsv.o sptsvx.o spttrs.o sptts2.o srscl.o \
@@ -157,7 +157,7 @@ SLASRC = \
sgeqrt.o sgeqrt2.o sgeqrt3.o sgemqrt.o \
stpqrt.o stpqrt2.o stpmqrt.o stprfb.o
DSLASRC = spotrs.o
DSLASRC = spotrs.o
ifdef USEXBLAS
SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \
@@ -176,7 +176,7 @@ CLASRC = \
cgegs.o cgegv.o cgehd2.o cgehrd.o cgelq2.o cgelqf.o \
cgels.o cgelsd.o cgelss.o cgelsx.o cgelsy.o cgeql2.o cgeqlf.o cgeqp3.o \
cgeqpf.o cgeqr2.o cgeqr2p.o cgeqrf.o cgeqrfp.o cgerfs.o \
cgerq2.o cgerqf.o cgesc2.o cgesdd.o cgesv.o cgesvd.o \
cgerq2.o cgerqf.o cgesc2.o cgesdd.o cgesvd.o \
cgesvx.o cgetc2.o cgetri.o \
cggbak.o cggbal.o cgges.o cggesx.o cggev.o cggevx.o cggglm.o \
cgghrd.o cgglse.o cggqrf.o cggrqf.o \
@@ -210,14 +210,14 @@ CLASRC = \
clasyf.o clasyf_rook.o clatbs.o clatdf.o clatps.o clatrd.o clatrs.o clatrz.o \
clatzm.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \
cpbsvx.o cpbtf2.o cpbtrf.o cpbtrs.o cpocon.o cpoequ.o cporfs.o \
cposv.o cposvx.o cpotri.o cpstrf.o cpstf2.o \
cposv.o cposvx.o cpstrf.o cpstf2.o \
cppcon.o cppequ.o cpprfs.o cppsv.o cppsvx.o cpptrf.o cpptri.o cpptrs.o \
cptcon.o cpteqr.o cptrfs.o cptsv.o cptsvx.o cpttrf.o cpttrs.o cptts2.o \
crot.o cspcon.o cspmv.o cspr.o csprfs.o cspsv.o \
crot.o cspcon.o csprfs.o cspsv.o \
cspsvx.o csptrf.o csptri.o csptrs.o csrscl.o cstedc.o \
cstegr.o cstein.o csteqr.o \
csycon.o csymv.o \
csyr.o csyrfs.o csysv.o csysvx.o csytf2.o csytrf.o csytri.o csytri2.o csytri2x.o \
csycon.o \
csyrfs.o csysv.o csysvx.o csytf2.o csytrf.o csytri.o csytri2.o csytri2x.o \
csyswapr.o csytrs.o csytrs2.o csyconv.o \
csytf2_rook.o csytrf_rook.o csytrs_rook.o \
csytri_rook.o csycon_rook.o csysv_rook.o \
@@ -252,7 +252,7 @@ CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \
cla_lin_berr.o clarscl2.o clascl2.o cla_wwaddw.o
endif
ZCLASRC = cpotrs.o
ZCLASRC = cpotrs.o
DLASRC = \
dgbbrd.o dgbcon.o dgbequ.o dgbrfs.o dgbsv.o \
@@ -261,7 +261,7 @@ DLASRC = \
dgegs.o dgegv.o dgehd2.o dgehrd.o dgelq2.o dgelqf.o \
dgels.o dgelsd.o dgelss.o dgelsx.o dgelsy.o dgeql2.o dgeqlf.o \
dgeqp3.o dgeqpf.o dgeqr2.o dgeqr2p.o dgeqrf.o dgeqrfp.o dgerfs.o \
dgerq2.o dgerqf.o dgesc2.o dgesdd.o dgesv.o dgesvd.o dgesvx.o \
dgerq2.o dgerqf.o dgesc2.o dgesdd.o dgesvd.o dgesvx.o \
dgetc2.o dgetri.o \
dggbak.o dggbal.o dgges.o dggesx.o dggev.o dggevx.o \
dggglm.o dgghrd.o dgglse.o dggqrf.o \
@@ -287,7 +287,7 @@ DLASRC = \
dormr3.o dormrq.o dormrz.o dormtr.o dpbcon.o dpbequ.o dpbrfs.o \
dpbstf.o dpbsv.o dpbsvx.o \
dpbtf2.o dpbtrf.o dpbtrs.o dpocon.o dpoequ.o dporfs.o dposv.o \
dposvx.o dpotri.o dpotrs.o dpstrf.o dpstf2.o \
dposvx.o dpotrs.o dpstrf.o dpstf2.o \
dppcon.o dppequ.o \
dpprfs.o dppsv.o dppsvx.o dpptrf.o dpptri.o dpptrs.o dptcon.o \
dpteqr.o dptrfs.o dptsv.o dptsvx.o dpttrs.o dptts2.o drscl.o \
@@ -335,8 +335,8 @@ ZLASRC = \
zgegs.o zgegv.o zgehd2.o zgehrd.o zgelq2.o zgelqf.o \
zgels.o zgelsd.o zgelss.o zgelsx.o zgelsy.o zgeql2.o zgeqlf.o zgeqp3.o \
zgeqpf.o zgeqr2.o zgeqr2p.o zgeqrf.o zgeqrfp.o zgerfs.o zgerq2.o zgerqf.o \
zgesc2.o zgesdd.o zgesv.o zgesvd.o zgesvx.o zgetc2.o \
zgetri.o \
zgesc2.o zgesdd.o zgesvd.o zgesvx.o zgetc2.o \
zgetri.o \
zggbak.o zggbal.o zgges.o zggesx.o zggev.o zggevx.o zggglm.o \
zgghrd.o zgglse.o zggqrf.o zggrqf.o \
zggsvd.o zggsvp.o \
@@ -369,17 +369,17 @@ ZLASRC = \
zlarfx.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \
zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \
zlassq.o zlasyf.o zlasyf_rook.o \
zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlatzm.o zlauu2.o \
zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlatzm.o \
zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \
zpbsvx.o zpbtf2.o zpbtrf.o zpbtrs.o zpocon.o zpoequ.o zporfs.o \
zposv.o zposvx.o zpotri.o zpotrs.o zpstrf.o zpstf2.o \
zposv.o zposvx.o zpotrs.o zpstrf.o zpstf2.o \
zppcon.o zppequ.o zpprfs.o zppsv.o zppsvx.o zpptrf.o zpptri.o zpptrs.o \
zptcon.o zpteqr.o zptrfs.o zptsv.o zptsvx.o zpttrf.o zpttrs.o zptts2.o \
zrot.o zspcon.o zspmv.o zspr.o zsprfs.o zspsv.o \
zrot.o zspcon.o zsprfs.o zspsv.o \
zspsvx.o zsptrf.o zsptri.o zsptrs.o zdrscl.o zstedc.o \
zstegr.o zstein.o zsteqr.o \
zsycon.o zsymv.o \
zsyr.o zsyrfs.o zsysv.o zsysvx.o zsytf2.o zsytrf.o zsytri.o zsytri2.o zsytri2x.o \
zsycon.o \
zsyrfs.o zsysv.o zsysvx.o zsytf2.o zsytrf.o zsytri.o zsytri2.o zsytri2x.o \
zsyswapr.o zsytrs.o zsytrs2.o zsyconv.o \
zsytf2_rook.o zsytrf_rook.o zsytrs_rook.o \
zsytri_rook.o zsycon_rook.o zsysv_rook.o \
@@ -417,8 +417,6 @@ endif
ALLOBJ = $(SLASRC) $(DLASRC) $(DSLASRC) $(CLASRC) $(ZLASRC) $(ZCLASRC) \
$(SCLAUX) $(DZLAUX) $(ALLAUX)
ALLOBJ_P = $(ALLOBJ:.o=.$(PSUFFIX))
ifdef USEXBLAS
ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
endif
@@ -435,6 +433,7 @@ lapacklib: $(ALLOBJ) $(ALLXOBJ)
$(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ_P)
$(RANLIB) $@
single: $(SLASRC) $(DSLASRC) $(SXLASRC) $(SCLAUX) $(ALLAUX)
$(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \
$(SXLASRC) $(SCLAUX) $(ALLAUX) $(ALLXAUX)
@@ -483,16 +482,11 @@ clean:
%.$(PSUFFIX): %.f
$(FORTRAN) $(POPTS) -c $< -o $@
slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
sla_wwaddw.o: sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
dla_wwaddw.o: dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
cla_wwaddw.o: cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
zla_wwaddw.o: zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
slaruv.$(PSUFFIX): slaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
dlaruv.$(PSUFFIX): dlaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
sla_wwaddw.$(PSUFFIX): sla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
dla_wwaddw.$(PSUFFIX): dla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
cla_wwaddw.$(PSUFFIX): cla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
zla_wwaddw.$(PSUFFIX): zla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
sla_wwaddw.o: sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
dla_wwaddw.o: dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
cla_wwaddw.o: cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
zla_wwaddw.o: zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@

View File

@@ -5,5 +5,5 @@ Data file for testing COMPLEX LAPACK linear equation routines RFP format
1 2 15 Values of NRHS (number of right hand sides)
9 Number of matrix types (list types on next line if 0 < NTYPES < 9)
1 2 3 4 5 6 7 8 9 Matrix Types
30.0 Threshold value of test ratio
50.0 Threshold value of test ratio
T Put T to test the error exits

View File

@@ -5,7 +5,7 @@ SEP: Data file for testing Symmetric Eigenvalue Problem routines
1 3 3 3 10 Values of NB (blocksize)
2 2 2 2 2 Values of NBMIN (minimum blocksize)
1 0 5 9 1 Values of NX (crossover point)
50.0 Threshold value
60.0 Threshold value
T Put T to test the LAPACK routines
T Put T to test the driver routines
T Put T to test the error exits

View File

@@ -7,7 +7,7 @@ SVD: Data file for testing Singular Value Decomposition routines
2 2 2 2 2 Values of NBMIN (minimum blocksize)
1 0 5 9 1 Values of NX (crossover point)
2 0 2 2 2 Values of NRHS
50.0 Threshold value
54.0 Threshold value
T Put T to test the LAPACK routines
T Put T to test the driver routines
T Put T to test the error exits

View File

@@ -2072,9 +2072,9 @@ SOBJ_FILES := $(SSRC_OBJ)
DOBJ_FILES := $(DSRC_OBJ)
ZOBJ_FILES := $(ZSRC_OBJ)
ifdef LAPACKE_TESTING
# ifdef LAPACKE_TESTING
ZOBJ_FILES += $(MATGEN_OBJ)
endif
#endif
ALLOBJ = $(COBJ_FILES) $(DOBJ_FILES) $(SOBJ_FILES) $(ZOBJ_FILES) $(OBJ_FILES)
@@ -2093,7 +2093,9 @@ all: ../../$(LAPACKELIB)
$(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(DOBJ_FILES)
$(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(SOBJ_FILES)
$(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ZOBJ_FILES)
ifdef USEXBLAS
$(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ALLXOBJ)
endif
$(RANLIB) ../../$(LAPACKELIB)
.c.o:

View File

@@ -1,7 +1,8 @@
TOPDIR = ..
include ../Makefile.system
SUBDIRS = laswp getf2 getrf potf2 potrf lauu2 lauum trti2 trtri getrs
#SUBDIRS = laswp getf2 getrf potf2 potrf lauu2 lauum trti2 trtri getrs
SUBDIRS = getrf getf2 laswp getrs potrf potf2 lauu2 lauum trti2 trtri
FLAMEDIRS = laswp getf2 potf2 lauu2 trti2

View File

@@ -1,194 +0,0 @@
SUBROUTINE CGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
*
* -- LAPACK routine (version 3.0) --
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
* Courant Institute, Argonne National Lab, and Rice University
* June 30, 1999
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LWORK, N
* ..
* .. Array Arguments ..
INTEGER IPIV( * )
COMPLEX A( LDA, * ), WORK( * )
* ..
*
* Purpose
* =======
*
* CGETRI computes the inverse of a matrix using the LU factorization
* computed by CGETRF.
*
* This method inverts U and then computes inv(A) by solving the system
* inv(A)*L = inv(U) for inv(A).
*
* Arguments
* =========
*
* N (input) INTEGER
* The order of the matrix A. N >= 0.
*
* A (input/output) COMPLEX array, dimension (LDA,N)
* On entry, the factors L and U from the factorization
* A = P*L*U as computed by CGETRF.
* On exit, if INFO = 0, the inverse of the original matrix A.
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(1,N).
*
* IPIV (input) INTEGER array, dimension (N)
* The pivot indices from CGETRF; for 1<=i<=N, row i of the
* matrix was interchanged with row IPIV(i).
*
* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK))
* WORK(1) is always written, so WORK must hold at least one
* element even for a workspace query.
* On exit from a workspace query (LWORK = -1), WORK(1) holds
* the optimal LWORK, max(1,N*NB).
* On exit from a computation with INFO = 0, WORK(1) holds
* N*NB if 1 < NB < N and N otherwise, where NB is the
* blocksize returned by ILAENV.
*
* LWORK (input) INTEGER
* The dimension of the array WORK. LWORK >= max(1,N).
* For optimal performance LWORK >= N*NB, where NB is
* the optimal blocksize returned by ILAENV.
*
* If LWORK = -1, then a workspace query is assumed; the routine
* only calculates the optimal size of the WORK array, returns
* this value as the first entry of the WORK array, and no error
* message related to LWORK is issued by XERBLA.
*
* INFO (output) INTEGER
* = 0: successful exit
* < 0: if INFO = -i, the i-th argument had an illegal value
* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is
* singular and its inverse could not be computed.
*
* =====================================================================
*
* .. Parameters ..
COMPLEX ZERO, ONE
PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ),
$ ONE = ( 1.0E+0, 0.0E+0 ) )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
$ NBMIN, NN
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. External Subroutines ..
EXTERNAL CGEMM, CGEMV, CSWAP, CTRSM, CTRTRI, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
NB = ILAENV( 1, 'CGETRI', ' ', N, -1, -1, -1 )
*
* Report at least 1 as the optimal workspace size, so that the
* value returned by a workspace query with N = 0 satisfies the
* LWORK >= max(1,N) check below when it is passed back in.
*
LWKOPT = MAX( 1, N*NB )
WORK( 1 ) = LWKOPT
LQUERY = ( LWORK.EQ.-1 )
IF( N.LT.0 ) THEN
INFO = -1
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
INFO = -3
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
INFO = -6
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'CGETRI', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
* Form inv(U). If INFO > 0 from CTRTRI, then U is singular,
* and the inverse is not computed.
*
CALL CTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
IF( INFO.GT.0 )
$ RETURN
*
* Choose the block size. IWS records the workspace needed for the
* block size returned by ILAENV; if LWORK is smaller than that,
* NB is reduced to the number of columns of length N that fit.
*
NBMIN = 2
LDWORK = N
IF( NB.GT.1 .AND. NB.LT.N ) THEN
IWS = MAX( LDWORK*NB, 1 )
IF( LWORK.LT.IWS ) THEN
NB = LWORK / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'CGETRI', ' ', N, -1, -1, -1 ) )
END IF
ELSE
IWS = N
END IF
*
* Solve the equation inv(A)*L = inv(U) for inv(A).
*
IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
*
* Use unblocked code.
*
DO 20 J = N, 1, -1
*
* Copy current column of L to WORK and replace with zeros.
*
DO 10 I = J + 1, N
WORK( I ) = A( I, J )
A( I, J ) = ZERO
10 CONTINUE
*
* Compute current column of inv(A).
*
IF( J.LT.N )
$ CALL CGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
$ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
20 CONTINUE
ELSE
*
* Use blocked code.
*
* NN is the first column of the last (possibly partial) block;
* the blocks are processed from right to left.
*
NN = ( ( N-1 ) / NB )*NB + 1
DO 50 J = NN, 1, -NB
JB = MIN( NB, N-J+1 )
*
* Copy current block column of L to WORK and replace with
* zeros.
*
DO 40 JJ = J, J + JB - 1
DO 30 I = JJ + 1, N
WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
A( I, JJ ) = ZERO
30 CONTINUE
40 CONTINUE
*
* Compute current block column of inv(A).
*
IF( J+JB.LE.N )
$ CALL CGEMM( 'No transpose', 'No transpose', N, JB,
$ N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
$ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
CALL CTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
$ ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
50 CONTINUE
END IF
*
* Apply column interchanges, in the reverse of the pivoting order.
*
DO 60 J = N - 1, 1, -1
JP = IPIV( J )
IF( JP.NE.J )
$ CALL CSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
60 CONTINUE
*
WORK( 1 ) = IWS
RETURN
*
* End of CGETRI
*
END

View File

@@ -1,193 +0,0 @@
SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
*
* -- LAPACK routine (version 3.0) --
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
* Courant Institute, Argonne National Lab, and Rice University
* June 30, 1999
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LWORK, N
* ..
* .. Array Arguments ..
INTEGER IPIV( * )
DOUBLE PRECISION A( LDA, * ), WORK( * )
* ..
*
* Purpose
* =======
*
* DGETRI computes the inverse of a matrix using the LU factorization
* computed by DGETRF.
*
* This method inverts U and then computes inv(A) by solving the system
* inv(A)*L = inv(U) for inv(A).
*
* Arguments
* =========
*
* N (input) INTEGER
* The order of the matrix A. N >= 0.
*
* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
* On entry, the factors L and U from the factorization
* A = P*L*U as computed by DGETRF.
* On exit, if INFO = 0, the inverse of the original matrix A.
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(1,N).
*
* IPIV (input) INTEGER array, dimension (N)
* The pivot indices from DGETRF; for 1<=i<=N, row i of the
* matrix was interchanged with row IPIV(i).
*
* WORK (workspace/output) DOUBLE PRECISION array,
* dimension (MAX(1,LWORK))
* WORK(1) is always written, so WORK must hold at least one
* element even for a workspace query.
* On exit from a workspace query (LWORK = -1), WORK(1) holds
* the optimal LWORK, max(1,N*NB).
* On exit from a computation with INFO = 0, WORK(1) holds
* N*NB if 1 < NB < N and N otherwise, where NB is the
* blocksize returned by ILAENV.
*
* LWORK (input) INTEGER
* The dimension of the array WORK. LWORK >= max(1,N).
* For optimal performance LWORK >= N*NB, where NB is
* the optimal blocksize returned by ILAENV.
*
* If LWORK = -1, then a workspace query is assumed; the routine
* only calculates the optimal size of the WORK array, returns
* this value as the first entry of the WORK array, and no error
* message related to LWORK is issued by XERBLA.
*
* INFO (output) INTEGER
* = 0: successful exit
* < 0: if INFO = -i, the i-th argument had an illegal value
* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is
* singular and its inverse could not be computed.
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
$ NBMIN, NN
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. External Subroutines ..
EXTERNAL DGEMM, DGEMV, DSWAP, DTRSM, DTRTRI, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
NB = ILAENV( 1, 'DGETRI', ' ', N, -1, -1, -1 )
*
* Report at least 1 as the optimal workspace size, so that the
* value returned by a workspace query with N = 0 satisfies the
* LWORK >= max(1,N) check below when it is passed back in.
*
LWKOPT = MAX( 1, N*NB )
WORK( 1 ) = LWKOPT
LQUERY = ( LWORK.EQ.-1 )
IF( N.LT.0 ) THEN
INFO = -1
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
INFO = -3
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
INFO = -6
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGETRI', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
* Form inv(U). If INFO > 0 from DTRTRI, then U is singular,
* and the inverse is not computed.
*
CALL DTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
IF( INFO.GT.0 )
$ RETURN
*
* Choose the block size. IWS records the workspace needed for the
* block size returned by ILAENV; if LWORK is smaller than that,
* NB is reduced to the number of columns of length N that fit.
*
NBMIN = 2
LDWORK = N
IF( NB.GT.1 .AND. NB.LT.N ) THEN
IWS = MAX( LDWORK*NB, 1 )
IF( LWORK.LT.IWS ) THEN
NB = LWORK / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'DGETRI', ' ', N, -1, -1, -1 ) )
END IF
ELSE
IWS = N
END IF
*
* Solve the equation inv(A)*L = inv(U) for inv(A).
*
IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
*
* Use unblocked code.
*
DO 20 J = N, 1, -1
*
* Copy current column of L to WORK and replace with zeros.
*
DO 10 I = J + 1, N
WORK( I ) = A( I, J )
A( I, J ) = ZERO
10 CONTINUE
*
* Compute current column of inv(A).
*
IF( J.LT.N )
$ CALL DGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
$ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
20 CONTINUE
ELSE
*
* Use blocked code.
*
* NN is the first column of the last (possibly partial) block;
* the blocks are processed from right to left.
*
NN = ( ( N-1 ) / NB )*NB + 1
DO 50 J = NN, 1, -NB
JB = MIN( NB, N-J+1 )
*
* Copy current block column of L to WORK and replace with
* zeros.
*
DO 40 JJ = J, J + JB - 1
DO 30 I = JJ + 1, N
WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
A( I, JJ ) = ZERO
30 CONTINUE
40 CONTINUE
*
* Compute current block column of inv(A).
*
IF( J+JB.LE.N )
$ CALL DGEMM( 'No transpose', 'No transpose', N, JB,
$ N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
$ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
CALL DTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
$ ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
50 CONTINUE
END IF
*
* Apply column interchanges, in the reverse of the pivoting order.
*
DO 60 J = N - 1, 1, -1
JP = IPIV( J )
IF( JP.NE.J )
$ CALL DSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
60 CONTINUE
*
WORK( 1 ) = IWS
RETURN
*
* End of DGETRI
*
END

View File

@@ -1,193 +0,0 @@
SUBROUTINE SGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
*
* -- LAPACK routine (version 3.0) --
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
* Courant Institute, Argonne National Lab, and Rice University
* June 30, 1999
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LWORK, N
* ..
* .. Array Arguments ..
INTEGER IPIV( * )
REAL A( LDA, * ), WORK( * )
* ..
*
* Purpose
* =======
*
* SGETRI computes the inverse of a matrix using the LU factorization
* computed by SGETRF.
*
* This method inverts U and then computes inv(A) by solving the system
* inv(A)*L = inv(U) for inv(A).
*
* Arguments
* =========
*
* N (input) INTEGER
* The order of the matrix A. N >= 0.
*
* A (input/output) REAL array, dimension (LDA,N)
* On entry, the factors L and U from the factorization
* A = P*L*U as computed by SGETRF.
* On exit, if INFO = 0, the inverse of the original matrix A.
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(1,N).
*
* IPIV (input) INTEGER array, dimension (N)
* The pivot indices from SGETRF; for 1<=i<=N, row i of the
* matrix was interchanged with row IPIV(i).
*
* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK))
* WORK(1) is always written, so WORK must hold at least one
* element even for a workspace query.
* On exit from a workspace query (LWORK = -1), WORK(1) holds
* the optimal LWORK, max(1,N*NB).
* On exit from a computation with INFO = 0, WORK(1) holds
* N*NB if 1 < NB < N and N otherwise, where NB is the
* blocksize returned by ILAENV.
*
* LWORK (input) INTEGER
* The dimension of the array WORK. LWORK >= max(1,N).
* For optimal performance LWORK >= N*NB, where NB is
* the optimal blocksize returned by ILAENV.
*
* If LWORK = -1, then a workspace query is assumed; the routine
* only calculates the optimal size of the WORK array, returns
* this value as the first entry of the WORK array, and no error
* message related to LWORK is issued by XERBLA.
*
* INFO (output) INTEGER
* = 0: successful exit
* < 0: if INFO = -i, the i-th argument had an illegal value
* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is
* singular and its inverse could not be computed.
*
* =====================================================================
*
* .. Parameters ..
REAL ZERO, ONE
PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
$ NBMIN, NN
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. External Subroutines ..
EXTERNAL SGEMM, SGEMV, SSWAP, STRSM, STRTRI, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
NB = ILAENV( 1, 'SGETRI', ' ', N, -1, -1, -1 )
*
* Report at least 1 as the optimal workspace size, so that the
* value returned by a workspace query with N = 0 satisfies the
* LWORK >= max(1,N) check below when it is passed back in.
*
LWKOPT = MAX( 1, N*NB )
WORK( 1 ) = LWKOPT
LQUERY = ( LWORK.EQ.-1 )
IF( N.LT.0 ) THEN
INFO = -1
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
INFO = -3
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
INFO = -6
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'SGETRI', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
* Form inv(U). If INFO > 0 from STRTRI, then U is singular,
* and the inverse is not computed.
*
CALL STRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
IF( INFO.GT.0 )
$ RETURN
*
* Choose the block size. IWS records the workspace needed for the
* block size returned by ILAENV; if LWORK is smaller than that,
* NB is reduced to the number of columns of length N that fit.
*
NBMIN = 2
LDWORK = N
IF( NB.GT.1 .AND. NB.LT.N ) THEN
IWS = MAX( LDWORK*NB, 1 )
IF( LWORK.LT.IWS ) THEN
NB = LWORK / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'SGETRI', ' ', N, -1, -1, -1 ) )
END IF
ELSE
IWS = N
END IF
*
* Solve the equation inv(A)*L = inv(U) for inv(A).
*
IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
*
* Use unblocked code.
*
DO 20 J = N, 1, -1
*
* Copy current column of L to WORK and replace with zeros.
*
DO 10 I = J + 1, N
WORK( I ) = A( I, J )
A( I, J ) = ZERO
10 CONTINUE
*
* Compute current column of inv(A).
*
IF( J.LT.N )
$ CALL SGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
$ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
20 CONTINUE
ELSE
*
* Use blocked code.
*
* NN is the first column of the last (possibly partial) block;
* the blocks are processed from right to left.
*
NN = ( ( N-1 ) / NB )*NB + 1
DO 50 J = NN, 1, -NB
JB = MIN( NB, N-J+1 )
*
* Copy current block column of L to WORK and replace with
* zeros.
*
DO 40 JJ = J, J + JB - 1
DO 30 I = JJ + 1, N
WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
A( I, JJ ) = ZERO
30 CONTINUE
40 CONTINUE
*
* Compute current block column of inv(A).
*
IF( J+JB.LE.N )
$ CALL SGEMM( 'No transpose', 'No transpose', N, JB,
$ N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
$ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
CALL STRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
$ ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
50 CONTINUE
END IF
*
* Apply column interchanges, in the reverse of the pivoting order.
*
DO 60 J = N - 1, 1, -1
JP = IPIV( J )
IF( JP.NE.J )
$ CALL SSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
60 CONTINUE
*
WORK( 1 ) = IWS
RETURN
*
* End of SGETRI
*
END

View File

@@ -1,194 +0,0 @@
SUBROUTINE ZGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
*
* -- LAPACK routine (version 3.0) --
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
* Courant Institute, Argonne National Lab, and Rice University
* June 30, 1999
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LWORK, N
* ..
* .. Array Arguments ..
INTEGER IPIV( * )
COMPLEX*16 A( LDA, * ), WORK( * )
* ..
*
* Purpose
* =======
*
* ZGETRI computes the inverse of a matrix using the LU factorization
* computed by ZGETRF.
*
* This method inverts U and then computes inv(A) by solving the system
* inv(A)*L = inv(U) for inv(A).
*
* Arguments
* =========
*
* N (input) INTEGER
* The order of the matrix A. N >= 0.
*
* A (input/output) COMPLEX*16 array, dimension (LDA,N)
* On entry, the factors L and U from the factorization
* A = P*L*U as computed by ZGETRF.
* On exit, if INFO = 0, the inverse of the original matrix A.
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(1,N).
*
* IPIV (input) INTEGER array, dimension (N)
* The pivot indices from ZGETRF; for 1<=i<=N, row i of the
* matrix was interchanged with row IPIV(i).
*
* WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK))
* WORK(1) is always written, so WORK must hold at least one
* element even for a workspace query.
* On exit from a workspace query (LWORK = -1), WORK(1) holds
* the optimal LWORK, max(1,N*NB).
* On exit from a computation with INFO = 0, WORK(1) holds
* N*NB if 1 < NB < N and N otherwise, where NB is the
* blocksize returned by ILAENV.
*
* LWORK (input) INTEGER
* The dimension of the array WORK. LWORK >= max(1,N).
* For optimal performance LWORK >= N*NB, where NB is
* the optimal blocksize returned by ILAENV.
*
* If LWORK = -1, then a workspace query is assumed; the routine
* only calculates the optimal size of the WORK array, returns
* this value as the first entry of the WORK array, and no error
* message related to LWORK is issued by XERBLA.
*
* INFO (output) INTEGER
* = 0: successful exit
* < 0: if INFO = -i, the i-th argument had an illegal value
* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is
* singular and its inverse could not be computed.
*
* =====================================================================
*
* .. Parameters ..
COMPLEX*16 ZERO, ONE
PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ),
$ ONE = ( 1.0D+0, 0.0D+0 ) )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
$ NBMIN, NN
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. External Subroutines ..
EXTERNAL XERBLA, ZGEMM, ZGEMV, ZSWAP, ZTRSM, ZTRTRI
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
NB = ILAENV( 1, 'ZGETRI', ' ', N, -1, -1, -1 )
*
* Report at least 1 as the optimal workspace size, so that the
* value returned by a workspace query with N = 0 satisfies the
* LWORK >= max(1,N) check below when it is passed back in.
*
LWKOPT = MAX( 1, N*NB )
WORK( 1 ) = LWKOPT
LQUERY = ( LWORK.EQ.-1 )
IF( N.LT.0 ) THEN
INFO = -1
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
INFO = -3
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
INFO = -6
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'ZGETRI', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
* Form inv(U). If INFO > 0 from ZTRTRI, then U is singular,
* and the inverse is not computed.
*
CALL ZTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
IF( INFO.GT.0 )
$ RETURN
*
* Choose the block size. IWS records the workspace needed for the
* block size returned by ILAENV; if LWORK is smaller than that,
* NB is reduced to the number of columns of length N that fit.
*
NBMIN = 2
LDWORK = N
IF( NB.GT.1 .AND. NB.LT.N ) THEN
IWS = MAX( LDWORK*NB, 1 )
IF( LWORK.LT.IWS ) THEN
NB = LWORK / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'ZGETRI', ' ', N, -1, -1, -1 ) )
END IF
ELSE
IWS = N
END IF
*
* Solve the equation inv(A)*L = inv(U) for inv(A).
*
IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
*
* Use unblocked code.
*
DO 20 J = N, 1, -1
*
* Copy current column of L to WORK and replace with zeros.
*
DO 10 I = J + 1, N
WORK( I ) = A( I, J )
A( I, J ) = ZERO
10 CONTINUE
*
* Compute current column of inv(A).
*
IF( J.LT.N )
$ CALL ZGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
$ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
20 CONTINUE
ELSE
*
* Use blocked code.
*
* NN is the first column of the last (possibly partial) block;
* the blocks are processed from right to left.
*
NN = ( ( N-1 ) / NB )*NB + 1
DO 50 J = NN, 1, -NB
JB = MIN( NB, N-J+1 )
*
* Copy current block column of L to WORK and replace with
* zeros.
*
DO 40 JJ = J, J + JB - 1
DO 30 I = JJ + 1, N
WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
A( I, JJ ) = ZERO
30 CONTINUE
40 CONTINUE
*
* Compute current block column of inv(A).
*
IF( J+JB.LE.N )
$ CALL ZGEMM( 'No transpose', 'No transpose', N, JB,
$ N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
$ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
CALL ZTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
$ ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
50 CONTINUE
END IF
*
* Apply column interchanges, in the reverse of the pivoting order.
*
DO 60 J = N - 1, 1, -1
JP = IPIV( J )
IF( JP.NE.J )
$ CALL ZSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
60 CONTINUE
*
WORK( 1 ) = IWS
RETURN
*
* End of ZGETRI
*
END

View File

@@ -1,190 +1,113 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
/***************************************************************************
* Copyright (c) 2013, The OpenBLAS Project
* All rights reserved.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* 3. Neither the name of the OpenBLAS project nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* *****************************************************************************/
/**************************************************************************************
* 2014/05/22 Saar
* TEST double precision unblocked : OK
* 2014/05/23 Saar
* TEST double precision blocked: OK
* TEST single precision blocked: OK
**************************************************************************************/
#include <stdio.h>
#include "common.h"
static FLOAT dp1 = 1.;
static FLOAT dm1 = -1.;
// static FLOAT dp1 = 1.;
// static FLOAT dm1 = -1.;
#ifdef UNIT
#define TRTI2 TRTI2_LU
#define TRTI2 TRTI2_LU
#define TRMM TRMM_LNLU
#define TRSM TRSM_RNLU
#else
#define TRTI2 TRTI2_LN
#define TRTI2 TRTI2_LN
#define TRMM TRMM_LNLN
#define TRSM TRSM_RNLN
#endif
#if 0
#undef GEMM_P
#undef GEMM_Q
#undef GEMM_R
#define GEMM_P 8
#define GEMM_Q 20
#define GEMM_R 64
#endif
#define GEMM_PQ MAX(GEMM_P, GEMM_Q)
#define REAL_GEMM_R (GEMM_R - 2 * GEMM_PQ)
blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) {
BLASLONG n, lda;
BLASLONG j, n, lda;
FLOAT *a;
BLASLONG i, is, min_i, start_i;
BLASLONG ls, min_l;
BLASLONG bk;
BLASLONG blocking;
BLASLONG range_N[2];
// BLASLONG info=0;
BLASLONG jb;
BLASLONG NB;
BLASLONG start_j;
FLOAT *sa_trsm = (FLOAT *)((BLASLONG)sb);
FLOAT *sa_trmm = (FLOAT *)((((BLASLONG)sb
+ GEMM_PQ * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)
+ GEMM_OFFSET_A);
FLOAT *sb_gemm = (FLOAT *)((((BLASLONG)sa_trmm
+ GEMM_PQ * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)
+ GEMM_OFFSET_B);
FLOAT beta_plus[2] = { ONE, ZERO};
FLOAT beta_minus[2] = {-ONE, ZERO};
n = args -> n;
a = (FLOAT *)args -> a;
lda = args -> lda;
if (range_n) {
n = range_n[1] - range_n[0];
a += range_n[0] * (lda + 1) * COMPSIZE;
}
NB = GEMM_Q;
if (n <= DTB_ENTRIES) {
if (n < NB) {
TRTI2(args, NULL, range_n, sa, sb, 0);
return 0;
}
blocking = GEMM_Q;
if (n <= 4 * GEMM_Q) blocking = (n + 3) / 4;
start_i = 0;
while (start_i < n) start_i += blocking;
start_i -= blocking;
lda = args -> lda;
a = (FLOAT *) args -> a;
args -> ldb = lda;
args -> ldc = lda;
args -> alpha = NULL;
for (i = start_i; i >= 0; i -= blocking) {
bk = MIN(blocking, n - i);
if (n - bk - i > 0) TRSM_OLNCOPY(bk, bk, a + (i + i * lda) * COMPSIZE, lda, 0, sa_trsm);
if (!range_n) {
range_N[0] = i;
range_N[1] = i + bk;
} else {
range_N[0] = range_n[0] + i;
range_N[1] = range_n[0] + i + bk;
}
start_j = 0;
while (start_j < n) start_j += NB;
start_j -= NB;
CNAME(args, NULL, range_N, sa, sa_trmm, 0);
if (i > 0) {
TRMM_ILTCOPY(bk, bk, a + (i + i * lda) * COMPSIZE, lda, 0, 0, sa_trmm);
for (j = start_j ; j >=0 ; j-= NB)
{
jb = n - j;
if ( jb > NB ) jb = NB;
for (ls = 0; ls < i; ls += REAL_GEMM_R) {
min_l = i - ls;
if (min_l > REAL_GEMM_R) min_l = REAL_GEMM_R;
GEMM_ONCOPY (bk, min_l, a + (i + ls * lda) * COMPSIZE, lda, sb_gemm);
if (n - bk - i > 0) {
for (is = i + bk; is < n; is += GEMM_P) {
min_i = n - is;
if (min_i > GEMM_P) min_i = GEMM_P;
if (ls == 0) {
NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
args -> n = jb;
args -> m = n-j-jb;
TRSM_KERNEL_RT(min_i, bk, bk, dm1,
#ifdef COMPLEX
ZERO,
#endif
sa, sa_trsm,
a + (is + i * lda) * COMPSIZE, lda, 0);
} else {
GEMM_ITCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
}
args -> a = &a[(j+jb+(j+jb)*lda) * COMPSIZE];
args -> b = &a[(j+jb+j*lda) * COMPSIZE];
args -> beta = beta_plus;
GEMM_KERNEL_N(min_i, min_l, bk, dp1,
#ifdef COMPLEX
ZERO,
#endif
sa, sb_gemm,
a + (is + ls * lda) * COMPSIZE, lda);
}
}
for (is = 0; is < bk; is += GEMM_P) {
min_i = bk - is;
if (min_i > GEMM_P) min_i = GEMM_P;
TRMM_KERNEL_LT(min_i, min_l, bk, dp1,
#ifdef COMPLEX
ZERO,
#endif
sa_trmm + is * bk * COMPSIZE, sb_gemm,
a + (i + is + ls * lda) * COMPSIZE, lda, is);
}
}
TRMM(args, NULL, NULL, sa, sb, 0);
} else {
args -> a = &a[(j+j*lda) * COMPSIZE];
args -> beta = beta_minus;
TRSM(args, NULL, NULL, sa, sb, 0);
args -> a = &a[(j+j*lda) * COMPSIZE];
TRTI2(args, NULL, range_n, sa, sb, 0);
if (n - bk - i > 0) {
for (is = 0; is < n - bk - i; is += GEMM_P) {
min_i = n - bk - i - is;
if (min_i > GEMM_P) min_i = GEMM_P;
NEG_TCOPY (bk, min_i, a + (i + bk + is + i * lda) * COMPSIZE, lda, sa);
TRSM_KERNEL_RT(min_i, bk, bk, dm1,
#ifdef COMPLEX
ZERO,
#endif
sa, sa_trsm,
a + (i + bk + is + i * lda) * COMPSIZE, lda, 0);
}
}
}
}
return 0;
}

View File

@@ -1,46 +1,44 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
/***************************************************************************
* Copyright (c) 2013, The OpenBLAS Project
* All rights reserved.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* 3. Neither the name of the OpenBLAS project nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* *****************************************************************************/
/**************************************************************************************
* 2014/05/22 Saar
* TEST double precision unblocked : OK
* TEST double precision blocked : OK
* 2014/05/23
* TEST single precision blocked : OK
*
**************************************************************************************/
#include <stdio.h>
#include "common.h"
static FLOAT dp1 = 1.;
static FLOAT dm1 = -1.;
// static FLOAT dp1 = 1.;
// static FLOAT dm1 = -1.;
#ifdef UNIT
#define TRTI2 TRTI2_UU
@@ -48,152 +46,66 @@ static FLOAT dm1 = -1.;
#define TRTI2 TRTI2_UN
#endif
#if 0
#undef GEMM_P
#undef GEMM_Q
#undef GEMM_R
#define GEMM_P 8
#define GEMM_Q 20
#define GEMM_R 64
#ifdef UNIT
#define TRMM TRMM_LNUU
#define TRSM TRSM_RNUU
#else
#define TRMM TRMM_LNUN
#define TRSM TRSM_RNUN
#endif
#define GEMM_PQ MAX(GEMM_P, GEMM_Q)
#define REAL_GEMM_R (GEMM_R - 2 * GEMM_PQ)
blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) {
BLASLONG n, lda;
BLASLONG j, n, lda;
FLOAT *a;
BLASLONG i, is, min_i, start_is;
BLASLONG ls, min_l;
BLASLONG bk;
BLASLONG blocking;
BLASLONG range_N[2];
// BLASLONG info=0;
BLASLONG jb;
BLASLONG NB;
FLOAT *sa_trsm = (FLOAT *)((BLASLONG)sb);
FLOAT *sa_trmm = (FLOAT *)((((BLASLONG)sb
+ GEMM_PQ * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)
+ GEMM_OFFSET_A);
FLOAT *sb_gemm = (FLOAT *)((((BLASLONG)sa_trmm
+ GEMM_PQ * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)
+ GEMM_OFFSET_B);
FLOAT beta_plus[2] = { ONE, ZERO};
FLOAT beta_minus[2] = {-ONE, ZERO};
n = args -> n;
a = (FLOAT *)args -> a;
lda = args -> lda;
if (range_n) {
n = range_n[1] - range_n[0];
a += range_n[0] * (lda + 1) * COMPSIZE;
}
NB = GEMM_Q;
if (n <= DTB_ENTRIES) {
if (n <= NB) {
TRTI2(args, NULL, range_n, sa, sb, 0);
return 0;
}
blocking = GEMM_Q;
if (n <= 4 * GEMM_Q) blocking = (n + 3) / 4;
for (i = 0; i < n; i += blocking) {
bk = MIN(blocking, n - i);
if (i > 0) TRSM_OUNCOPY(bk, bk, a + (i + i * lda) * COMPSIZE, lda, 0, sa_trsm);
lda = args -> lda;
a = (FLOAT *) args -> a;
args -> ldb = lda;
args -> ldc = lda;
args -> alpha = NULL;
if (!range_n) {
range_N[0] = i;
range_N[1] = i + bk;
} else {
range_N[0] = range_n[0] + i;
range_N[1] = range_n[0] + i + bk;
}
for (j = 0; j < n; j += NB)
{
jb = n - j;
if ( jb > NB ) jb = NB;
CNAME(args, NULL, range_N, sa, sa_trmm, 0);
args -> n = jb;
args -> m = j;
if (n -bk - i > 0) {
TRMM_IUTCOPY(bk, bk, a + (i + i * lda) * COMPSIZE, lda, 0, 0, sa_trmm);
args -> a = &a[0];
args -> b = &a[(j*lda) * COMPSIZE];
args -> beta = beta_plus;
for (ls = i + bk; ls < n; ls += REAL_GEMM_R) {
min_l = n - ls;
if (min_l > REAL_GEMM_R) min_l = REAL_GEMM_R;
GEMM_ONCOPY (bk, min_l, a + (i + ls * lda) * COMPSIZE, lda, sb_gemm);
if (i > 0) {
for (is = 0; is < i; is += GEMM_P) {
min_i = i - is;
if (min_i > GEMM_P) min_i = GEMM_P;
if (ls == i + bk) {
//NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
TRMM(args, NULL, NULL, sa, sb, 0);
GEMM_BETA(min_i, bk, 0, dm1,
#ifdef COMPLEX
ZERO,
#endif
NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda);
args -> a = &a[(j+j*lda) * COMPSIZE];
args -> beta = beta_minus;
TRSM_KERNEL_RN(min_i, bk, bk, dm1,
#ifdef COMPLEX
ZERO,
#endif
sa, sa_trsm,
a + (is + i * lda) * COMPSIZE, lda, 0);
} else {
GEMM_ITCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
}
GEMM_KERNEL_N(min_i, min_l, bk, dp1,
#ifdef COMPLEX
ZERO,
#endif
sa, sb_gemm,
a + (is + ls * lda) * COMPSIZE, lda);
}
}
start_is = 0;
while (start_is < bk) start_is += GEMM_P;
start_is -= GEMM_P;
TRSM(args, NULL, NULL, sa, sb, 0);
for (is = 0; is < bk; is += GEMM_P) {
min_i = bk - is;
if (min_i > GEMM_P) min_i = GEMM_P;
TRMM_KERNEL_LN(min_i, min_l, bk, dp1,
#ifdef COMPLEX
ZERO,
#endif
sa_trmm + is * bk * COMPSIZE, sb_gemm,
a + (i + is + ls * lda) * COMPSIZE, lda, is);
}
}
args -> a = &a[(j+j*lda) * COMPSIZE];
} else {
if (i > 0) {
for (is = 0; is < i; is += GEMM_P) {
min_i = i - is;
if (min_i > GEMM_P) min_i = GEMM_P;
//NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
GEMM_BETA(min_i, bk, 0, dm1,
#ifdef COMPLEX
ZERO,
#endif
NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda);
TRTI2(args, NULL, range_n, sa, sb, 0);
TRSM_KERNEL_RN(min_i, bk, bk, dm1,
#ifdef COMPLEX
ZERO,
#endif
sa, sa_trsm,
a + (is + i * lda) * COMPSIZE, lda, 0);
}
}
}
}
return 0;
}

View File

@@ -1,11 +1,7 @@
SHELL = /bin/sh
PLAT = _LINUX
DRVOPTS = $(OPTS)
LOADER = $(FORTRAN)
TIMER = NONE
LOADER = $(FORTRAN) -pthread
ARCHFLAGS= -ru
#RANLIB = ranlib
BLASLIB =
TMGLIB = tmglib.a
EIGSRCLIB = eigsrc.a
LINSRCLIB = linsrc.a

52
param.h
View File

@@ -1032,14 +1032,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define XGEMM_DEFAULT_UNROLL_N 1
#else
#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_M 4
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1
#define SGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_N 4
@@ -1104,10 +1104,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1
#else
#define SGEMM_DEFAULT_UNROLL_M 8
#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 8
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 4
#define XGEMM_DEFAULT_UNROLL_M 1
@@ -1228,7 +1228,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_P 256
#define SGEMM_DEFAULT_Q 384
#ifdef WINDOWS_ABI
#define DGEMM_DEFAULT_Q 128
#else
#define DGEMM_DEFAULT_Q 256
#endif
#define CGEMM_DEFAULT_Q 192
#define ZGEMM_DEFAULT_Q 128
@@ -2017,6 +2021,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#if defined(ARMV5)
#define SNUMOPT 2
#define DNUMOPT 2
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL_N 2
#define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define SGEMM_DEFAULT_P 128
#define DGEMM_DEFAULT_P 128
#define CGEMM_DEFAULT_P 96
#define ZGEMM_DEFAULT_P 64
#define SGEMM_DEFAULT_Q 240
#define DGEMM_DEFAULT_Q 120
#define CGEMM_DEFAULT_Q 120
#define ZGEMM_DEFAULT_Q 120
#define SGEMM_DEFAULT_R 12288
#define DGEMM_DEFAULT_R 8192
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096
#define SYMV_P 16
#endif
#ifdef GENERIC

View File

@@ -1,684 +0,0 @@
diff -ruN lapack-3.1.1.old/INSTALL/Makefile lapack-3.1.1/INSTALL/Makefile
--- lapack-3.1.1.old/INSTALL/Makefile 2007-02-23 14:07:35.000000000 -0600
+++ lapack-3.1.1/INSTALL/Makefile 2009-12-16 14:40:35.000000000 -0600
@@ -27,7 +27,7 @@
$(LOADER) $(LOADOPTS) -o testversion ilaver.o LAPACK_version.o
clean:
- rm -f *.o
+ rm -f *.o test*
slamch.o: slamch.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
dlamch.o: dlamch.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
diff -ruN lapack-3.1.1.old/Makefile lapack-3.1.1/Makefile
--- lapack-3.1.1.old/Makefile 2007-02-22 15:55:00.000000000 -0600
+++ lapack-3.1.1/Makefile 2009-12-16 14:40:35.000000000 -0600
@@ -20,9 +20,12 @@
blaslib:
( cd BLAS/SRC; $(MAKE) )
-lapacklib: lapack_install
+lapacklib:
( cd SRC; $(MAKE) )
+lapack_prof:
+ ( cd SRC; $(MAKE) lapack_prof)
+
tmglib:
( cd TESTING/MATGEN; $(MAKE) )
diff -ruN lapack-3.1.1.old/SRC/Makefile lapack-3.1.1/SRC/Makefile
--- lapack-3.1.1.old/SRC/Makefile 2007-02-23 15:33:05.000000000 -0600
+++ lapack-3.1.1/SRC/Makefile 2009-12-16 14:41:09.000000000 -0600
@@ -38,265 +38,273 @@
#
#######################################################################
-ALLAUX = ilaenv.o ieeeck.o lsamen.o xerbla.o iparmq.o \
- ../INSTALL/ilaver.o ../INSTALL/lsame.o
+ALLAUX = ilaenv.$(SUFFIX) ieeeck.$(SUFFIX) lsamen.$(SUFFIX) iparmq.$(SUFFIX) \
+ ../INSTALL/ilaver.$(SUFFIX)
SCLAUX = \
- sbdsdc.o \
- sbdsqr.o sdisna.o slabad.o slacpy.o sladiv.o slae2.o slaebz.o \
- slaed0.o slaed1.o slaed2.o slaed3.o slaed4.o slaed5.o slaed6.o \
- slaed7.o slaed8.o slaed9.o slaeda.o slaev2.o slagtf.o \
- slagts.o slamrg.o slanst.o \
- slapy2.o slapy3.o slarnv.o \
- slarra.o slarrb.o slarrc.o slarrd.o slarre.o slarrf.o slarrj.o \
- slarrk.o slarrr.o slaneg.o \
- slartg.o slaruv.o slas2.o slascl.o \
- slasd0.o slasd1.o slasd2.o slasd3.o slasd4.o slasd5.o slasd6.o \
- slasd7.o slasd8.o slasda.o slasdq.o slasdt.o \
- slaset.o slasq1.o slasq2.o slasq3.o slazq3.o slasq4.o slazq4.o slasq5.o slasq6.o \
- slasr.o slasrt.o slassq.o slasv2.o spttrf.o sstebz.o sstedc.o \
- ssteqr.o ssterf.o slaisnan.o sisnan.o \
- ../INSTALL/slamch.o ../INSTALL/second_$(TIMER).o
+ sbdsdc.$(SUFFIX) \
+ sbdsqr.$(SUFFIX) sdisna.$(SUFFIX) slabad.$(SUFFIX) slacpy.$(SUFFIX) sladiv.$(SUFFIX) slae2.$(SUFFIX) slaebz.$(SUFFIX) \
+ slaed0.$(SUFFIX) slaed1.$(SUFFIX) slaed2.$(SUFFIX) slaed3.$(SUFFIX) slaed4.$(SUFFIX) slaed5.$(SUFFIX) slaed6.$(SUFFIX) \
+ slaed7.$(SUFFIX) slaed8.$(SUFFIX) slaed9.$(SUFFIX) slaeda.$(SUFFIX) slaev2.$(SUFFIX) slagtf.$(SUFFIX) \
+ slagts.$(SUFFIX) slamrg.$(SUFFIX) slanst.$(SUFFIX) \
+ slapy2.$(SUFFIX) slapy3.$(SUFFIX) slarnv.$(SUFFIX) \
+ slarra.$(SUFFIX) slarrb.$(SUFFIX) slarrc.$(SUFFIX) slarrd.$(SUFFIX) slarre.$(SUFFIX) slarrf.$(SUFFIX) slarrj.$(SUFFIX) \
+ slarrk.$(SUFFIX) slarrr.$(SUFFIX) slaneg.$(SUFFIX) \
+ slartg.$(SUFFIX) slaruv.$(SUFFIX) slas2.$(SUFFIX) slascl.$(SUFFIX) \
+ slasd0.$(SUFFIX) slasd1.$(SUFFIX) slasd2.$(SUFFIX) slasd3.$(SUFFIX) slasd4.$(SUFFIX) slasd5.$(SUFFIX) slasd6.$(SUFFIX) \
+ slasd7.$(SUFFIX) slasd8.$(SUFFIX) slasda.$(SUFFIX) slasdq.$(SUFFIX) slasdt.$(SUFFIX) \
+ slaset.$(SUFFIX) slasq1.$(SUFFIX) slasq2.$(SUFFIX) slasq3.$(SUFFIX) slazq3.$(SUFFIX) slasq4.$(SUFFIX) slazq4.$(SUFFIX) slasq5.$(SUFFIX) slasq6.$(SUFFIX) \
+ slasr.$(SUFFIX) slasrt.$(SUFFIX) slassq.$(SUFFIX) slasv2.$(SUFFIX) spttrf.$(SUFFIX) sstebz.$(SUFFIX) sstedc.$(SUFFIX) \
+ ssteqr.$(SUFFIX) ssterf.$(SUFFIX) slaisnan.$(SUFFIX) sisnan.$(SUFFIX) \
+ ../INSTALL/second_$(TIMER).$(SUFFIX)
DZLAUX = \
- dbdsdc.o \
- dbdsqr.o ddisna.o dlabad.o dlacpy.o dladiv.o dlae2.o dlaebz.o \
- dlaed0.o dlaed1.o dlaed2.o dlaed3.o dlaed4.o dlaed5.o dlaed6.o \
- dlaed7.o dlaed8.o dlaed9.o dlaeda.o dlaev2.o dlagtf.o \
- dlagts.o dlamrg.o dlanst.o \
- dlapy2.o dlapy3.o dlarnv.o \
- dlarra.o dlarrb.o dlarrc.o dlarrd.o dlarre.o dlarrf.o dlarrj.o \
- dlarrk.o dlarrr.o dlaneg.o \
- dlartg.o dlaruv.o dlas2.o dlascl.o \
- dlasd0.o dlasd1.o dlasd2.o dlasd3.o dlasd4.o dlasd5.o dlasd6.o \
- dlasd7.o dlasd8.o dlasda.o dlasdq.o dlasdt.o \
- dlaset.o dlasq1.o dlasq2.o dlasq3.o dlazq3.o dlasq4.o dlazq4.o dlasq5.o dlasq6.o \
- dlasr.o dlasrt.o dlassq.o dlasv2.o dpttrf.o dstebz.o dstedc.o \
- dsteqr.o dsterf.o dlaisnan.o disnan.o \
- ../INSTALL/dlamch.o ../INSTALL/dsecnd_$(TIMER).o
+ dbdsdc.$(SUFFIX) \
+ dbdsqr.$(SUFFIX) ddisna.$(SUFFIX) dlabad.$(SUFFIX) dlacpy.$(SUFFIX) dladiv.$(SUFFIX) dlae2.$(SUFFIX) dlaebz.$(SUFFIX) \
+ dlaed0.$(SUFFIX) dlaed1.$(SUFFIX) dlaed2.$(SUFFIX) dlaed3.$(SUFFIX) dlaed4.$(SUFFIX) dlaed5.$(SUFFIX) dlaed6.$(SUFFIX) \
+ dlaed7.$(SUFFIX) dlaed8.$(SUFFIX) dlaed9.$(SUFFIX) dlaeda.$(SUFFIX) dlaev2.$(SUFFIX) dlagtf.$(SUFFIX) \
+ dlagts.$(SUFFIX) dlamrg.$(SUFFIX) dlanst.$(SUFFIX) \
+ dlapy2.$(SUFFIX) dlapy3.$(SUFFIX) dlarnv.$(SUFFIX) \
+ dlarra.$(SUFFIX) dlarrb.$(SUFFIX) dlarrc.$(SUFFIX) dlarrd.$(SUFFIX) dlarre.$(SUFFIX) dlarrf.$(SUFFIX) dlarrj.$(SUFFIX) \
+ dlarrk.$(SUFFIX) dlarrr.$(SUFFIX) dlaneg.$(SUFFIX) \
+ dlartg.$(SUFFIX) dlaruv.$(SUFFIX) dlas2.$(SUFFIX) dlascl.$(SUFFIX) \
+ dlasd0.$(SUFFIX) dlasd1.$(SUFFIX) dlasd2.$(SUFFIX) dlasd3.$(SUFFIX) dlasd4.$(SUFFIX) dlasd5.$(SUFFIX) dlasd6.$(SUFFIX) \
+ dlasd7.$(SUFFIX) dlasd8.$(SUFFIX) dlasda.$(SUFFIX) dlasdq.$(SUFFIX) dlasdt.$(SUFFIX) \
+ dlaset.$(SUFFIX) dlasq1.$(SUFFIX) dlasq2.$(SUFFIX) dlasq3.$(SUFFIX) dlazq3.$(SUFFIX) dlasq4.$(SUFFIX) dlazq4.$(SUFFIX) dlasq5.$(SUFFIX) dlasq6.$(SUFFIX) \
+ dlasr.$(SUFFIX) dlasrt.$(SUFFIX) dlassq.$(SUFFIX) dlasv2.$(SUFFIX) dpttrf.$(SUFFIX) dstebz.$(SUFFIX) dstedc.$(SUFFIX) \
+ dsteqr.$(SUFFIX) dsterf.$(SUFFIX) dlaisnan.$(SUFFIX) disnan.$(SUFFIX) \
+ ../INSTALL/dsecnd_$(TIMER).$(SUFFIX)
SLASRC = \
- sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \
- sgbsvx.o sgbtf2.o sgbtrf.o sgbtrs.o sgebak.o sgebal.o sgebd2.o \
- sgebrd.o sgecon.o sgeequ.o sgees.o sgeesx.o sgeev.o sgeevx.o \
- sgegs.o sgegv.o sgehd2.o sgehrd.o sgelq2.o sgelqf.o \
- sgels.o sgelsd.o sgelss.o sgelsx.o sgelsy.o sgeql2.o sgeqlf.o \
- sgeqp3.o sgeqpf.o sgeqr2.o sgeqrf.o sgerfs.o sgerq2.o sgerqf.o \
- sgesc2.o sgesdd.o sgesv.o sgesvd.o sgesvx.o sgetc2.o sgetf2.o \
- sgetrf.o sgetri.o \
- sgetrs.o sggbak.o sggbal.o sgges.o sggesx.o sggev.o sggevx.o \
- sggglm.o sgghrd.o sgglse.o sggqrf.o \
- sggrqf.o sggsvd.o sggsvp.o sgtcon.o sgtrfs.o sgtsv.o \
- sgtsvx.o sgttrf.o sgttrs.o sgtts2.o shgeqz.o \
- shsein.o shseqr.o slabrd.o slacon.o slacn2.o \
- slaein.o slaexc.o slag2.o slags2.o slagtm.o slagv2.o slahqr.o \
- slahrd.o slahr2.o slaic1.o slaln2.o slals0.o slalsa.o slalsd.o \
- slangb.o slange.o slangt.o slanhs.o slansb.o slansp.o \
- slansy.o slantb.o slantp.o slantr.o slanv2.o \
- slapll.o slapmt.o \
- slaqgb.o slaqge.o slaqp2.o slaqps.o slaqsb.o slaqsp.o slaqsy.o \
- slaqr0.o slaqr1.o slaqr2.o slaqr3.o slaqr4.o slaqr5.o \
- slaqtr.o slar1v.o slar2v.o \
- slarf.o slarfb.o slarfg.o slarft.o slarfx.o slargv.o \
- slarrv.o slartv.o \
- slarz.o slarzb.o slarzt.o slaswp.o slasy2.o slasyf.o \
- slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o slatzm.o \
- slauu2.o slauum.o sopgtr.o sopmtr.o sorg2l.o sorg2r.o \
- sorgbr.o sorghr.o sorgl2.o sorglq.o sorgql.o sorgqr.o sorgr2.o \
- sorgrq.o sorgtr.o sorm2l.o sorm2r.o \
- sormbr.o sormhr.o sorml2.o sormlq.o sormql.o sormqr.o sormr2.o \
- sormr3.o sormrq.o sormrz.o sormtr.o spbcon.o spbequ.o spbrfs.o \
- spbstf.o spbsv.o spbsvx.o \
- spbtf2.o spbtrf.o spbtrs.o spocon.o spoequ.o sporfs.o sposv.o \
- sposvx.o spotf2.o spotrf.o spotri.o spotrs.o sppcon.o sppequ.o \
- spprfs.o sppsv.o sppsvx.o spptrf.o spptri.o spptrs.o sptcon.o \
- spteqr.o sptrfs.o sptsv.o sptsvx.o spttrs.o sptts2.o srscl.o \
- ssbev.o ssbevd.o ssbevx.o ssbgst.o ssbgv.o ssbgvd.o ssbgvx.o \
- ssbtrd.o sspcon.o sspev.o sspevd.o sspevx.o sspgst.o \
- sspgv.o sspgvd.o sspgvx.o ssprfs.o sspsv.o sspsvx.o ssptrd.o \
- ssptrf.o ssptri.o ssptrs.o sstegr.o sstein.o sstev.o sstevd.o sstevr.o \
- sstevx.o ssycon.o ssyev.o ssyevd.o ssyevr.o ssyevx.o ssygs2.o \
- ssygst.o ssygv.o ssygvd.o ssygvx.o ssyrfs.o ssysv.o ssysvx.o \
- ssytd2.o ssytf2.o ssytrd.o ssytrf.o ssytri.o ssytrs.o stbcon.o \
- stbrfs.o stbtrs.o stgevc.o stgex2.o stgexc.o stgsen.o \
- stgsja.o stgsna.o stgsy2.o stgsyl.o stpcon.o stprfs.o stptri.o \
- stptrs.o \
- strcon.o strevc.o strexc.o strrfs.o strsen.o strsna.o strsyl.o \
- strti2.o strtri.o strtrs.o stzrqf.o stzrzf.o sstemr.o
+ sgbbrd.$(SUFFIX) sgbcon.$(SUFFIX) sgbequ.$(SUFFIX) sgbrfs.$(SUFFIX) sgbsv.$(SUFFIX) \
+ sgbsvx.$(SUFFIX) sgbtf2.$(SUFFIX) sgbtrf.$(SUFFIX) sgbtrs.$(SUFFIX) sgebak.$(SUFFIX) sgebal.$(SUFFIX) sgebd2.$(SUFFIX) \
+ sgebrd.$(SUFFIX) sgecon.$(SUFFIX) sgeequ.$(SUFFIX) sgees.$(SUFFIX) sgeesx.$(SUFFIX) sgeev.$(SUFFIX) sgeevx.$(SUFFIX) \
+ sgegs.$(SUFFIX) sgegv.$(SUFFIX) sgehd2.$(SUFFIX) sgehrd.$(SUFFIX) sgelq2.$(SUFFIX) sgelqf.$(SUFFIX) \
+ sgels.$(SUFFIX) sgelsd.$(SUFFIX) sgelss.$(SUFFIX) sgelsx.$(SUFFIX) sgelsy.$(SUFFIX) sgeql2.$(SUFFIX) sgeqlf.$(SUFFIX) \
+ sgeqp3.$(SUFFIX) sgeqpf.$(SUFFIX) sgeqr2.$(SUFFIX) sgeqrf.$(SUFFIX) sgerfs.$(SUFFIX) sgerq2.$(SUFFIX) sgerqf.$(SUFFIX) \
+ sgesc2.$(SUFFIX) sgesdd.$(SUFFIX) sgesvd.$(SUFFIX) sgesvx.$(SUFFIX) sgetc2.$(SUFFIX) \
+ sgetri.$(SUFFIX) \
+ sggbak.$(SUFFIX) sggbal.$(SUFFIX) sgges.$(SUFFIX) sggesx.$(SUFFIX) sggev.$(SUFFIX) sggevx.$(SUFFIX) \
+ sggglm.$(SUFFIX) sgghrd.$(SUFFIX) sgglse.$(SUFFIX) sggqrf.$(SUFFIX) \
+ sggrqf.$(SUFFIX) sggsvd.$(SUFFIX) sggsvp.$(SUFFIX) sgtcon.$(SUFFIX) sgtrfs.$(SUFFIX) sgtsv.$(SUFFIX) \
+ sgtsvx.$(SUFFIX) sgttrf.$(SUFFIX) sgttrs.$(SUFFIX) sgtts2.$(SUFFIX) shgeqz.$(SUFFIX) \
+ shsein.$(SUFFIX) shseqr.$(SUFFIX) slabrd.$(SUFFIX) slacon.$(SUFFIX) slacn2.$(SUFFIX) \
+ slaein.$(SUFFIX) slaexc.$(SUFFIX) slag2.$(SUFFIX) slags2.$(SUFFIX) slagtm.$(SUFFIX) slagv2.$(SUFFIX) slahqr.$(SUFFIX) \
+ slahrd.$(SUFFIX) slahr2.$(SUFFIX) slaic1.$(SUFFIX) slaln2.$(SUFFIX) slals0.$(SUFFIX) slalsa.$(SUFFIX) slalsd.$(SUFFIX) \
+ slangb.$(SUFFIX) slange.$(SUFFIX) slangt.$(SUFFIX) slanhs.$(SUFFIX) slansb.$(SUFFIX) slansp.$(SUFFIX) \
+ slansy.$(SUFFIX) slantb.$(SUFFIX) slantp.$(SUFFIX) slantr.$(SUFFIX) slanv2.$(SUFFIX) \
+ slapll.$(SUFFIX) slapmt.$(SUFFIX) \
+ slaqgb.$(SUFFIX) slaqge.$(SUFFIX) slaqp2.$(SUFFIX) slaqps.$(SUFFIX) slaqsb.$(SUFFIX) slaqsp.$(SUFFIX) slaqsy.$(SUFFIX) \
+ slaqr0.$(SUFFIX) slaqr1.$(SUFFIX) slaqr2.$(SUFFIX) slaqr3.$(SUFFIX) slaqr4.$(SUFFIX) slaqr5.$(SUFFIX) \
+ slaqtr.$(SUFFIX) slar1v.$(SUFFIX) slar2v.$(SUFFIX) \
+ slarf.$(SUFFIX) slarfb.$(SUFFIX) slarfg.$(SUFFIX) slarft.$(SUFFIX) slarfx.$(SUFFIX) slargv.$(SUFFIX) \
+ slarrv.$(SUFFIX) slartv.$(SUFFIX) \
+ slarz.$(SUFFIX) slarzb.$(SUFFIX) slarzt.$(SUFFIX) slasy2.$(SUFFIX) slasyf.$(SUFFIX) \
+ slatbs.$(SUFFIX) slatdf.$(SUFFIX) slatps.$(SUFFIX) slatrd.$(SUFFIX) slatrs.$(SUFFIX) slatrz.$(SUFFIX) slatzm.$(SUFFIX) \
+ sopgtr.$(SUFFIX) sopmtr.$(SUFFIX) sorg2l.$(SUFFIX) sorg2r.$(SUFFIX) \
+ sorgbr.$(SUFFIX) sorghr.$(SUFFIX) sorgl2.$(SUFFIX) sorglq.$(SUFFIX) sorgql.$(SUFFIX) sorgqr.$(SUFFIX) sorgr2.$(SUFFIX) \
+ sorgrq.$(SUFFIX) sorgtr.$(SUFFIX) sorm2l.$(SUFFIX) sorm2r.$(SUFFIX) \
+ sormbr.$(SUFFIX) sormhr.$(SUFFIX) sorml2.$(SUFFIX) sormlq.$(SUFFIX) sormql.$(SUFFIX) sormqr.$(SUFFIX) sormr2.$(SUFFIX) \
+ sormr3.$(SUFFIX) sormrq.$(SUFFIX) sormrz.$(SUFFIX) sormtr.$(SUFFIX) spbcon.$(SUFFIX) spbequ.$(SUFFIX) spbrfs.$(SUFFIX) \
+ spbstf.$(SUFFIX) spbsv.$(SUFFIX) spbsvx.$(SUFFIX) \
+ spbtf2.$(SUFFIX) spbtrf.$(SUFFIX) spbtrs.$(SUFFIX) spocon.$(SUFFIX) spoequ.$(SUFFIX) sporfs.$(SUFFIX) sposv.$(SUFFIX) \
+ sposvx.$(SUFFIX) spotrs.$(SUFFIX) sppcon.$(SUFFIX) sppequ.$(SUFFIX) \
+ spprfs.$(SUFFIX) sppsv.$(SUFFIX) sppsvx.$(SUFFIX) spptrf.$(SUFFIX) spptri.$(SUFFIX) spptrs.$(SUFFIX) sptcon.$(SUFFIX) \
+ spteqr.$(SUFFIX) sptrfs.$(SUFFIX) sptsv.$(SUFFIX) sptsvx.$(SUFFIX) spttrs.$(SUFFIX) sptts2.$(SUFFIX) srscl.$(SUFFIX) \
+ ssbev.$(SUFFIX) ssbevd.$(SUFFIX) ssbevx.$(SUFFIX) ssbgst.$(SUFFIX) ssbgv.$(SUFFIX) ssbgvd.$(SUFFIX) ssbgvx.$(SUFFIX) \
+ ssbtrd.$(SUFFIX) sspcon.$(SUFFIX) sspev.$(SUFFIX) sspevd.$(SUFFIX) sspevx.$(SUFFIX) sspgst.$(SUFFIX) \
+ sspgv.$(SUFFIX) sspgvd.$(SUFFIX) sspgvx.$(SUFFIX) ssprfs.$(SUFFIX) sspsv.$(SUFFIX) sspsvx.$(SUFFIX) ssptrd.$(SUFFIX) \
+ ssptrf.$(SUFFIX) ssptri.$(SUFFIX) ssptrs.$(SUFFIX) sstegr.$(SUFFIX) sstein.$(SUFFIX) sstev.$(SUFFIX) sstevd.$(SUFFIX) sstevr.$(SUFFIX) \
+ sstevx.$(SUFFIX) ssycon.$(SUFFIX) ssyev.$(SUFFIX) ssyevd.$(SUFFIX) ssyevr.$(SUFFIX) ssyevx.$(SUFFIX) ssygs2.$(SUFFIX) \
+ ssygst.$(SUFFIX) ssygv.$(SUFFIX) ssygvd.$(SUFFIX) ssygvx.$(SUFFIX) ssyrfs.$(SUFFIX) ssysv.$(SUFFIX) ssysvx.$(SUFFIX) \
+ ssytd2.$(SUFFIX) ssytf2.$(SUFFIX) ssytrd.$(SUFFIX) ssytrf.$(SUFFIX) ssytri.$(SUFFIX) ssytrs.$(SUFFIX) stbcon.$(SUFFIX) \
+ stbrfs.$(SUFFIX) stbtrs.$(SUFFIX) stgevc.$(SUFFIX) stgex2.$(SUFFIX) stgexc.$(SUFFIX) stgsen.$(SUFFIX) \
+ stgsja.$(SUFFIX) stgsna.$(SUFFIX) stgsy2.$(SUFFIX) stgsyl.$(SUFFIX) stpcon.$(SUFFIX) stprfs.$(SUFFIX) stptri.$(SUFFIX) \
+ stptrs.$(SUFFIX) \
+ strcon.$(SUFFIX) strevc.$(SUFFIX) strexc.$(SUFFIX) strrfs.$(SUFFIX) strsen.$(SUFFIX) strsna.$(SUFFIX) strsyl.$(SUFFIX) \
+ strtrs.$(SUFFIX) stzrqf.$(SUFFIX) stzrzf.$(SUFFIX) sstemr.$(SUFFIX)
CLASRC = \
- cbdsqr.o cgbbrd.o cgbcon.o cgbequ.o cgbrfs.o cgbsv.o cgbsvx.o \
- cgbtf2.o cgbtrf.o cgbtrs.o cgebak.o cgebal.o cgebd2.o cgebrd.o \
- cgecon.o cgeequ.o cgees.o cgeesx.o cgeev.o cgeevx.o \
- cgegs.o cgegv.o cgehd2.o cgehrd.o cgelq2.o cgelqf.o \
- cgels.o cgelsd.o cgelss.o cgelsx.o cgelsy.o cgeql2.o cgeqlf.o cgeqp3.o \
- cgeqpf.o cgeqr2.o cgeqrf.o cgerfs.o cgerq2.o cgerqf.o \
- cgesc2.o cgesdd.o cgesv.o cgesvd.o cgesvx.o cgetc2.o cgetf2.o cgetrf.o \
- cgetri.o cgetrs.o \
- cggbak.o cggbal.o cgges.o cggesx.o cggev.o cggevx.o cggglm.o \
- cgghrd.o cgglse.o cggqrf.o cggrqf.o \
- cggsvd.o cggsvp.o \
- cgtcon.o cgtrfs.o cgtsv.o cgtsvx.o cgttrf.o cgttrs.o cgtts2.o chbev.o \
- chbevd.o chbevx.o chbgst.o chbgv.o chbgvd.o chbgvx.o chbtrd.o \
- checon.o cheev.o cheevd.o cheevr.o cheevx.o chegs2.o chegst.o \
- chegv.o chegvd.o chegvx.o cherfs.o chesv.o chesvx.o chetd2.o \
- chetf2.o chetrd.o \
- chetrf.o chetri.o chetrs.o chgeqz.o chpcon.o chpev.o chpevd.o \
- chpevx.o chpgst.o chpgv.o chpgvd.o chpgvx.o chprfs.o chpsv.o \
- chpsvx.o \
- chptrd.o chptrf.o chptri.o chptrs.o chsein.o chseqr.o clabrd.o \
- clacgv.o clacon.o clacn2.o clacp2.o clacpy.o clacrm.o clacrt.o cladiv.o \
- claed0.o claed7.o claed8.o \
- claein.o claesy.o claev2.o clags2.o clagtm.o \
- clahef.o clahqr.o \
- clahrd.o clahr2.o claic1.o clals0.o clalsa.o clalsd.o clangb.o clange.o clangt.o \
- clanhb.o clanhe.o \
- clanhp.o clanhs.o clanht.o clansb.o clansp.o clansy.o clantb.o \
- clantp.o clantr.o clapll.o clapmt.o clarcm.o claqgb.o claqge.o \
- claqhb.o claqhe.o claqhp.o claqp2.o claqps.o claqsb.o \
- claqr0.o claqr1.o claqr2.o claqr3.o claqr4.o claqr5.o \
- claqsp.o claqsy.o clar1v.o clar2v.o clarf.o clarfb.o clarfg.o clarft.o \
- clarfx.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \
- clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \
- claswp.o clasyf.o clatbs.o clatdf.o clatps.o clatrd.o clatrs.o clatrz.o \
- clatzm.o clauu2.o clauum.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \
- cpbsvx.o cpbtf2.o cpbtrf.o cpbtrs.o cpocon.o cpoequ.o cporfs.o \
- cposv.o cposvx.o cpotf2.o cpotrf.o cpotri.o cpotrs.o cppcon.o \
- cppequ.o cpprfs.o cppsv.o cppsvx.o cpptrf.o cpptri.o cpptrs.o \
- cptcon.o cpteqr.o cptrfs.o cptsv.o cptsvx.o cpttrf.o cpttrs.o cptts2.o \
- crot.o cspcon.o cspmv.o cspr.o csprfs.o cspsv.o \
- cspsvx.o csptrf.o csptri.o csptrs.o csrscl.o cstedc.o \
- cstegr.o cstein.o csteqr.o csycon.o csymv.o \
- csyr.o csyrfs.o csysv.o csysvx.o csytf2.o csytrf.o csytri.o \
- csytrs.o ctbcon.o ctbrfs.o ctbtrs.o ctgevc.o ctgex2.o \
- ctgexc.o ctgsen.o ctgsja.o ctgsna.o ctgsy2.o ctgsyl.o ctpcon.o \
- ctprfs.o ctptri.o \
- ctptrs.o ctrcon.o ctrevc.o ctrexc.o ctrrfs.o ctrsen.o ctrsna.o \
- ctrsyl.o ctrti2.o ctrtri.o ctrtrs.o ctzrqf.o ctzrzf.o cung2l.o cung2r.o \
- cungbr.o cunghr.o cungl2.o cunglq.o cungql.o cungqr.o cungr2.o \
- cungrq.o cungtr.o cunm2l.o cunm2r.o cunmbr.o cunmhr.o cunml2.o \
- cunmlq.o cunmql.o cunmqr.o cunmr2.o cunmr3.o cunmrq.o cunmrz.o \
- cunmtr.o cupgtr.o cupmtr.o icmax1.o scsum1.o cstemr.o
+ cbdsqr.$(SUFFIX) cgbbrd.$(SUFFIX) cgbcon.$(SUFFIX) cgbequ.$(SUFFIX) cgbrfs.$(SUFFIX) cgbsv.$(SUFFIX) cgbsvx.$(SUFFIX) \
+ cgbtf2.$(SUFFIX) cgbtrf.$(SUFFIX) cgbtrs.$(SUFFIX) cgebak.$(SUFFIX) cgebal.$(SUFFIX) cgebd2.$(SUFFIX) cgebrd.$(SUFFIX) \
+ cgecon.$(SUFFIX) cgeequ.$(SUFFIX) cgees.$(SUFFIX) cgeesx.$(SUFFIX) cgeev.$(SUFFIX) cgeevx.$(SUFFIX) \
+ cgegs.$(SUFFIX) cgegv.$(SUFFIX) cgehd2.$(SUFFIX) cgehrd.$(SUFFIX) cgelq2.$(SUFFIX) cgelqf.$(SUFFIX) \
+ cgels.$(SUFFIX) cgelsd.$(SUFFIX) cgelss.$(SUFFIX) cgelsx.$(SUFFIX) cgelsy.$(SUFFIX) cgeql2.$(SUFFIX) cgeqlf.$(SUFFIX) cgeqp3.$(SUFFIX) \
+ cgeqpf.$(SUFFIX) cgeqr2.$(SUFFIX) cgeqrf.$(SUFFIX) cgerfs.$(SUFFIX) cgerq2.$(SUFFIX) cgerqf.$(SUFFIX) \
+ cgesc2.$(SUFFIX) cgesdd.$(SUFFIX) cgesvd.$(SUFFIX) cgesvx.$(SUFFIX) cgetc2.$(SUFFIX) \
+ cgetri.$(SUFFIX) \
+ cggbak.$(SUFFIX) cggbal.$(SUFFIX) cgges.$(SUFFIX) cggesx.$(SUFFIX) cggev.$(SUFFIX) cggevx.$(SUFFIX) cggglm.$(SUFFIX) \
+ cgghrd.$(SUFFIX) cgglse.$(SUFFIX) cggqrf.$(SUFFIX) cggrqf.$(SUFFIX) \
+ cggsvd.$(SUFFIX) cggsvp.$(SUFFIX) \
+ cgtcon.$(SUFFIX) cgtrfs.$(SUFFIX) cgtsv.$(SUFFIX) cgtsvx.$(SUFFIX) cgttrf.$(SUFFIX) cgttrs.$(SUFFIX) cgtts2.$(SUFFIX) chbev.$(SUFFIX) \
+ chbevd.$(SUFFIX) chbevx.$(SUFFIX) chbgst.$(SUFFIX) chbgv.$(SUFFIX) chbgvd.$(SUFFIX) chbgvx.$(SUFFIX) chbtrd.$(SUFFIX) \
+ checon.$(SUFFIX) cheev.$(SUFFIX) cheevd.$(SUFFIX) cheevr.$(SUFFIX) cheevx.$(SUFFIX) chegs2.$(SUFFIX) chegst.$(SUFFIX) \
+ chegv.$(SUFFIX) chegvd.$(SUFFIX) chegvx.$(SUFFIX) cherfs.$(SUFFIX) chesv.$(SUFFIX) chesvx.$(SUFFIX) chetd2.$(SUFFIX) \
+ chetf2.$(SUFFIX) chetrd.$(SUFFIX) \
+ chetrf.$(SUFFIX) chetri.$(SUFFIX) chetrs.$(SUFFIX) chgeqz.$(SUFFIX) chpcon.$(SUFFIX) chpev.$(SUFFIX) chpevd.$(SUFFIX) \
+ chpevx.$(SUFFIX) chpgst.$(SUFFIX) chpgv.$(SUFFIX) chpgvd.$(SUFFIX) chpgvx.$(SUFFIX) chprfs.$(SUFFIX) chpsv.$(SUFFIX) \
+ chpsvx.$(SUFFIX) \
+ chptrd.$(SUFFIX) chptrf.$(SUFFIX) chptri.$(SUFFIX) chptrs.$(SUFFIX) chsein.$(SUFFIX) chseqr.$(SUFFIX) clabrd.$(SUFFIX) \
+ clacgv.$(SUFFIX) clacon.$(SUFFIX) clacn2.$(SUFFIX) clacp2.$(SUFFIX) clacpy.$(SUFFIX) clacrm.$(SUFFIX) clacrt.$(SUFFIX) cladiv.$(SUFFIX) \
+ claed0.$(SUFFIX) claed7.$(SUFFIX) claed8.$(SUFFIX) \
+ claein.$(SUFFIX) claesy.$(SUFFIX) claev2.$(SUFFIX) clags2.$(SUFFIX) clagtm.$(SUFFIX) \
+ clahef.$(SUFFIX) clahqr.$(SUFFIX) \
+ clahrd.$(SUFFIX) clahr2.$(SUFFIX) claic1.$(SUFFIX) clals0.$(SUFFIX) clalsa.$(SUFFIX) clalsd.$(SUFFIX) clangb.$(SUFFIX) clange.$(SUFFIX) clangt.$(SUFFIX) \
+ clanhb.$(SUFFIX) clanhe.$(SUFFIX) \
+ clanhp.$(SUFFIX) clanhs.$(SUFFIX) clanht.$(SUFFIX) clansb.$(SUFFIX) clansp.$(SUFFIX) clansy.$(SUFFIX) clantb.$(SUFFIX) \
+ clantp.$(SUFFIX) clantr.$(SUFFIX) clapll.$(SUFFIX) clapmt.$(SUFFIX) clarcm.$(SUFFIX) claqgb.$(SUFFIX) claqge.$(SUFFIX) \
+ claqhb.$(SUFFIX) claqhe.$(SUFFIX) claqhp.$(SUFFIX) claqp2.$(SUFFIX) claqps.$(SUFFIX) claqsb.$(SUFFIX) \
+ claqr0.$(SUFFIX) claqr1.$(SUFFIX) claqr2.$(SUFFIX) claqr3.$(SUFFIX) claqr4.$(SUFFIX) claqr5.$(SUFFIX) \
+ claqsp.$(SUFFIX) claqsy.$(SUFFIX) clar1v.$(SUFFIX) clar2v.$(SUFFIX) clarf.$(SUFFIX) clarfb.$(SUFFIX) clarfg.$(SUFFIX) clarft.$(SUFFIX) \
+ clarfx.$(SUFFIX) clargv.$(SUFFIX) clarnv.$(SUFFIX) clarrv.$(SUFFIX) clartg.$(SUFFIX) clartv.$(SUFFIX) \
+ clarz.$(SUFFIX) clarzb.$(SUFFIX) clarzt.$(SUFFIX) clascl.$(SUFFIX) claset.$(SUFFIX) clasr.$(SUFFIX) classq.$(SUFFIX) \
+ clasyf.$(SUFFIX) clatbs.$(SUFFIX) clatdf.$(SUFFIX) clatps.$(SUFFIX) clatrd.$(SUFFIX) clatrs.$(SUFFIX) clatrz.$(SUFFIX) \
+ clatzm.$(SUFFIX) cpbcon.$(SUFFIX) cpbequ.$(SUFFIX) cpbrfs.$(SUFFIX) cpbstf.$(SUFFIX) cpbsv.$(SUFFIX) \
+ cpbsvx.$(SUFFIX) cpbtf2.$(SUFFIX) cpbtrf.$(SUFFIX) cpbtrs.$(SUFFIX) cpocon.$(SUFFIX) cpoequ.$(SUFFIX) cporfs.$(SUFFIX) \
+ cposv.$(SUFFIX) cposvx.$(SUFFIX) cpotrs.$(SUFFIX) cppcon.$(SUFFIX) \
+ cppequ.$(SUFFIX) cpprfs.$(SUFFIX) cppsv.$(SUFFIX) cppsvx.$(SUFFIX) cpptrf.$(SUFFIX) cpptri.$(SUFFIX) cpptrs.$(SUFFIX) \
+ cptcon.$(SUFFIX) cpteqr.$(SUFFIX) cptrfs.$(SUFFIX) cptsv.$(SUFFIX) cptsvx.$(SUFFIX) cpttrf.$(SUFFIX) cpttrs.$(SUFFIX) cptts2.$(SUFFIX) \
+ crot.$(SUFFIX) cspcon.$(SUFFIX) csprfs.$(SUFFIX) cspsv.$(SUFFIX) \
+ cspsvx.$(SUFFIX) csptrf.$(SUFFIX) csptri.$(SUFFIX) csptrs.$(SUFFIX) csrscl.$(SUFFIX) cstedc.$(SUFFIX) \
+ cstegr.$(SUFFIX) cstein.$(SUFFIX) csteqr.$(SUFFIX) csycon.$(SUFFIX) \
+ csyrfs.$(SUFFIX) csysv.$(SUFFIX) csysvx.$(SUFFIX) csytf2.$(SUFFIX) csytrf.$(SUFFIX) csytri.$(SUFFIX) \
+ csytrs.$(SUFFIX) ctbcon.$(SUFFIX) ctbrfs.$(SUFFIX) ctbtrs.$(SUFFIX) ctgevc.$(SUFFIX) ctgex2.$(SUFFIX) \
+ ctgexc.$(SUFFIX) ctgsen.$(SUFFIX) ctgsja.$(SUFFIX) ctgsna.$(SUFFIX) ctgsy2.$(SUFFIX) ctgsyl.$(SUFFIX) ctpcon.$(SUFFIX) \
+ ctprfs.$(SUFFIX) ctptri.$(SUFFIX) \
+ ctptrs.$(SUFFIX) ctrcon.$(SUFFIX) ctrevc.$(SUFFIX) ctrexc.$(SUFFIX) ctrrfs.$(SUFFIX) ctrsen.$(SUFFIX) ctrsna.$(SUFFIX) \
+ ctrsyl.$(SUFFIX) ctrtrs.$(SUFFIX) ctzrqf.$(SUFFIX) ctzrzf.$(SUFFIX) cung2l.$(SUFFIX) cung2r.$(SUFFIX) \
+ cungbr.$(SUFFIX) cunghr.$(SUFFIX) cungl2.$(SUFFIX) cunglq.$(SUFFIX) cungql.$(SUFFIX) cungqr.$(SUFFIX) cungr2.$(SUFFIX) \
+ cungrq.$(SUFFIX) cungtr.$(SUFFIX) cunm2l.$(SUFFIX) cunm2r.$(SUFFIX) cunmbr.$(SUFFIX) cunmhr.$(SUFFIX) cunml2.$(SUFFIX) \
+ cunmlq.$(SUFFIX) cunmql.$(SUFFIX) cunmqr.$(SUFFIX) cunmr2.$(SUFFIX) cunmr3.$(SUFFIX) cunmrq.$(SUFFIX) cunmrz.$(SUFFIX) \
+ cunmtr.$(SUFFIX) cupgtr.$(SUFFIX) cupmtr.$(SUFFIX) icmax1.$(SUFFIX) scsum1.$(SUFFIX) cstemr.$(SUFFIX)
DLASRC = \
- dgbbrd.o dgbcon.o dgbequ.o dgbrfs.o dgbsv.o \
- dgbsvx.o dgbtf2.o dgbtrf.o dgbtrs.o dgebak.o dgebal.o dgebd2.o \
- dgebrd.o dgecon.o dgeequ.o dgees.o dgeesx.o dgeev.o dgeevx.o \
- dgegs.o dgegv.o dgehd2.o dgehrd.o dgelq2.o dgelqf.o \
- dgels.o dgelsd.o dgelss.o dgelsx.o dgelsy.o dgeql2.o dgeqlf.o \
- dgeqp3.o dgeqpf.o dgeqr2.o dgeqrf.o dgerfs.o dgerq2.o dgerqf.o \
- dgesc2.o dgesdd.o dgesv.o dgesvd.o dgesvx.o dgetc2.o dgetf2.o \
- dgetrf.o dgetri.o \
- dgetrs.o dggbak.o dggbal.o dgges.o dggesx.o dggev.o dggevx.o \
- dggglm.o dgghrd.o dgglse.o dggqrf.o \
- dggrqf.o dggsvd.o dggsvp.o dgtcon.o dgtrfs.o dgtsv.o \
- dgtsvx.o dgttrf.o dgttrs.o dgtts2.o dhgeqz.o \
- dhsein.o dhseqr.o dlabrd.o dlacon.o dlacn2.o \
- dlaein.o dlaexc.o dlag2.o dlags2.o dlagtm.o dlagv2.o dlahqr.o \
- dlahrd.o dlahr2.o dlaic1.o dlaln2.o dlals0.o dlalsa.o dlalsd.o \
- dlangb.o dlange.o dlangt.o dlanhs.o dlansb.o dlansp.o \
- dlansy.o dlantb.o dlantp.o dlantr.o dlanv2.o \
- dlapll.o dlapmt.o \
- dlaqgb.o dlaqge.o dlaqp2.o dlaqps.o dlaqsb.o dlaqsp.o dlaqsy.o \
- dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \
- dlaqtr.o dlar1v.o dlar2v.o \
- dlarf.o dlarfb.o dlarfg.o dlarft.o dlarfx.o dlargv.o \
- dlarrv.o dlartv.o \
- dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o dlasyf.o \
- dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o dlauu2.o \
- dlauum.o dopgtr.o dopmtr.o dorg2l.o dorg2r.o \
- dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \
- dorgrq.o dorgtr.o dorm2l.o dorm2r.o \
- dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \
- dormr3.o dormrq.o dormrz.o dormtr.o dpbcon.o dpbequ.o dpbrfs.o \
- dpbstf.o dpbsv.o dpbsvx.o \
- dpbtf2.o dpbtrf.o dpbtrs.o dpocon.o dpoequ.o dporfs.o dposv.o \
- dposvx.o dpotf2.o dpotrf.o dpotri.o dpotrs.o dppcon.o dppequ.o \
- dpprfs.o dppsv.o dppsvx.o dpptrf.o dpptri.o dpptrs.o dptcon.o \
- dpteqr.o dptrfs.o dptsv.o dptsvx.o dpttrs.o dptts2.o drscl.o \
- dsbev.o dsbevd.o dsbevx.o dsbgst.o dsbgv.o dsbgvd.o dsbgvx.o \
- dsbtrd.o dspcon.o dspev.o dspevd.o dspevx.o dspgst.o \
- dspgv.o dspgvd.o dspgvx.o dsprfs.o dspsv.o dspsvx.o dsptrd.o \
- dsptrf.o dsptri.o dsptrs.o dstegr.o dstein.o dstev.o dstevd.o dstevr.o \
- dstevx.o dsycon.o dsyev.o dsyevd.o dsyevr.o \
- dsyevx.o dsygs2.o dsygst.o dsygv.o dsygvd.o dsygvx.o dsyrfs.o \
- dsysv.o dsysvx.o \
- dsytd2.o dsytf2.o dsytrd.o dsytrf.o dsytri.o dsytrs.o dtbcon.o \
- dtbrfs.o dtbtrs.o dtgevc.o dtgex2.o dtgexc.o dtgsen.o \
- dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \
- dtptrs.o \
- dtrcon.o dtrevc.o dtrexc.o dtrrfs.o dtrsen.o dtrsna.o dtrsyl.o \
- dtrti2.o dtrtri.o dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \
- dsgesv.o dlag2s.o slag2d.o
+ dgbbrd.$(SUFFIX) dgbcon.$(SUFFIX) dgbequ.$(SUFFIX) dgbrfs.$(SUFFIX) dgbsv.$(SUFFIX) \
+ dgbsvx.$(SUFFIX) dgbtf2.$(SUFFIX) dgbtrf.$(SUFFIX) dgbtrs.$(SUFFIX) dgebak.$(SUFFIX) dgebal.$(SUFFIX) dgebd2.$(SUFFIX) \
+ dgebrd.$(SUFFIX) dgecon.$(SUFFIX) dgeequ.$(SUFFIX) dgees.$(SUFFIX) dgeesx.$(SUFFIX) dgeev.$(SUFFIX) dgeevx.$(SUFFIX) \
+ dgegs.$(SUFFIX) dgegv.$(SUFFIX) dgehd2.$(SUFFIX) dgehrd.$(SUFFIX) dgelq2.$(SUFFIX) dgelqf.$(SUFFIX) \
+ dgels.$(SUFFIX) dgelsd.$(SUFFIX) dgelss.$(SUFFIX) dgelsx.$(SUFFIX) dgelsy.$(SUFFIX) dgeql2.$(SUFFIX) dgeqlf.$(SUFFIX) \
+ dgeqp3.$(SUFFIX) dgeqpf.$(SUFFIX) dgeqr2.$(SUFFIX) dgeqrf.$(SUFFIX) dgerfs.$(SUFFIX) dgerq2.$(SUFFIX) dgerqf.$(SUFFIX) \
+ dgesc2.$(SUFFIX) dgesdd.$(SUFFIX) dgesvd.$(SUFFIX) dgesvx.$(SUFFIX) dgetc2.$(SUFFIX) \
+ dgetri.$(SUFFIX) \
+ dggbak.$(SUFFIX) dggbal.$(SUFFIX) dgges.$(SUFFIX) dggesx.$(SUFFIX) dggev.$(SUFFIX) dggevx.$(SUFFIX) \
+ dggglm.$(SUFFIX) dgghrd.$(SUFFIX) dgglse.$(SUFFIX) dggqrf.$(SUFFIX) \
+ dggrqf.$(SUFFIX) dggsvd.$(SUFFIX) dggsvp.$(SUFFIX) dgtcon.$(SUFFIX) dgtrfs.$(SUFFIX) dgtsv.$(SUFFIX) \
+ dgtsvx.$(SUFFIX) dgttrf.$(SUFFIX) dgttrs.$(SUFFIX) dgtts2.$(SUFFIX) dhgeqz.$(SUFFIX) \
+ dhsein.$(SUFFIX) dhseqr.$(SUFFIX) dlabrd.$(SUFFIX) dlacon.$(SUFFIX) dlacn2.$(SUFFIX) \
+ dlaein.$(SUFFIX) dlaexc.$(SUFFIX) dlag2.$(SUFFIX) dlags2.$(SUFFIX) dlagtm.$(SUFFIX) dlagv2.$(SUFFIX) dlahqr.$(SUFFIX) \
+ dlahrd.$(SUFFIX) dlahr2.$(SUFFIX) dlaic1.$(SUFFIX) dlaln2.$(SUFFIX) dlals0.$(SUFFIX) dlalsa.$(SUFFIX) dlalsd.$(SUFFIX) \
+ dlangb.$(SUFFIX) dlange.$(SUFFIX) dlangt.$(SUFFIX) dlanhs.$(SUFFIX) dlansb.$(SUFFIX) dlansp.$(SUFFIX) \
+ dlansy.$(SUFFIX) dlantb.$(SUFFIX) dlantp.$(SUFFIX) dlantr.$(SUFFIX) dlanv2.$(SUFFIX) \
+ dlapll.$(SUFFIX) dlapmt.$(SUFFIX) \
+ dlaqgb.$(SUFFIX) dlaqge.$(SUFFIX) dlaqp2.$(SUFFIX) dlaqps.$(SUFFIX) dlaqsb.$(SUFFIX) dlaqsp.$(SUFFIX) dlaqsy.$(SUFFIX) \
+ dlaqr0.$(SUFFIX) dlaqr1.$(SUFFIX) dlaqr2.$(SUFFIX) dlaqr3.$(SUFFIX) dlaqr4.$(SUFFIX) dlaqr5.$(SUFFIX) \
+ dlaqtr.$(SUFFIX) dlar1v.$(SUFFIX) dlar2v.$(SUFFIX) \
+ dlarf.$(SUFFIX) dlarfb.$(SUFFIX) dlarfg.$(SUFFIX) dlarft.$(SUFFIX) dlarfx.$(SUFFIX) dlargv.$(SUFFIX) \
+ dlarrv.$(SUFFIX) dlartv.$(SUFFIX) \
+ dlarz.$(SUFFIX) dlarzb.$(SUFFIX) dlarzt.$(SUFFIX) dlasy2.$(SUFFIX) dlasyf.$(SUFFIX) \
+ dlatbs.$(SUFFIX) dlatdf.$(SUFFIX) dlatps.$(SUFFIX) dlatrd.$(SUFFIX) dlatrs.$(SUFFIX) dlatrz.$(SUFFIX) dlatzm.$(SUFFIX) \
+ dopgtr.$(SUFFIX) dopmtr.$(SUFFIX) dorg2l.$(SUFFIX) dorg2r.$(SUFFIX) \
+ dorgbr.$(SUFFIX) dorghr.$(SUFFIX) dorgl2.$(SUFFIX) dorglq.$(SUFFIX) dorgql.$(SUFFIX) dorgqr.$(SUFFIX) dorgr2.$(SUFFIX) \
+ dorgrq.$(SUFFIX) dorgtr.$(SUFFIX) dorm2l.$(SUFFIX) dorm2r.$(SUFFIX) \
+ dormbr.$(SUFFIX) dormhr.$(SUFFIX) dorml2.$(SUFFIX) dormlq.$(SUFFIX) dormql.$(SUFFIX) dormqr.$(SUFFIX) dormr2.$(SUFFIX) \
+ dormr3.$(SUFFIX) dormrq.$(SUFFIX) dormrz.$(SUFFIX) dormtr.$(SUFFIX) dpbcon.$(SUFFIX) dpbequ.$(SUFFIX) dpbrfs.$(SUFFIX) \
+ dpbstf.$(SUFFIX) dpbsv.$(SUFFIX) dpbsvx.$(SUFFIX) \
+ dpbtf2.$(SUFFIX) dpbtrf.$(SUFFIX) dpbtrs.$(SUFFIX) dpocon.$(SUFFIX) dpoequ.$(SUFFIX) dporfs.$(SUFFIX) dposv.$(SUFFIX) \
+ dposvx.$(SUFFIX) dpotrs.$(SUFFIX) dppcon.$(SUFFIX) dppequ.$(SUFFIX) \
+ dpprfs.$(SUFFIX) dppsv.$(SUFFIX) dppsvx.$(SUFFIX) dpptrf.$(SUFFIX) dpptri.$(SUFFIX) dpptrs.$(SUFFIX) dptcon.$(SUFFIX) \
+ dpteqr.$(SUFFIX) dptrfs.$(SUFFIX) dptsv.$(SUFFIX) dptsvx.$(SUFFIX) dpttrs.$(SUFFIX) dptts2.$(SUFFIX) drscl.$(SUFFIX) \
+ dsbev.$(SUFFIX) dsbevd.$(SUFFIX) dsbevx.$(SUFFIX) dsbgst.$(SUFFIX) dsbgv.$(SUFFIX) dsbgvd.$(SUFFIX) dsbgvx.$(SUFFIX) \
+ dsbtrd.$(SUFFIX) dspcon.$(SUFFIX) dspev.$(SUFFIX) dspevd.$(SUFFIX) dspevx.$(SUFFIX) dspgst.$(SUFFIX) \
+ dspgv.$(SUFFIX) dspgvd.$(SUFFIX) dspgvx.$(SUFFIX) dsprfs.$(SUFFIX) dspsv.$(SUFFIX) dspsvx.$(SUFFIX) dsptrd.$(SUFFIX) \
+ dsptrf.$(SUFFIX) dsptri.$(SUFFIX) dsptrs.$(SUFFIX) dstegr.$(SUFFIX) dstein.$(SUFFIX) dstev.$(SUFFIX) dstevd.$(SUFFIX) dstevr.$(SUFFIX) \
+ dstevx.$(SUFFIX) dsycon.$(SUFFIX) dsyev.$(SUFFIX) dsyevd.$(SUFFIX) dsyevr.$(SUFFIX) \
+ dsyevx.$(SUFFIX) dsygs2.$(SUFFIX) dsygst.$(SUFFIX) dsygv.$(SUFFIX) dsygvd.$(SUFFIX) dsygvx.$(SUFFIX) dsyrfs.$(SUFFIX) \
+ dsysv.$(SUFFIX) dsysvx.$(SUFFIX) \
+ dsytd2.$(SUFFIX) dsytf2.$(SUFFIX) dsytrd.$(SUFFIX) dsytrf.$(SUFFIX) dsytri.$(SUFFIX) dsytrs.$(SUFFIX) dtbcon.$(SUFFIX) \
+ dtbrfs.$(SUFFIX) dtbtrs.$(SUFFIX) dtgevc.$(SUFFIX) dtgex2.$(SUFFIX) dtgexc.$(SUFFIX) dtgsen.$(SUFFIX) \
+ dtgsja.$(SUFFIX) dtgsna.$(SUFFIX) dtgsy2.$(SUFFIX) dtgsyl.$(SUFFIX) dtpcon.$(SUFFIX) dtprfs.$(SUFFIX) dtptri.$(SUFFIX) \
+ dtptrs.$(SUFFIX) \
+ dtrcon.$(SUFFIX) dtrevc.$(SUFFIX) dtrexc.$(SUFFIX) dtrrfs.$(SUFFIX) dtrsen.$(SUFFIX) dtrsna.$(SUFFIX) dtrsyl.$(SUFFIX) \
+ dtrtrs.$(SUFFIX) dtzrqf.$(SUFFIX) dtzrzf.$(SUFFIX) dstemr.$(SUFFIX) \
+ dsgesv.$(SUFFIX) dlag2s.$(SUFFIX) slag2d.$(SUFFIX)
ZLASRC = \
- zbdsqr.o zgbbrd.o zgbcon.o zgbequ.o zgbrfs.o zgbsv.o zgbsvx.o \
- zgbtf2.o zgbtrf.o zgbtrs.o zgebak.o zgebal.o zgebd2.o zgebrd.o \
- zgecon.o zgeequ.o zgees.o zgeesx.o zgeev.o zgeevx.o \
- zgegs.o zgegv.o zgehd2.o zgehrd.o zgelq2.o zgelqf.o \
- zgels.o zgelsd.o zgelss.o zgelsx.o zgelsy.o zgeql2.o zgeqlf.o zgeqp3.o \
- zgeqpf.o zgeqr2.o zgeqrf.o zgerfs.o zgerq2.o zgerqf.o \
- zgesc2.o zgesdd.o zgesv.o zgesvd.o zgesvx.o zgetc2.o zgetf2.o zgetrf.o \
- zgetri.o zgetrs.o \
- zggbak.o zggbal.o zgges.o zggesx.o zggev.o zggevx.o zggglm.o \
- zgghrd.o zgglse.o zggqrf.o zggrqf.o \
- zggsvd.o zggsvp.o \
- zgtcon.o zgtrfs.o zgtsv.o zgtsvx.o zgttrf.o zgttrs.o zgtts2.o zhbev.o \
- zhbevd.o zhbevx.o zhbgst.o zhbgv.o zhbgvd.o zhbgvx.o zhbtrd.o \
- zhecon.o zheev.o zheevd.o zheevr.o zheevx.o zhegs2.o zhegst.o \
- zhegv.o zhegvd.o zhegvx.o zherfs.o zhesv.o zhesvx.o zhetd2.o \
- zhetf2.o zhetrd.o \
- zhetrf.o zhetri.o zhetrs.o zhgeqz.o zhpcon.o zhpev.o zhpevd.o \
- zhpevx.o zhpgst.o zhpgv.o zhpgvd.o zhpgvx.o zhprfs.o zhpsv.o \
- zhpsvx.o \
- zhptrd.o zhptrf.o zhptri.o zhptrs.o zhsein.o zhseqr.o zlabrd.o \
- zlacgv.o zlacon.o zlacn2.o zlacp2.o zlacpy.o zlacrm.o zlacrt.o zladiv.o \
- zlaed0.o zlaed7.o zlaed8.o \
- zlaein.o zlaesy.o zlaev2.o zlags2.o zlagtm.o \
- zlahef.o zlahqr.o \
- zlahrd.o zlahr2.o zlaic1.o zlals0.o zlalsa.o zlalsd.o zlangb.o zlange.o \
- zlangt.o zlanhb.o \
- zlanhe.o \
- zlanhp.o zlanhs.o zlanht.o zlansb.o zlansp.o zlansy.o zlantb.o \
- zlantp.o zlantr.o zlapll.o zlapmt.o zlaqgb.o zlaqge.o \
- zlaqhb.o zlaqhe.o zlaqhp.o zlaqp2.o zlaqps.o zlaqsb.o \
- zlaqr0.o zlaqr1.o zlaqr2.o zlaqr3.o zlaqr4.o zlaqr5.o \
- zlaqsp.o zlaqsy.o zlar1v.o zlar2v.o zlarcm.o zlarf.o zlarfb.o \
- zlarfg.o zlarft.o \
- zlarfx.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \
- zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \
- zlassq.o zlaswp.o zlasyf.o \
- zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlatzm.o zlauu2.o \
- zlauum.o zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \
- zpbsvx.o zpbtf2.o zpbtrf.o zpbtrs.o zpocon.o zpoequ.o zporfs.o \
- zposv.o zposvx.o zpotf2.o zpotrf.o zpotri.o zpotrs.o zppcon.o \
- zppequ.o zpprfs.o zppsv.o zppsvx.o zpptrf.o zpptri.o zpptrs.o \
- zptcon.o zpteqr.o zptrfs.o zptsv.o zptsvx.o zpttrf.o zpttrs.o zptts2.o \
- zrot.o zspcon.o zspmv.o zspr.o zsprfs.o zspsv.o \
- zspsvx.o zsptrf.o zsptri.o zsptrs.o zdrscl.o zstedc.o \
- zstegr.o zstein.o zsteqr.o zsycon.o zsymv.o \
- zsyr.o zsyrfs.o zsysv.o zsysvx.o zsytf2.o zsytrf.o zsytri.o \
- zsytrs.o ztbcon.o ztbrfs.o ztbtrs.o ztgevc.o ztgex2.o \
- ztgexc.o ztgsen.o ztgsja.o ztgsna.o ztgsy2.o ztgsyl.o ztpcon.o \
- ztprfs.o ztptri.o \
- ztptrs.o ztrcon.o ztrevc.o ztrexc.o ztrrfs.o ztrsen.o ztrsna.o \
- ztrsyl.o ztrti2.o ztrtri.o ztrtrs.o ztzrqf.o ztzrzf.o zung2l.o \
- zung2r.o zungbr.o zunghr.o zungl2.o zunglq.o zungql.o zungqr.o zungr2.o \
- zungrq.o zungtr.o zunm2l.o zunm2r.o zunmbr.o zunmhr.o zunml2.o \
- zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \
- zunmtr.o zupgtr.o \
- zupmtr.o izmax1.o dzsum1.o zstemr.o \
- zcgesv.o zlag2c.o clag2z.o
+ zbdsqr.$(SUFFIX) zgbbrd.$(SUFFIX) zgbcon.$(SUFFIX) zgbequ.$(SUFFIX) zgbrfs.$(SUFFIX) zgbsv.$(SUFFIX) zgbsvx.$(SUFFIX) \
+ zgbtf2.$(SUFFIX) zgbtrf.$(SUFFIX) zgbtrs.$(SUFFIX) zgebak.$(SUFFIX) zgebal.$(SUFFIX) zgebd2.$(SUFFIX) zgebrd.$(SUFFIX) \
+ zgecon.$(SUFFIX) zgeequ.$(SUFFIX) zgees.$(SUFFIX) zgeesx.$(SUFFIX) zgeev.$(SUFFIX) zgeevx.$(SUFFIX) \
+ zgegs.$(SUFFIX) zgegv.$(SUFFIX) zgehd2.$(SUFFIX) zgehrd.$(SUFFIX) zgelq2.$(SUFFIX) zgelqf.$(SUFFIX) \
+ zgels.$(SUFFIX) zgelsd.$(SUFFIX) zgelss.$(SUFFIX) zgelsx.$(SUFFIX) zgelsy.$(SUFFIX) zgeql2.$(SUFFIX) zgeqlf.$(SUFFIX) zgeqp3.$(SUFFIX) \
+ zgeqpf.$(SUFFIX) zgeqr2.$(SUFFIX) zgeqrf.$(SUFFIX) zgerfs.$(SUFFIX) zgerq2.$(SUFFIX) zgerqf.$(SUFFIX) \
+ zgesc2.$(SUFFIX) zgesdd.$(SUFFIX) zgesvd.$(SUFFIX) zgesvx.$(SUFFIX) zgetc2.$(SUFFIX) \
+ zgetri.$(SUFFIX) \
+ zggbak.$(SUFFIX) zggbal.$(SUFFIX) zgges.$(SUFFIX) zggesx.$(SUFFIX) zggev.$(SUFFIX) zggevx.$(SUFFIX) zggglm.$(SUFFIX) \
+ zgghrd.$(SUFFIX) zgglse.$(SUFFIX) zggqrf.$(SUFFIX) zggrqf.$(SUFFIX) \
+ zggsvd.$(SUFFIX) zggsvp.$(SUFFIX) \
+ zgtcon.$(SUFFIX) zgtrfs.$(SUFFIX) zgtsv.$(SUFFIX) zgtsvx.$(SUFFIX) zgttrf.$(SUFFIX) zgttrs.$(SUFFIX) zgtts2.$(SUFFIX) zhbev.$(SUFFIX) \
+ zhbevd.$(SUFFIX) zhbevx.$(SUFFIX) zhbgst.$(SUFFIX) zhbgv.$(SUFFIX) zhbgvd.$(SUFFIX) zhbgvx.$(SUFFIX) zhbtrd.$(SUFFIX) \
+ zhecon.$(SUFFIX) zheev.$(SUFFIX) zheevd.$(SUFFIX) zheevr.$(SUFFIX) zheevx.$(SUFFIX) zhegs2.$(SUFFIX) zhegst.$(SUFFIX) \
+ zhegv.$(SUFFIX) zhegvd.$(SUFFIX) zhegvx.$(SUFFIX) zherfs.$(SUFFIX) zhesv.$(SUFFIX) zhesvx.$(SUFFIX) zhetd2.$(SUFFIX) \
+ zhetf2.$(SUFFIX) zhetrd.$(SUFFIX) \
+ zhetrf.$(SUFFIX) zhetri.$(SUFFIX) zhetrs.$(SUFFIX) zhgeqz.$(SUFFIX) zhpcon.$(SUFFIX) zhpev.$(SUFFIX) zhpevd.$(SUFFIX) \
+ zhpevx.$(SUFFIX) zhpgst.$(SUFFIX) zhpgv.$(SUFFIX) zhpgvd.$(SUFFIX) zhpgvx.$(SUFFIX) zhprfs.$(SUFFIX) zhpsv.$(SUFFIX) \
+ zhpsvx.$(SUFFIX) \
+ zhptrd.$(SUFFIX) zhptrf.$(SUFFIX) zhptri.$(SUFFIX) zhptrs.$(SUFFIX) zhsein.$(SUFFIX) zhseqr.$(SUFFIX) zlabrd.$(SUFFIX) \
+ zlacgv.$(SUFFIX) zlacon.$(SUFFIX) zlacn2.$(SUFFIX) zlacp2.$(SUFFIX) zlacpy.$(SUFFIX) zlacrm.$(SUFFIX) zlacrt.$(SUFFIX) zladiv.$(SUFFIX) \
+ zlaed0.$(SUFFIX) zlaed7.$(SUFFIX) zlaed8.$(SUFFIX) \
+ zlaein.$(SUFFIX) zlaesy.$(SUFFIX) zlaev2.$(SUFFIX) zlags2.$(SUFFIX) zlagtm.$(SUFFIX) \
+ zlahef.$(SUFFIX) zlahqr.$(SUFFIX) \
+ zlahrd.$(SUFFIX) zlahr2.$(SUFFIX) zlaic1.$(SUFFIX) zlals0.$(SUFFIX) zlalsa.$(SUFFIX) zlalsd.$(SUFFIX) zlangb.$(SUFFIX) zlange.$(SUFFIX) \
+ zlangt.$(SUFFIX) zlanhb.$(SUFFIX) \
+ zlanhe.$(SUFFIX) \
+ zlanhp.$(SUFFIX) zlanhs.$(SUFFIX) zlanht.$(SUFFIX) zlansb.$(SUFFIX) zlansp.$(SUFFIX) zlansy.$(SUFFIX) zlantb.$(SUFFIX) \
+ zlantp.$(SUFFIX) zlantr.$(SUFFIX) zlapll.$(SUFFIX) zlapmt.$(SUFFIX) zlaqgb.$(SUFFIX) zlaqge.$(SUFFIX) \
+ zlaqhb.$(SUFFIX) zlaqhe.$(SUFFIX) zlaqhp.$(SUFFIX) zlaqp2.$(SUFFIX) zlaqps.$(SUFFIX) zlaqsb.$(SUFFIX) \
+ zlaqr0.$(SUFFIX) zlaqr1.$(SUFFIX) zlaqr2.$(SUFFIX) zlaqr3.$(SUFFIX) zlaqr4.$(SUFFIX) zlaqr5.$(SUFFIX) \
+ zlaqsp.$(SUFFIX) zlaqsy.$(SUFFIX) zlar1v.$(SUFFIX) zlar2v.$(SUFFIX) zlarcm.$(SUFFIX) zlarf.$(SUFFIX) zlarfb.$(SUFFIX) \
+ zlarfg.$(SUFFIX) zlarft.$(SUFFIX) \
+ zlarfx.$(SUFFIX) zlargv.$(SUFFIX) zlarnv.$(SUFFIX) zlarrv.$(SUFFIX) zlartg.$(SUFFIX) zlartv.$(SUFFIX) \
+ zlarz.$(SUFFIX) zlarzb.$(SUFFIX) zlarzt.$(SUFFIX) zlascl.$(SUFFIX) zlaset.$(SUFFIX) zlasr.$(SUFFIX) \
+ zlassq.$(SUFFIX) zlasyf.$(SUFFIX) \
+ zlatbs.$(SUFFIX) zlatdf.$(SUFFIX) zlatps.$(SUFFIX) zlatrd.$(SUFFIX) zlatrs.$(SUFFIX) zlatrz.$(SUFFIX) zlatzm.$(SUFFIX) \
+ zpbcon.$(SUFFIX) zpbequ.$(SUFFIX) zpbrfs.$(SUFFIX) zpbstf.$(SUFFIX) zpbsv.$(SUFFIX) \
+ zpbsvx.$(SUFFIX) zpbtf2.$(SUFFIX) zpbtrf.$(SUFFIX) zpbtrs.$(SUFFIX) zpocon.$(SUFFIX) zpoequ.$(SUFFIX) zporfs.$(SUFFIX) \
+ zposv.$(SUFFIX) zposvx.$(SUFFIX) zpotrs.$(SUFFIX) zppcon.$(SUFFIX) \
+ zppequ.$(SUFFIX) zpprfs.$(SUFFIX) zppsv.$(SUFFIX) zppsvx.$(SUFFIX) zpptrf.$(SUFFIX) zpptri.$(SUFFIX) zpptrs.$(SUFFIX) \
+ zptcon.$(SUFFIX) zpteqr.$(SUFFIX) zptrfs.$(SUFFIX) zptsv.$(SUFFIX) zptsvx.$(SUFFIX) zpttrf.$(SUFFIX) zpttrs.$(SUFFIX) zptts2.$(SUFFIX) \
+ zrot.$(SUFFIX) zspcon.$(SUFFIX) zsprfs.$(SUFFIX) zspsv.$(SUFFIX) \
+ zspsvx.$(SUFFIX) zsptrf.$(SUFFIX) zsptri.$(SUFFIX) zsptrs.$(SUFFIX) zdrscl.$(SUFFIX) zstedc.$(SUFFIX) \
+ zstegr.$(SUFFIX) zstein.$(SUFFIX) zsteqr.$(SUFFIX) zsycon.$(SUFFIX) \
+ zsyrfs.$(SUFFIX) zsysv.$(SUFFIX) zsysvx.$(SUFFIX) zsytf2.$(SUFFIX) zsytrf.$(SUFFIX) zsytri.$(SUFFIX) \
+ zsytrs.$(SUFFIX) ztbcon.$(SUFFIX) ztbrfs.$(SUFFIX) ztbtrs.$(SUFFIX) ztgevc.$(SUFFIX) ztgex2.$(SUFFIX) \
+ ztgexc.$(SUFFIX) ztgsen.$(SUFFIX) ztgsja.$(SUFFIX) ztgsna.$(SUFFIX) ztgsy2.$(SUFFIX) ztgsyl.$(SUFFIX) ztpcon.$(SUFFIX) \
+ ztprfs.$(SUFFIX) ztptri.$(SUFFIX) \
+ ztptrs.$(SUFFIX) ztrcon.$(SUFFIX) ztrevc.$(SUFFIX) ztrexc.$(SUFFIX) ztrrfs.$(SUFFIX) ztrsen.$(SUFFIX) ztrsna.$(SUFFIX) \
+ ztrsyl.$(SUFFIX) ztrtrs.$(SUFFIX) ztzrqf.$(SUFFIX) ztzrzf.$(SUFFIX) zung2l.$(SUFFIX) \
+ zung2r.$(SUFFIX) zungbr.$(SUFFIX) zunghr.$(SUFFIX) zungl2.$(SUFFIX) zunglq.$(SUFFIX) zungql.$(SUFFIX) zungqr.$(SUFFIX) zungr2.$(SUFFIX) \
+ zungrq.$(SUFFIX) zungtr.$(SUFFIX) zunm2l.$(SUFFIX) zunm2r.$(SUFFIX) zunmbr.$(SUFFIX) zunmhr.$(SUFFIX) zunml2.$(SUFFIX) \
+ zunmlq.$(SUFFIX) zunmql.$(SUFFIX) zunmqr.$(SUFFIX) zunmr2.$(SUFFIX) zunmr3.$(SUFFIX) zunmrq.$(SUFFIX) zunmrz.$(SUFFIX) \
+ zunmtr.$(SUFFIX) zupgtr.$(SUFFIX) \
+ zupmtr.$(SUFFIX) izmax1.$(SUFFIX) dzsum1.$(SUFFIX) zstemr.$(SUFFIX) \
+ zcgesv.$(SUFFIX) zlag2c.$(SUFFIX) clag2z.$(SUFFIX)
all: ../$(LAPACKLIB)
+lapack_prof: ../$(LAPACKLIB_P)
+
ALLOBJ=$(SLASRC) $(DLASRC) $(CLASRC) $(ZLASRC) $(SCLAUX) $(DZLAUX) \
$(ALLAUX)
+ALLOBJ_P = $(ALLOBJ:.$(SUFFIX)=.$(PSUFFIX))
+
../$(LAPACKLIB): $(ALLOBJ)
$(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ)
$(RANLIB) $@
+../$(LAPACKLIB_P): $(ALLOBJ_P)
+ $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ_P)
+ $(RANLIB) $@
+
single: $(SLASRC) $(ALLAUX) $(SCLAUX)
$(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(SLASRC) $(ALLAUX) \
$(SCLAUX)
@@ -317,6 +325,7 @@
$(DZLAUX)
$(RANLIB) ../$(LAPACKLIB)
+
$(ALLAUX): $(FRC)
$(SCLAUX): $(FRC)
$(DZLAUX): $(FRC)
@@ -329,11 +338,16 @@
@FRC=$(FRC)
clean:
- rm -f *.o
+ rm -f *.$(SUFFIX) *.$(PSUFFIX)
-.f.o:
+%.$(SUFFIX): %.f
$(FORTRAN) $(OPTS) -c $< -o $@
-slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
-dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
+%.$(PSUFFIX): %.f
+ $(FORTRAN) $(POPTS) -c $< -o $@
+
+slaruv.$(SUFFIX): slaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
+dlaruv.$(SUFFIX): dlaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
+slaruv.$(PSUFFIX): slaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
+dlaruv.$(PSUFFIX): dlaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
diff -ruN lapack-3.1.1.old/TESTING/EIG/Makefile lapack-3.1.1/TESTING/EIG/Makefile
--- lapack-3.1.1.old/TESTING/EIG/Makefile 2007-02-20 15:33:03.000000000 -0600
+++ lapack-3.1.1/TESTING/EIG/Makefile 2009-12-16 14:40:35.000000000 -0600
@@ -78,7 +78,7 @@
cget35.o cget36.o cget37.o cget38.o cget51.o cget52.o \
cget54.o cglmts.o cgqrts.o cgrqts.o cgsvts.o \
chbt21.o chet21.o chet22.o chpt21.o chst01.o \
- clarfy.o clarhs.o clatm4.o clctes.o clctsx.o clsets.o csbmv.o \
+ clarfy.o clarhs.o clatm4.o clctes.o clctsx.o clsets.o \
csgt01.o cslect.o \
cstt21.o cstt22.o cunt01.o cunt03.o
@@ -115,7 +115,7 @@
zget35.o zget36.o zget37.o zget38.o zget51.o zget52.o \
zget54.o zglmts.o zgqrts.o zgrqts.o zgsvts.o \
zhbt21.o zhet21.o zhet22.o zhpt21.o zhst01.o \
- zlarfy.o zlarhs.o zlatm4.o zlctes.o zlctsx.o zlsets.o zsbmv.o \
+ zlarfy.o zlarhs.o zlatm4.o zlctes.o zlctsx.o zlsets.o \
zsgt01.o zslect.o \
zstt21.o zstt22.o zunt01.o zunt03.o
@@ -129,22 +129,22 @@
../xeigtsts: $(SEIGTST) $(SCIGTST) $(AEIGTST) ; \
$(LOADER) $(LOADOPTS) -o $@ \
$(SEIGTST) $(SCIGTST) $(AEIGTST) ../../$(TMGLIB) \
- ../../$(LAPACKLIB) $(BLASLIB)
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB)
../xeigtstc: $(CEIGTST) $(SCIGTST) $(AEIGTST) ; \
$(LOADER) $(LOADOPTS) -o $@ \
$(CEIGTST) $(SCIGTST) $(AEIGTST) ../../$(TMGLIB) \
- ../../$(LAPACKLIB) $(BLASLIB)
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB)
../xeigtstd: $(DEIGTST) $(DZIGTST) $(AEIGTST) ; \
$(LOADER) $(LOADOPTS) -o $@ \
$(DEIGTST) $(DZIGTST) $(AEIGTST) ../../$(TMGLIB) \
- ../../$(LAPACKLIB) $(BLASLIB)
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB)
../xeigtstz: $(ZEIGTST) $(DZIGTST) $(AEIGTST) ; \
$(LOADER) $(LOADOPTS) -o $@ \
$(ZEIGTST) $(DZIGTST) $(AEIGTST) ../../$(TMGLIB) \
- ../../$(LAPACKLIB) $(BLASLIB)
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB)
$(AEIGTST): $(FRC)
$(SCIGTST): $(FRC)
diff -ruN lapack-3.1.1.old/TESTING/LIN/Makefile lapack-3.1.1/TESTING/LIN/Makefile
--- lapack-3.1.1.old/TESTING/LIN/Makefile 2007-02-20 15:33:03.000000000 -0600
+++ lapack-3.1.1/TESTING/LIN/Makefile 2009-12-16 14:40:35.000000000 -0600
@@ -97,7 +97,7 @@
cqpt01.o cqrt01.o cqrt02.o cqrt03.o cqrt11.o \
cqrt12.o cqrt13.o cqrt14.o cqrt15.o cqrt16.o \
cqrt17.o crqt01.o crqt02.o crqt03.o crzt01.o crzt02.o \
- csbmv.o cspt01.o \
+ cspt01.o \
cspt02.o cspt03.o csyt01.o csyt02.o csyt03.o \
ctbt02.o ctbt03.o ctbt05.o ctbt06.o ctpt01.o \
ctpt02.o ctpt03.o ctpt05.o ctpt06.o ctrt01.o \
@@ -159,7 +159,7 @@
zqpt01.o zqrt01.o zqrt02.o zqrt03.o zqrt11.o \
zqrt12.o zqrt13.o zqrt14.o zqrt15.o zqrt16.o \
zqrt17.o zrqt01.o zrqt02.o zrqt03.o zrzt01.o zrzt02.o \
- zsbmv.o zspt01.o \
+ zspt01.o \
zspt02.o zspt03.o zsyt01.o zsyt02.o zsyt03.o \
ztbt02.o ztbt03.o ztbt05.o ztbt06.o ztpt01.o \
ztpt02.o ztpt03.o ztpt05.o ztpt06.o ztrt01.o \
@@ -176,7 +176,7 @@
zdrvab.o zerrab.o zget08.o \
alaerh.o alahd.o aladhd.o alareq.o \
chkxer.o zget02.o zlarhs.o zlatb4.o \
- zsbmv.o xerbla.o
+ xerbla.o
all: single double complex complex16 proto-double proto-complex16
@@ -190,27 +190,27 @@
../xlintsts : $(ALINTST) $(SLINTST) $(SCLNTST)
$(LOADER) $(LOADOPTS) $(ALINTST) $(SCLNTST) $(SLINTST) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
../xlintstc : $(ALINTST) $(CLINTST) $(SCLNTST)
$(LOADER) $(LOADOPTS) $(ALINTST) $(SCLNTST) $(CLINTST) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
../xlintstd : $(ALINTST) $(DLINTST) $(DZLNTST)
$(LOADER) $(LOADOPTS) $(ALINTST) $(DZLNTST) $(DLINTST) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
../xlintstz : $(ALINTST) $(ZLINTST) $(DZLNTST)
$(LOADER) $(LOADOPTS) $(ALINTST) $(DZLNTST) $(ZLINTST) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
../xlintstds : $(DSLINTST)
$(LOADER) $(LOADOPTS) $(DSLINTST) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
../xlintstzc : $(ZCLINTST)
$(LOADER) $(LOADOPTS) $(ZCLINTST) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
$(ALINTST): $(FRC)
$(SCLNTST): $(FRC)

File diff suppressed because it is too large Load Diff

View File

@@ -1,932 +0,0 @@
diff -ruN lapack-3.4.1.old/INSTALL/Makefile lapack-3.4.1/INSTALL/Makefile
--- lapack-3.4.1.old/INSTALL/Makefile 2011-10-01 04:37:03 +0200
+++ lapack-3.4.1/INSTALL/Makefile 2012-04-22 21:48:48 +0200
@@ -27,7 +27,7 @@
$(LOADER) $(LOADOPTS) -o testversion ilaver.o LAPACK_version.o
clean:
- rm -f *.o
+ rm -f *.o test*
.f.o:
$(FORTRAN) $(OPTS) -c $< -o $@
diff -ruN lapack-3.4.1.old/Makefile lapack-3.4.1/Makefile
--- lapack-3.4.1.old/Makefile 2012-04-13 20:13:07 +0200
+++ lapack-3.4.1/Makefile 2012-04-22 21:48:07 +0200
@@ -20,9 +20,12 @@
blaslib:
( cd BLAS/SRC; $(MAKE) )
-lapacklib: lapack_install
+lapacklib:
( cd SRC; $(MAKE) )
+lapack_prof:
+ ( cd SRC; $(MAKE) lapack_prof)
+
lapackelib: lapacklib
( cd lapacke; $(MAKE) )
diff -ruN lapack-3.4.1.old/SRC/Makefile lapack-3.4.1/SRC/Makefile
--- lapack-3.4.1.old/SRC/Makefile 2012-04-02 21:06:36 +0200
+++ lapack-3.4.1/SRC/Makefile 2012-04-22 21:40:21 +0200
@@ -54,363 +54,371 @@
#
#######################################################################
-ALLAUX = ilaenv.o ieeeck.o lsamen.o xerbla.o xerbla_array.o iparmq.o \
- ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \
- ../INSTALL/ilaver.o ../INSTALL/lsame.o ../INSTALL/slamch.o
+ALLAUX = ilaenv.$(SUFFIX) ieeeck.$(SUFFIX) lsamen.$(SUFFIX) xerbla_array.$(SUFFIX) iparmq.$(SUFFIX) \
+ ilaprec.$(SUFFIX) ilatrans.$(SUFFIX) ilauplo.$(SUFFIX) iladiag.$(SUFFIX) chla_transtype.$(SUFFIX) \
+ ../INSTALL/ilaver.$(SUFFIX)
SCLAUX = \
- sbdsdc.o \
- sbdsqr.o sdisna.o slabad.o slacpy.o sladiv.o slae2.o slaebz.o \
- slaed0.o slaed1.o slaed2.o slaed3.o slaed4.o slaed5.o slaed6.o \
- slaed7.o slaed8.o slaed9.o slaeda.o slaev2.o slagtf.o \
- slagts.o slamrg.o slanst.o \
- slapy2.o slapy3.o slarnv.o \
- slarra.o slarrb.o slarrc.o slarrd.o slarre.o slarrf.o slarrj.o \
- slarrk.o slarrr.o slaneg.o \
- slartg.o slaruv.o slas2.o slascl.o \
- slasd0.o slasd1.o slasd2.o slasd3.o slasd4.o slasd5.o slasd6.o \
- slasd7.o slasd8.o slasda.o slasdq.o slasdt.o \
- slaset.o slasq1.o slasq2.o slasq3.o slasq4.o slasq5.o slasq6.o \
- slasr.o slasrt.o slassq.o slasv2.o spttrf.o sstebz.o sstedc.o \
- ssteqr.o ssterf.o slaisnan.o sisnan.o \
- slartgp.o slartgs.o \
- ../INSTALL/second_$(TIMER).o
+ sbdsdc.$(SUFFIX) \
+ sbdsqr.$(SUFFIX) sdisna.$(SUFFIX) slabad.$(SUFFIX) slacpy.$(SUFFIX) sladiv.$(SUFFIX) slae2.$(SUFFIX) slaebz.$(SUFFIX) \
+ slaed0.$(SUFFIX) slaed1.$(SUFFIX) slaed2.$(SUFFIX) slaed3.$(SUFFIX) slaed4.$(SUFFIX) slaed5.$(SUFFIX) slaed6.$(SUFFIX) \
+ slaed7.$(SUFFIX) slaed8.$(SUFFIX) slaed9.$(SUFFIX) slaeda.$(SUFFIX) slaev2.$(SUFFIX) slagtf.$(SUFFIX) \
+ slagts.$(SUFFIX) slamrg.$(SUFFIX) slanst.$(SUFFIX) \
+ slapy2.$(SUFFIX) slapy3.$(SUFFIX) slarnv.$(SUFFIX) \
+ slarra.$(SUFFIX) slarrb.$(SUFFIX) slarrc.$(SUFFIX) slarrd.$(SUFFIX) slarre.$(SUFFIX) slarrf.$(SUFFIX) slarrj.$(SUFFIX) \
+ slarrk.$(SUFFIX) slarrr.$(SUFFIX) slaneg.$(SUFFIX) \
+ slartg.$(SUFFIX) slaruv.$(SUFFIX) slas2.$(SUFFIX) slascl.$(SUFFIX) \
+ slasd0.$(SUFFIX) slasd1.$(SUFFIX) slasd2.$(SUFFIX) slasd3.$(SUFFIX) slasd4.$(SUFFIX) slasd5.$(SUFFIX) slasd6.$(SUFFIX) \
+ slasd7.$(SUFFIX) slasd8.$(SUFFIX) slasda.$(SUFFIX) slasdq.$(SUFFIX) slasdt.$(SUFFIX) \
+ slaset.$(SUFFIX) slasq1.$(SUFFIX) slasq2.$(SUFFIX) slasq3.$(SUFFIX) slasq4.$(SUFFIX) slasq5.$(SUFFIX) slasq6.$(SUFFIX) \
+ slasr.$(SUFFIX) slasrt.$(SUFFIX) slassq.$(SUFFIX) slasv2.$(SUFFIX) spttrf.$(SUFFIX) sstebz.$(SUFFIX) sstedc.$(SUFFIX) \
+ ssteqr.$(SUFFIX) ssterf.$(SUFFIX) slaisnan.$(SUFFIX) sisnan.$(SUFFIX) \
+ slartgp.$(SUFFIX) slartgs.$(SUFFIX) \
+ ../INSTALL/second_$(TIMER).$(SUFFIX)
DZLAUX = \
- dbdsdc.o \
- dbdsqr.o ddisna.o dlabad.o dlacpy.o dladiv.o dlae2.o dlaebz.o \
- dlaed0.o dlaed1.o dlaed2.o dlaed3.o dlaed4.o dlaed5.o dlaed6.o \
- dlaed7.o dlaed8.o dlaed9.o dlaeda.o dlaev2.o dlagtf.o \
- dlagts.o dlamrg.o dlanst.o \
- dlapy2.o dlapy3.o dlarnv.o \
- dlarra.o dlarrb.o dlarrc.o dlarrd.o dlarre.o dlarrf.o dlarrj.o \
- dlarrk.o dlarrr.o dlaneg.o \
- dlartg.o dlaruv.o dlas2.o dlascl.o \
- dlasd0.o dlasd1.o dlasd2.o dlasd3.o dlasd4.o dlasd5.o dlasd6.o \
- dlasd7.o dlasd8.o dlasda.o dlasdq.o dlasdt.o \
- dlaset.o dlasq1.o dlasq2.o dlasq3.o dlasq4.o dlasq5.o dlasq6.o \
- dlasr.o dlasrt.o dlassq.o dlasv2.o dpttrf.o dstebz.o dstedc.o \
- dsteqr.o dsterf.o dlaisnan.o disnan.o \
- dlartgp.o dlartgs.o \
- ../INSTALL/dlamch.o ../INSTALL/dsecnd_$(TIMER).o
+ dbdsdc.$(SUFFIX) \
+ dbdsqr.$(SUFFIX) ddisna.$(SUFFIX) dlabad.$(SUFFIX) dlacpy.$(SUFFIX) dladiv.$(SUFFIX) dlae2.$(SUFFIX) dlaebz.$(SUFFIX) \
+ dlaed0.$(SUFFIX) dlaed1.$(SUFFIX) dlaed2.$(SUFFIX) dlaed3.$(SUFFIX) dlaed4.$(SUFFIX) dlaed5.$(SUFFIX) dlaed6.$(SUFFIX) \
+ dlaed7.$(SUFFIX) dlaed8.$(SUFFIX) dlaed9.$(SUFFIX) dlaeda.$(SUFFIX) dlaev2.$(SUFFIX) dlagtf.$(SUFFIX) \
+ dlagts.$(SUFFIX) dlamrg.$(SUFFIX) dlanst.$(SUFFIX) \
+ dlapy2.$(SUFFIX) dlapy3.$(SUFFIX) dlarnv.$(SUFFIX) \
+ dlarra.$(SUFFIX) dlarrb.$(SUFFIX) dlarrc.$(SUFFIX) dlarrd.$(SUFFIX) dlarre.$(SUFFIX) dlarrf.$(SUFFIX) dlarrj.$(SUFFIX) \
+ dlarrk.$(SUFFIX) dlarrr.$(SUFFIX) dlaneg.$(SUFFIX) \
+ dlartg.$(SUFFIX) dlaruv.$(SUFFIX) dlas2.$(SUFFIX) dlascl.$(SUFFIX) \
+ dlasd0.$(SUFFIX) dlasd1.$(SUFFIX) dlasd2.$(SUFFIX) dlasd3.$(SUFFIX) dlasd4.$(SUFFIX) dlasd5.$(SUFFIX) dlasd6.$(SUFFIX) \
+ dlasd7.$(SUFFIX) dlasd8.$(SUFFIX) dlasda.$(SUFFIX) dlasdq.$(SUFFIX) dlasdt.$(SUFFIX) \
+ dlaset.$(SUFFIX) dlasq1.$(SUFFIX) dlasq2.$(SUFFIX) dlasq3.$(SUFFIX) dlasq4.$(SUFFIX) dlasq5.$(SUFFIX) dlasq6.$(SUFFIX) \
+ dlasr.$(SUFFIX) dlasrt.$(SUFFIX) dlassq.$(SUFFIX) dlasv2.$(SUFFIX) dpttrf.$(SUFFIX) dstebz.$(SUFFIX) dstedc.$(SUFFIX) \
+ dsteqr.$(SUFFIX) dsterf.$(SUFFIX) dlaisnan.$(SUFFIX) disnan.$(SUFFIX) \
+ dlartgp.$(SUFFIX) dlartgs.$(SUFFIX) \
+ ../INSTALL/dsecnd_$(TIMER).$(SUFFIX)
SLASRC = \
- sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \
- sgbsvx.o sgbtf2.o sgbtrf.o sgbtrs.o sgebak.o sgebal.o sgebd2.o \
- sgebrd.o sgecon.o sgeequ.o sgees.o sgeesx.o sgeev.o sgeevx.o \
- sgegs.o sgegv.o sgehd2.o sgehrd.o sgelq2.o sgelqf.o \
- sgels.o sgelsd.o sgelss.o sgelsx.o sgelsy.o sgeql2.o sgeqlf.o \
- sgeqp3.o sgeqpf.o sgeqr2.o sgeqr2p.o sgeqrf.o sgeqrfp.o sgerfs.o \
- sgerq2.o sgerqf.o sgesc2.o sgesdd.o sgesv.o sgesvd.o sgesvx.o \
- sgetc2.o sgetf2.o sgetri.o \
- sggbak.o sggbal.o sgges.o sggesx.o sggev.o sggevx.o \
- sggglm.o sgghrd.o sgglse.o sggqrf.o \
- sggrqf.o sggsvd.o sggsvp.o sgtcon.o sgtrfs.o sgtsv.o \
- sgtsvx.o sgttrf.o sgttrs.o sgtts2.o shgeqz.o \
- shsein.o shseqr.o slabrd.o slacon.o slacn2.o \
- slaein.o slaexc.o slag2.o slags2.o slagtm.o slagv2.o slahqr.o \
- slahrd.o slahr2.o slaic1.o slaln2.o slals0.o slalsa.o slalsd.o \
- slangb.o slange.o slangt.o slanhs.o slansb.o slansp.o \
- slansy.o slantb.o slantp.o slantr.o slanv2.o \
- slapll.o slapmt.o \
- slaqgb.o slaqge.o slaqp2.o slaqps.o slaqsb.o slaqsp.o slaqsy.o \
- slaqr0.o slaqr1.o slaqr2.o slaqr3.o slaqr4.o slaqr5.o \
- slaqtr.o slar1v.o slar2v.o ilaslr.o ilaslc.o \
- slarf.o slarfb.o slarfg.o slarfgp.o slarft.o slarfx.o slargv.o \
- slarrv.o slartv.o \
- slarz.o slarzb.o slarzt.o slaswp.o slasy2.o slasyf.o \
- slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o slatzm.o \
- slauu2.o slauum.o sopgtr.o sopmtr.o sorg2l.o sorg2r.o \
- sorgbr.o sorghr.o sorgl2.o sorglq.o sorgql.o sorgqr.o sorgr2.o \
- sorgrq.o sorgtr.o sorm2l.o sorm2r.o \
- sormbr.o sormhr.o sorml2.o sormlq.o sormql.o sormqr.o sormr2.o \
- sormr3.o sormrq.o sormrz.o sormtr.o spbcon.o spbequ.o spbrfs.o \
- spbstf.o spbsv.o spbsvx.o \
- spbtf2.o spbtrf.o spbtrs.o spocon.o spoequ.o sporfs.o sposv.o \
- sposvx.o spotf2.o spotri.o spstrf.o spstf2.o \
- sppcon.o sppequ.o \
- spprfs.o sppsv.o sppsvx.o spptrf.o spptri.o spptrs.o sptcon.o \
- spteqr.o sptrfs.o sptsv.o sptsvx.o spttrs.o sptts2.o srscl.o \
- ssbev.o ssbevd.o ssbevx.o ssbgst.o ssbgv.o ssbgvd.o ssbgvx.o \
- ssbtrd.o sspcon.o sspev.o sspevd.o sspevx.o sspgst.o \
- sspgv.o sspgvd.o sspgvx.o ssprfs.o sspsv.o sspsvx.o ssptrd.o \
- ssptrf.o ssptri.o ssptrs.o sstegr.o sstein.o sstev.o sstevd.o sstevr.o \
- sstevx.o \
- ssycon.o ssyev.o ssyevd.o ssyevr.o ssyevx.o ssygs2.o \
- ssygst.o ssygv.o ssygvd.o ssygvx.o ssyrfs.o ssysv.o ssysvx.o \
- ssytd2.o ssytf2.o ssytrd.o ssytrf.o ssytri.o ssytri2.o ssytri2x.o \
- ssyswapr.o ssytrs.o ssytrs2.o ssyconv.o \
- stbcon.o \
- stbrfs.o stbtrs.o stgevc.o stgex2.o stgexc.o stgsen.o \
- stgsja.o stgsna.o stgsy2.o stgsyl.o stpcon.o stprfs.o stptri.o \
- stptrs.o \
- strcon.o strevc.o strexc.o strrfs.o strsen.o strsna.o strsyl.o \
- strti2.o strtri.o strtrs.o stzrqf.o stzrzf.o sstemr.o \
- slansf.o spftrf.o spftri.o spftrs.o ssfrk.o stfsm.o stftri.o stfttp.o \
- stfttr.o stpttf.o stpttr.o strttf.o strttp.o \
- sgejsv.o sgesvj.o sgsvj0.o sgsvj1.o \
- sgeequb.o ssyequb.o spoequb.o sgbequb.o \
- sbbcsd.o slapmr.o sorbdb.o sorcsd.o \
- sgeqrt.o sgeqrt2.o sgeqrt3.o sgemqrt.o \
- stpqrt.o stpqrt2.o stpmqrt.o stprfb.o
+ sgbbrd.$(SUFFIX) sgbcon.$(SUFFIX) sgbequ.$(SUFFIX) sgbrfs.$(SUFFIX) sgbsv.$(SUFFIX) \
+ sgbsvx.$(SUFFIX) sgbtf2.$(SUFFIX) sgbtrf.$(SUFFIX) sgbtrs.$(SUFFIX) sgebak.$(SUFFIX) sgebal.$(SUFFIX) sgebd2.$(SUFFIX) \
+ sgebrd.$(SUFFIX) sgecon.$(SUFFIX) sgeequ.$(SUFFIX) sgees.$(SUFFIX) sgeesx.$(SUFFIX) sgeev.$(SUFFIX) sgeevx.$(SUFFIX) \
+ sgegs.$(SUFFIX) sgegv.$(SUFFIX) sgehd2.$(SUFFIX) sgehrd.$(SUFFIX) sgelq2.$(SUFFIX) sgelqf.$(SUFFIX) \
+ sgels.$(SUFFIX) sgelsd.$(SUFFIX) sgelss.$(SUFFIX) sgelsx.$(SUFFIX) sgelsy.$(SUFFIX) sgeql2.$(SUFFIX) sgeqlf.$(SUFFIX) \
+ sgeqp3.$(SUFFIX) sgeqpf.$(SUFFIX) sgeqr2.$(SUFFIX) sgeqr2p.$(SUFFIX) sgeqrf.$(SUFFIX) sgeqrfp.$(SUFFIX) sgerfs.$(SUFFIX) \
+ sgerq2.$(SUFFIX) sgerqf.$(SUFFIX) sgesc2.$(SUFFIX) sgesdd.$(SUFFIX) sgesv.$(SUFFIX) sgesvd.$(SUFFIX) sgesvx.$(SUFFIX) \
+ sgetc2.$(SUFFIX) sgetri.$(SUFFIX) \
+ sggbak.$(SUFFIX) sggbal.$(SUFFIX) sgges.$(SUFFIX) sggesx.$(SUFFIX) sggev.$(SUFFIX) sggevx.$(SUFFIX) \
+ sggglm.$(SUFFIX) sgghrd.$(SUFFIX) sgglse.$(SUFFIX) sggqrf.$(SUFFIX) \
+ sggrqf.$(SUFFIX) sggsvd.$(SUFFIX) sggsvp.$(SUFFIX) sgtcon.$(SUFFIX) sgtrfs.$(SUFFIX) sgtsv.$(SUFFIX) \
+ sgtsvx.$(SUFFIX) sgttrf.$(SUFFIX) sgttrs.$(SUFFIX) sgtts2.$(SUFFIX) shgeqz.$(SUFFIX) \
+ shsein.$(SUFFIX) shseqr.$(SUFFIX) slabrd.$(SUFFIX) slacon.$(SUFFIX) slacn2.$(SUFFIX) \
+ slaein.$(SUFFIX) slaexc.$(SUFFIX) slag2.$(SUFFIX) slags2.$(SUFFIX) slagtm.$(SUFFIX) slagv2.$(SUFFIX) slahqr.$(SUFFIX) \
+ slahrd.$(SUFFIX) slahr2.$(SUFFIX) slaic1.$(SUFFIX) slaln2.$(SUFFIX) slals0.$(SUFFIX) slalsa.$(SUFFIX) slalsd.$(SUFFIX) \
+ slangb.$(SUFFIX) slange.$(SUFFIX) slangt.$(SUFFIX) slanhs.$(SUFFIX) slansb.$(SUFFIX) slansp.$(SUFFIX) \
+ slansy.$(SUFFIX) slantb.$(SUFFIX) slantp.$(SUFFIX) slantr.$(SUFFIX) slanv2.$(SUFFIX) \
+ slapll.$(SUFFIX) slapmt.$(SUFFIX) \
+ slaqgb.$(SUFFIX) slaqge.$(SUFFIX) slaqp2.$(SUFFIX) slaqps.$(SUFFIX) slaqsb.$(SUFFIX) slaqsp.$(SUFFIX) slaqsy.$(SUFFIX) \
+ slaqr0.$(SUFFIX) slaqr1.$(SUFFIX) slaqr2.$(SUFFIX) slaqr3.$(SUFFIX) slaqr4.$(SUFFIX) slaqr5.$(SUFFIX) \
+ slaqtr.$(SUFFIX) slar1v.$(SUFFIX) slar2v.$(SUFFIX) ilaslr.$(SUFFIX) ilaslc.$(SUFFIX) \
+ slarf.$(SUFFIX) slarfb.$(SUFFIX) slarfg.$(SUFFIX) slarfgp.$(SUFFIX) slarft.$(SUFFIX) slarfx.$(SUFFIX) slargv.$(SUFFIX) \
+ slarrv.$(SUFFIX) slartv.$(SUFFIX) \
+ slarz.$(SUFFIX) slarzb.$(SUFFIX) slarzt.$(SUFFIX) slasy2.$(SUFFIX) slasyf.$(SUFFIX) \
+ slatbs.$(SUFFIX) slatdf.$(SUFFIX) slatps.$(SUFFIX) slatrd.$(SUFFIX) slatrs.$(SUFFIX) slatrz.$(SUFFIX) slatzm.$(SUFFIX) \
+ sopgtr.$(SUFFIX) sopmtr.$(SUFFIX) sorg2l.$(SUFFIX) sorg2r.$(SUFFIX) \
+ sorgbr.$(SUFFIX) sorghr.$(SUFFIX) sorgl2.$(SUFFIX) sorglq.$(SUFFIX) sorgql.$(SUFFIX) sorgqr.$(SUFFIX) sorgr2.$(SUFFIX) \
+ sorgrq.$(SUFFIX) sorgtr.$(SUFFIX) sorm2l.$(SUFFIX) sorm2r.$(SUFFIX) \
+ sormbr.$(SUFFIX) sormhr.$(SUFFIX) sorml2.$(SUFFIX) sormlq.$(SUFFIX) sormql.$(SUFFIX) sormqr.$(SUFFIX) sormr2.$(SUFFIX) \
+ sormr3.$(SUFFIX) sormrq.$(SUFFIX) sormrz.$(SUFFIX) sormtr.$(SUFFIX) spbcon.$(SUFFIX) spbequ.$(SUFFIX) spbrfs.$(SUFFIX) \
+ spbstf.$(SUFFIX) spbsv.$(SUFFIX) spbsvx.$(SUFFIX) \
+ spbtf2.$(SUFFIX) spbtrf.$(SUFFIX) spbtrs.$(SUFFIX) spocon.$(SUFFIX) spoequ.$(SUFFIX) sporfs.$(SUFFIX) sposv.$(SUFFIX) \
+ sposvx.$(SUFFIX) spotri.$(SUFFIX) spstrf.$(SUFFIX) spstf2.$(SUFFIX) \
+ sppcon.$(SUFFIX) sppequ.$(SUFFIX) \
+ spprfs.$(SUFFIX) sppsv.$(SUFFIX) sppsvx.$(SUFFIX) spptrf.$(SUFFIX) spptri.$(SUFFIX) spptrs.$(SUFFIX) sptcon.$(SUFFIX) \
+ spteqr.$(SUFFIX) sptrfs.$(SUFFIX) sptsv.$(SUFFIX) sptsvx.$(SUFFIX) spttrs.$(SUFFIX) sptts2.$(SUFFIX) srscl.$(SUFFIX) \
+ ssbev.$(SUFFIX) ssbevd.$(SUFFIX) ssbevx.$(SUFFIX) ssbgst.$(SUFFIX) ssbgv.$(SUFFIX) ssbgvd.$(SUFFIX) ssbgvx.$(SUFFIX) \
+ ssbtrd.$(SUFFIX) sspcon.$(SUFFIX) sspev.$(SUFFIX) sspevd.$(SUFFIX) sspevx.$(SUFFIX) sspgst.$(SUFFIX) \
+ sspgv.$(SUFFIX) sspgvd.$(SUFFIX) sspgvx.$(SUFFIX) ssprfs.$(SUFFIX) sspsv.$(SUFFIX) sspsvx.$(SUFFIX) ssptrd.$(SUFFIX) \
+ ssptrf.$(SUFFIX) ssptri.$(SUFFIX) ssptrs.$(SUFFIX) sstegr.$(SUFFIX) sstein.$(SUFFIX) sstev.$(SUFFIX) sstevd.$(SUFFIX) sstevr.$(SUFFIX) \
+ sstevx.$(SUFFIX) \
+ ssycon.$(SUFFIX) ssyev.$(SUFFIX) ssyevd.$(SUFFIX) ssyevr.$(SUFFIX) ssyevx.$(SUFFIX) ssygs2.$(SUFFIX) \
+ ssygst.$(SUFFIX) ssygv.$(SUFFIX) ssygvd.$(SUFFIX) ssygvx.$(SUFFIX) ssyrfs.$(SUFFIX) ssysv.$(SUFFIX) ssysvx.$(SUFFIX) \
+ ssytd2.$(SUFFIX) ssytf2.$(SUFFIX) ssytrd.$(SUFFIX) ssytrf.$(SUFFIX) ssytri.$(SUFFIX) ssytri2.$(SUFFIX) ssytri2x.$(SUFFIX) \
+ ssyswapr.$(SUFFIX) ssytrs.$(SUFFIX) ssytrs2.$(SUFFIX) ssyconv.$(SUFFIX) \
+ stbcon.$(SUFFIX) \
+ stbrfs.$(SUFFIX) stbtrs.$(SUFFIX) stgevc.$(SUFFIX) stgex2.$(SUFFIX) stgexc.$(SUFFIX) stgsen.$(SUFFIX) \
+ stgsja.$(SUFFIX) stgsna.$(SUFFIX) stgsy2.$(SUFFIX) stgsyl.$(SUFFIX) stpcon.$(SUFFIX) stprfs.$(SUFFIX) stptri.$(SUFFIX) \
+ stptrs.$(SUFFIX) \
+ strcon.$(SUFFIX) strevc.$(SUFFIX) strexc.$(SUFFIX) strrfs.$(SUFFIX) strsen.$(SUFFIX) strsna.$(SUFFIX) strsyl.$(SUFFIX) \
+ strtrs.$(SUFFIX) stzrqf.$(SUFFIX) stzrzf.$(SUFFIX) sstemr.$(SUFFIX) \
+ slansf.$(SUFFIX) spftrf.$(SUFFIX) spftri.$(SUFFIX) spftrs.$(SUFFIX) ssfrk.$(SUFFIX) stfsm.$(SUFFIX) stftri.$(SUFFIX) stfttp.$(SUFFIX) \
+ stfttr.$(SUFFIX) stpttf.$(SUFFIX) stpttr.$(SUFFIX) strttf.$(SUFFIX) strttp.$(SUFFIX) \
+ sgejsv.$(SUFFIX) sgesvj.$(SUFFIX) sgsvj0.$(SUFFIX) sgsvj1.$(SUFFIX) \
+ sgeequb.$(SUFFIX) ssyequb.$(SUFFIX) spoequb.$(SUFFIX) sgbequb.$(SUFFIX) \
+ sbbcsd.$(SUFFIX) slapmr.$(SUFFIX) sorbdb.$(SUFFIX) sorcsd.$(SUFFIX) \
+ sgeqrt.$(SUFFIX) sgeqrt2.$(SUFFIX) sgeqrt3.$(SUFFIX) sgemqrt.$(SUFFIX) \
+ stpqrt.$(SUFFIX) stpqrt2.$(SUFFIX) stpmqrt.$(SUFFIX) stprfb.$(SUFFIX)
-DSLASRC = spotrs.o sgetrs.o spotrf.o sgetrf.o
+DSLASRC = spotrs.$(SUFFIX)
ifdef USEXBLAS
-SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \
- sla_gercond.o sla_gerpvgrw.o ssysvxx.o ssyrfsx.o \
- sla_syrfsx_extended.o sla_syamv.o sla_syrcond.o sla_syrpvgrw.o \
- sposvxx.o sporfsx.o sla_porfsx_extended.o sla_porcond.o \
- sla_porpvgrw.o sgbsvxx.o sgbrfsx.o sla_gbrfsx_extended.o \
- sla_gbamv.o sla_gbrcond.o sla_gbrpvgrw.o sla_lin_berr.o slarscl2.o \
- slascl2.o sla_wwaddw.o
+SXLASRC = sgesvxx.$(SUFFIX) sgerfsx.$(SUFFIX) sla_gerfsx_extended.$(SUFFIX) sla_geamv.$(SUFFIX) \
+ sla_gercond.$(SUFFIX) sla_gerpvgrw.$(SUFFIX) ssysvxx.$(SUFFIX) ssyrfsx.$(SUFFIX) \
+ sla_syrfsx_extended.$(SUFFIX) sla_syamv.$(SUFFIX) sla_syrcond.$(SUFFIX) sla_syrpvgrw.$(SUFFIX) \
+ sposvxx.$(SUFFIX) sporfsx.$(SUFFIX) sla_porfsx_extended.$(SUFFIX) sla_porcond.$(SUFFIX) \
+ sla_porpvgrw.$(SUFFIX) sgbsvxx.$(SUFFIX) sgbrfsx.$(SUFFIX) sla_gbrfsx_extended.$(SUFFIX) \
+ sla_gbamv.$(SUFFIX) sla_gbrcond.$(SUFFIX) sla_gbrpvgrw.$(SUFFIX) sla_lin_berr.$(SUFFIX) slarscl2.$(SUFFIX) \
+ slascl2.$(SUFFIX) sla_wwaddw.$(SUFFIX)
endif
CLASRC = \
- cbdsqr.o cgbbrd.o cgbcon.o cgbequ.o cgbrfs.o cgbsv.o cgbsvx.o \
- cgbtf2.o cgbtrf.o cgbtrs.o cgebak.o cgebal.o cgebd2.o cgebrd.o \
- cgecon.o cgeequ.o cgees.o cgeesx.o cgeev.o cgeevx.o \
- cgegs.o cgegv.o cgehd2.o cgehrd.o cgelq2.o cgelqf.o \
- cgels.o cgelsd.o cgelss.o cgelsx.o cgelsy.o cgeql2.o cgeqlf.o cgeqp3.o \
- cgeqpf.o cgeqr2.o cgeqr2p.o cgeqrf.o cgeqrfp.o cgerfs.o \
- cgerq2.o cgerqf.o cgesc2.o cgesdd.o cgesv.o cgesvd.o \
- cgesvx.o cgetc2.o cgetf2.o cgetri.o \
- cggbak.o cggbal.o cgges.o cggesx.o cggev.o cggevx.o cggglm.o \
- cgghrd.o cgglse.o cggqrf.o cggrqf.o \
- cggsvd.o cggsvp.o \
- cgtcon.o cgtrfs.o cgtsv.o cgtsvx.o cgttrf.o cgttrs.o cgtts2.o chbev.o \
- chbevd.o chbevx.o chbgst.o chbgv.o chbgvd.o chbgvx.o chbtrd.o \
- checon.o cheev.o cheevd.o cheevr.o cheevx.o chegs2.o chegst.o \
- chegv.o chegvd.o chegvx.o cherfs.o chesv.o chesvx.o chetd2.o \
- chetf2.o chetrd.o \
- chetrf.o chetri.o chetri2.o chetri2x.o cheswapr.o \
- chetrs.o chetrs2.o chgeqz.o chpcon.o chpev.o chpevd.o \
- chpevx.o chpgst.o chpgv.o chpgvd.o chpgvx.o chprfs.o chpsv.o \
- chpsvx.o \
- chptrd.o chptrf.o chptri.o chptrs.o chsein.o chseqr.o clabrd.o \
- clacgv.o clacon.o clacn2.o clacp2.o clacpy.o clacrm.o clacrt.o cladiv.o \
- claed0.o claed7.o claed8.o \
- claein.o claesy.o claev2.o clags2.o clagtm.o \
- clahef.o clahqr.o \
- clahrd.o clahr2.o claic1.o clals0.o clalsa.o clalsd.o clangb.o clange.o clangt.o \
- clanhb.o clanhe.o \
- clanhp.o clanhs.o clanht.o clansb.o clansp.o clansy.o clantb.o \
- clantp.o clantr.o clapll.o clapmt.o clarcm.o claqgb.o claqge.o \
- claqhb.o claqhe.o claqhp.o claqp2.o claqps.o claqsb.o \
- claqr0.o claqr1.o claqr2.o claqr3.o claqr4.o claqr5.o \
- claqsp.o claqsy.o clar1v.o clar2v.o ilaclr.o ilaclc.o \
- clarf.o clarfb.o clarfg.o clarft.o clarfgp.o \
- clarfx.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \
- clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \
- claswp.o clasyf.o clatbs.o clatdf.o clatps.o clatrd.o clatrs.o clatrz.o \
- clatzm.o clauu2.o clauum.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \
- cpbsvx.o cpbtf2.o cpbtrf.o cpbtrs.o cpocon.o cpoequ.o cporfs.o \
- cposv.o cposvx.o cpotf2.o cpotri.o cpstrf.o cpstf2.o \
- cppcon.o cppequ.o cpprfs.o cppsv.o cppsvx.o cpptrf.o cpptri.o cpptrs.o \
- cptcon.o cpteqr.o cptrfs.o cptsv.o cptsvx.o cpttrf.o cpttrs.o cptts2.o \
- crot.o cspcon.o cspmv.o cspr.o csprfs.o cspsv.o \
- cspsvx.o csptrf.o csptri.o csptrs.o csrscl.o cstedc.o \
- cstegr.o cstein.o csteqr.o \
- csycon.o csymv.o \
- csyr.o csyrfs.o csysv.o csysvx.o csytf2.o csytrf.o csytri.o csytri2.o csytri2x.o \
- csyswapr.o csytrs.o csytrs2.o csyconv.o \
- ctbcon.o ctbrfs.o ctbtrs.o ctgevc.o ctgex2.o \
- ctgexc.o ctgsen.o ctgsja.o ctgsna.o ctgsy2.o ctgsyl.o ctpcon.o \
- ctprfs.o ctptri.o \
- ctptrs.o ctrcon.o ctrevc.o ctrexc.o ctrrfs.o ctrsen.o ctrsna.o \
- ctrsyl.o ctrti2.o ctrtri.o ctrtrs.o ctzrqf.o ctzrzf.o cung2l.o cung2r.o \
- cungbr.o cunghr.o cungl2.o cunglq.o cungql.o cungqr.o cungr2.o \
- cungrq.o cungtr.o cunm2l.o cunm2r.o cunmbr.o cunmhr.o cunml2.o \
- cunmlq.o cunmql.o cunmqr.o cunmr2.o cunmr3.o cunmrq.o cunmrz.o \
- cunmtr.o cupgtr.o cupmtr.o icmax1.o scsum1.o cstemr.o \
- chfrk.o ctfttp.o clanhf.o cpftrf.o cpftri.o cpftrs.o ctfsm.o ctftri.o \
- ctfttr.o ctpttf.o ctpttr.o ctrttf.o ctrttp.o \
- cgeequb.o cgbequb.o csyequb.o cpoequb.o cheequb.o \
- cbbcsd.o clapmr.o cunbdb.o cuncsd.o \
- cgeqrt.o cgeqrt2.o cgeqrt3.o cgemqrt.o \
- ctpqrt.o ctpqrt2.o ctpmqrt.o ctprfb.o
+ cbdsqr.$(SUFFIX) cgbbrd.$(SUFFIX) cgbcon.$(SUFFIX) cgbequ.$(SUFFIX) cgbrfs.$(SUFFIX) cgbsv.$(SUFFIX) cgbsvx.$(SUFFIX) \
+ cgbtf2.$(SUFFIX) cgbtrf.$(SUFFIX) cgbtrs.$(SUFFIX) cgebak.$(SUFFIX) cgebal.$(SUFFIX) cgebd2.$(SUFFIX) cgebrd.$(SUFFIX) \
+ cgecon.$(SUFFIX) cgeequ.$(SUFFIX) cgees.$(SUFFIX) cgeesx.$(SUFFIX) cgeev.$(SUFFIX) cgeevx.$(SUFFIX) \
+ cgegs.$(SUFFIX) cgegv.$(SUFFIX) cgehd2.$(SUFFIX) cgehrd.$(SUFFIX) cgelq2.$(SUFFIX) cgelqf.$(SUFFIX) \
+ cgels.$(SUFFIX) cgelsd.$(SUFFIX) cgelss.$(SUFFIX) cgelsx.$(SUFFIX) cgelsy.$(SUFFIX) cgeql2.$(SUFFIX) cgeqlf.$(SUFFIX) cgeqp3.$(SUFFIX) \
+ cgeqpf.$(SUFFIX) cgeqr2.$(SUFFIX) cgeqr2p.$(SUFFIX) cgeqrf.$(SUFFIX) cgeqrfp.$(SUFFIX) cgerfs.$(SUFFIX) \
+ cgerq2.$(SUFFIX) cgerqf.$(SUFFIX) cgesc2.$(SUFFIX) cgesdd.$(SUFFIX) cgesv.$(SUFFIX) cgesvd.$(SUFFIX) \
+ cgesvx.$(SUFFIX) cgetc2.$(SUFFIX) cgetri.$(SUFFIX) \
+ cggbak.$(SUFFIX) cggbal.$(SUFFIX) cgges.$(SUFFIX) cggesx.$(SUFFIX) cggev.$(SUFFIX) cggevx.$(SUFFIX) cggglm.$(SUFFIX) \
+ cgghrd.$(SUFFIX) cgglse.$(SUFFIX) cggqrf.$(SUFFIX) cggrqf.$(SUFFIX) \
+ cggsvd.$(SUFFIX) cggsvp.$(SUFFIX) \
+ cgtcon.$(SUFFIX) cgtrfs.$(SUFFIX) cgtsv.$(SUFFIX) cgtsvx.$(SUFFIX) cgttrf.$(SUFFIX) cgttrs.$(SUFFIX) cgtts2.$(SUFFIX) chbev.$(SUFFIX) \
+ chbevd.$(SUFFIX) chbevx.$(SUFFIX) chbgst.$(SUFFIX) chbgv.$(SUFFIX) chbgvd.$(SUFFIX) chbgvx.$(SUFFIX) chbtrd.$(SUFFIX) \
+ checon.$(SUFFIX) cheev.$(SUFFIX) cheevd.$(SUFFIX) cheevr.$(SUFFIX) cheevx.$(SUFFIX) chegs2.$(SUFFIX) chegst.$(SUFFIX) \
+ chegv.$(SUFFIX) chegvd.$(SUFFIX) chegvx.$(SUFFIX) cherfs.$(SUFFIX) chesv.$(SUFFIX) chesvx.$(SUFFIX) chetd2.$(SUFFIX) \
+ chetf2.$(SUFFIX) chetrd.$(SUFFIX) \
+ chetrf.$(SUFFIX) chetri.$(SUFFIX) chetri2.$(SUFFIX) chetri2x.$(SUFFIX) cheswapr.$(SUFFIX) \
+ chetrs.$(SUFFIX) chetrs2.$(SUFFIX) chgeqz.$(SUFFIX) chpcon.$(SUFFIX) chpev.$(SUFFIX) chpevd.$(SUFFIX) \
+ chpevx.$(SUFFIX) chpgst.$(SUFFIX) chpgv.$(SUFFIX) chpgvd.$(SUFFIX) chpgvx.$(SUFFIX) chprfs.$(SUFFIX) chpsv.$(SUFFIX) \
+ chpsvx.$(SUFFIX) \
+ chptrd.$(SUFFIX) chptrf.$(SUFFIX) chptri.$(SUFFIX) chptrs.$(SUFFIX) chsein.$(SUFFIX) chseqr.$(SUFFIX) clabrd.$(SUFFIX) \
+ clacgv.$(SUFFIX) clacon.$(SUFFIX) clacn2.$(SUFFIX) clacp2.$(SUFFIX) clacpy.$(SUFFIX) clacrm.$(SUFFIX) clacrt.$(SUFFIX) cladiv.$(SUFFIX) \
+ claed0.$(SUFFIX) claed7.$(SUFFIX) claed8.$(SUFFIX) \
+ claein.$(SUFFIX) claesy.$(SUFFIX) claev2.$(SUFFIX) clags2.$(SUFFIX) clagtm.$(SUFFIX) \
+ clahef.$(SUFFIX) clahqr.$(SUFFIX) \
+ clahrd.$(SUFFIX) clahr2.$(SUFFIX) claic1.$(SUFFIX) clals0.$(SUFFIX) clalsa.$(SUFFIX) clalsd.$(SUFFIX) clangb.$(SUFFIX) clange.$(SUFFIX) clangt.$(SUFFIX) \
+ clanhb.$(SUFFIX) clanhe.$(SUFFIX) \
+ clanhp.$(SUFFIX) clanhs.$(SUFFIX) clanht.$(SUFFIX) clansb.$(SUFFIX) clansp.$(SUFFIX) clansy.$(SUFFIX) clantb.$(SUFFIX) \
+ clantp.$(SUFFIX) clantr.$(SUFFIX) clapll.$(SUFFIX) clapmt.$(SUFFIX) clarcm.$(SUFFIX) claqgb.$(SUFFIX) claqge.$(SUFFIX) \
+ claqhb.$(SUFFIX) claqhe.$(SUFFIX) claqhp.$(SUFFIX) claqp2.$(SUFFIX) claqps.$(SUFFIX) claqsb.$(SUFFIX) \
+ claqr0.$(SUFFIX) claqr1.$(SUFFIX) claqr2.$(SUFFIX) claqr3.$(SUFFIX) claqr4.$(SUFFIX) claqr5.$(SUFFIX) \
+ claqsp.$(SUFFIX) claqsy.$(SUFFIX) clar1v.$(SUFFIX) clar2v.$(SUFFIX) ilaclr.$(SUFFIX) ilaclc.$(SUFFIX) \
+ clarf.$(SUFFIX) clarfb.$(SUFFIX) clarfg.$(SUFFIX) clarft.$(SUFFIX) clarfgp.$(SUFFIX) \
+ clarfx.$(SUFFIX) clargv.$(SUFFIX) clarnv.$(SUFFIX) clarrv.$(SUFFIX) clartg.$(SUFFIX) clartv.$(SUFFIX) \
+ clarz.$(SUFFIX) clarzb.$(SUFFIX) clarzt.$(SUFFIX) clascl.$(SUFFIX) claset.$(SUFFIX) clasr.$(SUFFIX) classq.$(SUFFIX) \
+ clasyf.$(SUFFIX) clatbs.$(SUFFIX) clatdf.$(SUFFIX) clatps.$(SUFFIX) clatrd.$(SUFFIX) clatrs.$(SUFFIX) clatrz.$(SUFFIX) \
+ clatzm.$(SUFFIX) cpbcon.$(SUFFIX) cpbequ.$(SUFFIX) cpbrfs.$(SUFFIX) cpbstf.$(SUFFIX) cpbsv.$(SUFFIX) \
+ cpbsvx.$(SUFFIX) cpbtf2.$(SUFFIX) cpbtrf.$(SUFFIX) cpbtrs.$(SUFFIX) cpocon.$(SUFFIX) cpoequ.$(SUFFIX) cporfs.$(SUFFIX) \
+ cposv.$(SUFFIX) cposvx.$(SUFFIX) cpotri.$(SUFFIX) cpstrf.$(SUFFIX) cpstf2.$(SUFFIX) \
+ cppcon.$(SUFFIX) cppequ.$(SUFFIX) cpprfs.$(SUFFIX) cppsv.$(SUFFIX) cppsvx.$(SUFFIX) cpptrf.$(SUFFIX) cpptri.$(SUFFIX) cpptrs.$(SUFFIX) \
+ cptcon.$(SUFFIX) cpteqr.$(SUFFIX) cptrfs.$(SUFFIX) cptsv.$(SUFFIX) cptsvx.$(SUFFIX) cpttrf.$(SUFFIX) cpttrs.$(SUFFIX) cptts2.$(SUFFIX) \
+ crot.$(SUFFIX) cspcon.$(SUFFIX) cspmv.$(SUFFIX) cspr.$(SUFFIX) csprfs.$(SUFFIX) cspsv.$(SUFFIX) \
+ cspsvx.$(SUFFIX) csptrf.$(SUFFIX) csptri.$(SUFFIX) csptrs.$(SUFFIX) csrscl.$(SUFFIX) cstedc.$(SUFFIX) \
+ cstegr.$(SUFFIX) cstein.$(SUFFIX) csteqr.$(SUFFIX) \
+ csycon.$(SUFFIX) csymv.$(SUFFIX) \
+ csyr.$(SUFFIX) csyrfs.$(SUFFIX) csysv.$(SUFFIX) csysvx.$(SUFFIX) csytf2.$(SUFFIX) csytrf.$(SUFFIX) csytri.$(SUFFIX) csytri2.$(SUFFIX) csytri2x.$(SUFFIX) \
+ csyswapr.$(SUFFIX) csytrs.$(SUFFIX) csytrs2.$(SUFFIX) csyconv.$(SUFFIX) \
+ ctbcon.$(SUFFIX) ctbrfs.$(SUFFIX) ctbtrs.$(SUFFIX) ctgevc.$(SUFFIX) ctgex2.$(SUFFIX) \
+ ctgexc.$(SUFFIX) ctgsen.$(SUFFIX) ctgsja.$(SUFFIX) ctgsna.$(SUFFIX) ctgsy2.$(SUFFIX) ctgsyl.$(SUFFIX) ctpcon.$(SUFFIX) \
+ ctprfs.$(SUFFIX) ctptri.$(SUFFIX) \
+ ctptrs.$(SUFFIX) ctrcon.$(SUFFIX) ctrevc.$(SUFFIX) ctrexc.$(SUFFIX) ctrrfs.$(SUFFIX) ctrsen.$(SUFFIX) ctrsna.$(SUFFIX) \
+ ctrsyl.$(SUFFIX) ctrtrs.$(SUFFIX) ctzrqf.$(SUFFIX) ctzrzf.$(SUFFIX) cung2l.$(SUFFIX) cung2r.$(SUFFIX) \
+ cungbr.$(SUFFIX) cunghr.$(SUFFIX) cungl2.$(SUFFIX) cunglq.$(SUFFIX) cungql.$(SUFFIX) cungqr.$(SUFFIX) cungr2.$(SUFFIX) \
+ cungrq.$(SUFFIX) cungtr.$(SUFFIX) cunm2l.$(SUFFIX) cunm2r.$(SUFFIX) cunmbr.$(SUFFIX) cunmhr.$(SUFFIX) cunml2.$(SUFFIX) \
+ cunmlq.$(SUFFIX) cunmql.$(SUFFIX) cunmqr.$(SUFFIX) cunmr2.$(SUFFIX) cunmr3.$(SUFFIX) cunmrq.$(SUFFIX) cunmrz.$(SUFFIX) \
+ cunmtr.$(SUFFIX) cupgtr.$(SUFFIX) cupmtr.$(SUFFIX) icmax1.$(SUFFIX) scsum1.$(SUFFIX) cstemr.$(SUFFIX) \
+ chfrk.$(SUFFIX) ctfttp.$(SUFFIX) clanhf.$(SUFFIX) cpftrf.$(SUFFIX) cpftri.$(SUFFIX) cpftrs.$(SUFFIX) ctfsm.$(SUFFIX) ctftri.$(SUFFIX) \
+ ctfttr.$(SUFFIX) ctpttf.$(SUFFIX) ctpttr.$(SUFFIX) ctrttf.$(SUFFIX) ctrttp.$(SUFFIX) \
+ cgeequb.$(SUFFIX) cgbequb.$(SUFFIX) csyequb.$(SUFFIX) cpoequb.$(SUFFIX) cheequb.$(SUFFIX) \
+ cbbcsd.$(SUFFIX) clapmr.$(SUFFIX) cunbdb.$(SUFFIX) cuncsd.$(SUFFIX) \
+ cgeqrt.$(SUFFIX) cgeqrt2.$(SUFFIX) cgeqrt3.$(SUFFIX) cgemqrt.$(SUFFIX) \
+ ctpqrt.$(SUFFIX) ctpqrt2.$(SUFFIX) ctpmqrt.$(SUFFIX) ctprfb.$(SUFFIX)
ifdef USEXBLAS
-CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \
- cla_gercond_c.o cla_gercond_x.o cla_gerpvgrw.o \
- csysvxx.o csyrfsx.o cla_syrfsx_extended.o cla_syamv.o \
- cla_syrcond_c.o cla_syrcond_x.o cla_syrpvgrw.o \
- cposvxx.o cporfsx.o cla_porfsx_extended.o \
- cla_porcond_c.o cla_porcond_x.o cla_porpvgrw.o \
- cgbsvxx.o cgbrfsx.o cla_gbrfsx_extended.o cla_gbamv.o \
- cla_gbrcond_c.o cla_gbrcond_x.o cla_gbrpvgrw.o \
- chesvxx.o cherfsx.o cla_herfsx_extended.o cla_heamv.o \
- cla_hercond_c.o cla_hercond_x.o cla_herpvgrw.o \
- cla_lin_berr.o clarscl2.o clascl2.o cla_wwaddw.o
+CXLASRC = cgesvxx.$(SUFFIX) cgerfsx.$(SUFFIX) cla_gerfsx_extended.$(SUFFIX) cla_geamv.$(SUFFIX) \
+ cla_gercond_c.$(SUFFIX) cla_gercond_x.$(SUFFIX) cla_gerpvgrw.$(SUFFIX) \
+ csysvxx.$(SUFFIX) csyrfsx.$(SUFFIX) cla_syrfsx_extended.$(SUFFIX) cla_syamv.$(SUFFIX) \
+ cla_syrcond_c.$(SUFFIX) cla_syrcond_x.$(SUFFIX) cla_syrpvgrw.$(SUFFIX) \
+ cposvxx.$(SUFFIX) cporfsx.$(SUFFIX) cla_porfsx_extended.$(SUFFIX) \
+ cla_porcond_c.$(SUFFIX) cla_porcond_x.$(SUFFIX) cla_porpvgrw.$(SUFFIX) \
+ cgbsvxx.$(SUFFIX) cgbrfsx.$(SUFFIX) cla_gbrfsx_extended.$(SUFFIX) cla_gbamv.$(SUFFIX) \
+ cla_gbrcond_c.$(SUFFIX) cla_gbrcond_x.$(SUFFIX) cla_gbrpvgrw.$(SUFFIX) \
+ chesvxx.$(SUFFIX) cherfsx.$(SUFFIX) cla_herfsx_extended.$(SUFFIX) cla_heamv.$(SUFFIX) \
+ cla_hercond_c.$(SUFFIX) cla_hercond_x.$(SUFFIX) cla_herpvgrw.$(SUFFIX) \
+ cla_lin_berr.$(SUFFIX) clarscl2.$(SUFFIX) clascl2.$(SUFFIX) cla_wwaddw.$(SUFFIX)
endif
-ZCLASRC = cpotrs.o cgetrs.o cpotrf.o cgetrf.o
+ZCLASRC = cpotrs.$(SUFFIX)
DLASRC = \
- dgbbrd.o dgbcon.o dgbequ.o dgbrfs.o dgbsv.o \
- dgbsvx.o dgbtf2.o dgbtrf.o dgbtrs.o dgebak.o dgebal.o dgebd2.o \
- dgebrd.o dgecon.o dgeequ.o dgees.o dgeesx.o dgeev.o dgeevx.o \
- dgegs.o dgegv.o dgehd2.o dgehrd.o dgelq2.o dgelqf.o \
- dgels.o dgelsd.o dgelss.o dgelsx.o dgelsy.o dgeql2.o dgeqlf.o \
- dgeqp3.o dgeqpf.o dgeqr2.o dgeqr2p.o dgeqrf.o dgeqrfp.o dgerfs.o \
- dgerq2.o dgerqf.o dgesc2.o dgesdd.o dgesv.o dgesvd.o dgesvx.o \
- dgetc2.o dgetf2.o dgetrf.o dgetri.o \
- dgetrs.o dggbak.o dggbal.o dgges.o dggesx.o dggev.o dggevx.o \
- dggglm.o dgghrd.o dgglse.o dggqrf.o \
- dggrqf.o dggsvd.o dggsvp.o dgtcon.o dgtrfs.o dgtsv.o \
- dgtsvx.o dgttrf.o dgttrs.o dgtts2.o dhgeqz.o \
- dhsein.o dhseqr.o dlabrd.o dlacon.o dlacn2.o \
- dlaein.o dlaexc.o dlag2.o dlags2.o dlagtm.o dlagv2.o dlahqr.o \
- dlahrd.o dlahr2.o dlaic1.o dlaln2.o dlals0.o dlalsa.o dlalsd.o \
- dlangb.o dlange.o dlangt.o dlanhs.o dlansb.o dlansp.o \
- dlansy.o dlantb.o dlantp.o dlantr.o dlanv2.o \
- dlapll.o dlapmt.o \
- dlaqgb.o dlaqge.o dlaqp2.o dlaqps.o dlaqsb.o dlaqsp.o dlaqsy.o \
- dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \
- dlaqtr.o dlar1v.o dlar2v.o iladlr.o iladlc.o \
- dlarf.o dlarfb.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o \
- dlargv.o dlarrv.o dlartv.o \
- dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o dlasyf.o \
- dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o dlauu2.o \
- dlauum.o dopgtr.o dopmtr.o dorg2l.o dorg2r.o \
- dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \
- dorgrq.o dorgtr.o dorm2l.o dorm2r.o \
- dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \
- dormr3.o dormrq.o dormrz.o dormtr.o dpbcon.o dpbequ.o dpbrfs.o \
- dpbstf.o dpbsv.o dpbsvx.o \
- dpbtf2.o dpbtrf.o dpbtrs.o dpocon.o dpoequ.o dporfs.o dposv.o \
- dposvx.o dpotf2.o dpotrf.o dpotri.o dpotrs.o dpstrf.o dpstf2.o \
- dppcon.o dppequ.o \
- dpprfs.o dppsv.o dppsvx.o dpptrf.o dpptri.o dpptrs.o dptcon.o \
- dpteqr.o dptrfs.o dptsv.o dptsvx.o dpttrs.o dptts2.o drscl.o \
- dsbev.o dsbevd.o dsbevx.o dsbgst.o dsbgv.o dsbgvd.o dsbgvx.o \
- dsbtrd.o dspcon.o dspev.o dspevd.o dspevx.o dspgst.o \
- dspgv.o dspgvd.o dspgvx.o dsprfs.o dspsv.o dspsvx.o dsptrd.o \
- dsptrf.o dsptri.o dsptrs.o dstegr.o dstein.o dstev.o dstevd.o dstevr.o \
- dstevx.o \
- dsycon.o dsyev.o dsyevd.o dsyevr.o \
- dsyevx.o dsygs2.o dsygst.o dsygv.o dsygvd.o dsygvx.o dsyrfs.o \
- dsysv.o dsysvx.o \
- dsytd2.o dsytf2.o dsytrd.o dsytrf.o dsytri.o dsytri2.o dsytri2x.o \
- dsyswapr.o dsytrs.o dsytrs2.o dsyconv.o \
- dtbcon.o dtbrfs.o dtbtrs.o dtgevc.o dtgex2.o dtgexc.o dtgsen.o \
- dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \
- dtptrs.o \
- dtrcon.o dtrevc.o dtrexc.o dtrrfs.o dtrsen.o dtrsna.o dtrsyl.o \
- dtrti2.o dtrtri.o dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \
- dsgesv.o dsposv.o dlag2s.o slag2d.o dlat2s.o \
- dlansf.o dpftrf.o dpftri.o dpftrs.o dsfrk.o dtfsm.o dtftri.o dtfttp.o \
- dtfttr.o dtpttf.o dtpttr.o dtrttf.o dtrttp.o \
- dgejsv.o dgesvj.o dgsvj0.o dgsvj1.o \
- dgeequb.o dsyequb.o dpoequb.o dgbequb.o \
- dbbcsd.o dlapmr.o dorbdb.o dorcsd.o \
- dgeqrt.o dgeqrt2.o dgeqrt3.o dgemqrt.o \
- dtpqrt.o dtpqrt2.o dtpmqrt.o dtprfb.o
+ dgbbrd.$(SUFFIX) dgbcon.$(SUFFIX) dgbequ.$(SUFFIX) dgbrfs.$(SUFFIX) dgbsv.$(SUFFIX) \
+ dgbsvx.$(SUFFIX) dgbtf2.$(SUFFIX) dgbtrf.$(SUFFIX) dgbtrs.$(SUFFIX) dgebak.$(SUFFIX) dgebal.$(SUFFIX) dgebd2.$(SUFFIX) \
+ dgebrd.$(SUFFIX) dgecon.$(SUFFIX) dgeequ.$(SUFFIX) dgees.$(SUFFIX) dgeesx.$(SUFFIX) dgeev.$(SUFFIX) dgeevx.$(SUFFIX) \
+ dgegs.$(SUFFIX) dgegv.$(SUFFIX) dgehd2.$(SUFFIX) dgehrd.$(SUFFIX) dgelq2.$(SUFFIX) dgelqf.$(SUFFIX) \
+ dgels.$(SUFFIX) dgelsd.$(SUFFIX) dgelss.$(SUFFIX) dgelsx.$(SUFFIX) dgelsy.$(SUFFIX) dgeql2.$(SUFFIX) dgeqlf.$(SUFFIX) \
+ dgeqp3.$(SUFFIX) dgeqpf.$(SUFFIX) dgeqr2.$(SUFFIX) dgeqr2p.$(SUFFIX) dgeqrf.$(SUFFIX) dgeqrfp.$(SUFFIX) dgerfs.$(SUFFIX) \
+ dgerq2.$(SUFFIX) dgerqf.$(SUFFIX) dgesc2.$(SUFFIX) dgesdd.$(SUFFIX) dgesv.$(SUFFIX) dgesvd.$(SUFFIX) dgesvx.$(SUFFIX) \
+ dgetc2.$(SUFFIX) dgetri.$(SUFFIX) \
+ dggbak.$(SUFFIX) dggbal.$(SUFFIX) dgges.$(SUFFIX) dggesx.$(SUFFIX) dggev.$(SUFFIX) dggevx.$(SUFFIX) \
+ dggglm.$(SUFFIX) dgghrd.$(SUFFIX) dgglse.$(SUFFIX) dggqrf.$(SUFFIX) \
+ dggrqf.$(SUFFIX) dggsvd.$(SUFFIX) dggsvp.$(SUFFIX) dgtcon.$(SUFFIX) dgtrfs.$(SUFFIX) dgtsv.$(SUFFIX) \
+ dgtsvx.$(SUFFIX) dgttrf.$(SUFFIX) dgttrs.$(SUFFIX) dgtts2.$(SUFFIX) dhgeqz.$(SUFFIX) \
+ dhsein.$(SUFFIX) dhseqr.$(SUFFIX) dlabrd.$(SUFFIX) dlacon.$(SUFFIX) dlacn2.$(SUFFIX) \
+ dlaein.$(SUFFIX) dlaexc.$(SUFFIX) dlag2.$(SUFFIX) dlags2.$(SUFFIX) dlagtm.$(SUFFIX) dlagv2.$(SUFFIX) dlahqr.$(SUFFIX) \
+ dlahrd.$(SUFFIX) dlahr2.$(SUFFIX) dlaic1.$(SUFFIX) dlaln2.$(SUFFIX) dlals0.$(SUFFIX) dlalsa.$(SUFFIX) dlalsd.$(SUFFIX) \
+ dlangb.$(SUFFIX) dlange.$(SUFFIX) dlangt.$(SUFFIX) dlanhs.$(SUFFIX) dlansb.$(SUFFIX) dlansp.$(SUFFIX) \
+ dlansy.$(SUFFIX) dlantb.$(SUFFIX) dlantp.$(SUFFIX) dlantr.$(SUFFIX) dlanv2.$(SUFFIX) \
+ dlapll.$(SUFFIX) dlapmt.$(SUFFIX) \
+ dlaqgb.$(SUFFIX) dlaqge.$(SUFFIX) dlaqp2.$(SUFFIX) dlaqps.$(SUFFIX) dlaqsb.$(SUFFIX) dlaqsp.$(SUFFIX) dlaqsy.$(SUFFIX) \
+ dlaqr0.$(SUFFIX) dlaqr1.$(SUFFIX) dlaqr2.$(SUFFIX) dlaqr3.$(SUFFIX) dlaqr4.$(SUFFIX) dlaqr5.$(SUFFIX) \
+ dlaqtr.$(SUFFIX) dlar1v.$(SUFFIX) dlar2v.$(SUFFIX) iladlr.$(SUFFIX) iladlc.$(SUFFIX) \
+ dlarf.$(SUFFIX) dlarfb.$(SUFFIX) dlarfg.$(SUFFIX) dlarfgp.$(SUFFIX) dlarft.$(SUFFIX) dlarfx.$(SUFFIX) \
+ dlargv.$(SUFFIX) dlarrv.$(SUFFIX) dlartv.$(SUFFIX) \
+ dlarz.$(SUFFIX) dlarzb.$(SUFFIX) dlarzt.$(SUFFIX) dlasy2.$(SUFFIX) dlasyf.$(SUFFIX) \
+ dlatbs.$(SUFFIX) dlatdf.$(SUFFIX) dlatps.$(SUFFIX) dlatrd.$(SUFFIX) dlatrs.$(SUFFIX) dlatrz.$(SUFFIX) dlatzm.$(SUFFIX) \
+ dopgtr.$(SUFFIX) dopmtr.$(SUFFIX) dorg2l.$(SUFFIX) dorg2r.$(SUFFIX) \
+ dorgbr.$(SUFFIX) dorghr.$(SUFFIX) dorgl2.$(SUFFIX) dorglq.$(SUFFIX) dorgql.$(SUFFIX) dorgqr.$(SUFFIX) dorgr2.$(SUFFIX) \
+ dorgrq.$(SUFFIX) dorgtr.$(SUFFIX) dorm2l.$(SUFFIX) dorm2r.$(SUFFIX) \
+ dormbr.$(SUFFIX) dormhr.$(SUFFIX) dorml2.$(SUFFIX) dormlq.$(SUFFIX) dormql.$(SUFFIX) dormqr.$(SUFFIX) dormr2.$(SUFFIX) \
+ dormr3.$(SUFFIX) dormrq.$(SUFFIX) dormrz.$(SUFFIX) dormtr.$(SUFFIX) dpbcon.$(SUFFIX) dpbequ.$(SUFFIX) dpbrfs.$(SUFFIX) \
+ dpbstf.$(SUFFIX) dpbsv.$(SUFFIX) dpbsvx.$(SUFFIX) \
+ dpbtf2.$(SUFFIX) dpbtrf.$(SUFFIX) dpbtrs.$(SUFFIX) dpocon.$(SUFFIX) dpoequ.$(SUFFIX) dporfs.$(SUFFIX) dposv.$(SUFFIX) \
+ dposvx.$(SUFFIX) dpotri.$(SUFFIX) dpotrs.$(SUFFIX) dpstrf.$(SUFFIX) dpstf2.$(SUFFIX) \
+ dppcon.$(SUFFIX) dppequ.$(SUFFIX) \
+ dpprfs.$(SUFFIX) dppsv.$(SUFFIX) dppsvx.$(SUFFIX) dpptrf.$(SUFFIX) dpptri.$(SUFFIX) dpptrs.$(SUFFIX) dptcon.$(SUFFIX) \
+ dpteqr.$(SUFFIX) dptrfs.$(SUFFIX) dptsv.$(SUFFIX) dptsvx.$(SUFFIX) dpttrs.$(SUFFIX) dptts2.$(SUFFIX) drscl.$(SUFFIX) \
+ dsbev.$(SUFFIX) dsbevd.$(SUFFIX) dsbevx.$(SUFFIX) dsbgst.$(SUFFIX) dsbgv.$(SUFFIX) dsbgvd.$(SUFFIX) dsbgvx.$(SUFFIX) \
+ dsbtrd.$(SUFFIX) dspcon.$(SUFFIX) dspev.$(SUFFIX) dspevd.$(SUFFIX) dspevx.$(SUFFIX) dspgst.$(SUFFIX) \
+ dspgv.$(SUFFIX) dspgvd.$(SUFFIX) dspgvx.$(SUFFIX) dsprfs.$(SUFFIX) dspsv.$(SUFFIX) dspsvx.$(SUFFIX) dsptrd.$(SUFFIX) \
+ dsptrf.$(SUFFIX) dsptri.$(SUFFIX) dsptrs.$(SUFFIX) dstegr.$(SUFFIX) dstein.$(SUFFIX) dstev.$(SUFFIX) dstevd.$(SUFFIX) dstevr.$(SUFFIX) \
+ dstevx.$(SUFFIX) \
+ dsycon.$(SUFFIX) dsyev.$(SUFFIX) dsyevd.$(SUFFIX) dsyevr.$(SUFFIX) \
+ dsyevx.$(SUFFIX) dsygs2.$(SUFFIX) dsygst.$(SUFFIX) dsygv.$(SUFFIX) dsygvd.$(SUFFIX) dsygvx.$(SUFFIX) dsyrfs.$(SUFFIX) \
+ dsysv.$(SUFFIX) dsysvx.$(SUFFIX) \
+ dsytd2.$(SUFFIX) dsytf2.$(SUFFIX) dsytrd.$(SUFFIX) dsytrf.$(SUFFIX) dsytri.$(SUFFIX) dsytri2.$(SUFFIX) dsytri2x.$(SUFFIX) \
+ dsyswapr.$(SUFFIX) dsytrs.$(SUFFIX) dsytrs2.$(SUFFIX) dsyconv.$(SUFFIX) \
+ dtbcon.$(SUFFIX) dtbrfs.$(SUFFIX) dtbtrs.$(SUFFIX) dtgevc.$(SUFFIX) dtgex2.$(SUFFIX) dtgexc.$(SUFFIX) dtgsen.$(SUFFIX) \
+ dtgsja.$(SUFFIX) dtgsna.$(SUFFIX) dtgsy2.$(SUFFIX) dtgsyl.$(SUFFIX) dtpcon.$(SUFFIX) dtprfs.$(SUFFIX) dtptri.$(SUFFIX) \
+ dtptrs.$(SUFFIX) \
+ dtrcon.$(SUFFIX) dtrevc.$(SUFFIX) dtrexc.$(SUFFIX) dtrrfs.$(SUFFIX) dtrsen.$(SUFFIX) dtrsna.$(SUFFIX) dtrsyl.$(SUFFIX) \
+ dtrtrs.$(SUFFIX) dtzrqf.$(SUFFIX) dtzrzf.$(SUFFIX) dstemr.$(SUFFIX) \
+ dsgesv.$(SUFFIX) dsposv.$(SUFFIX) dlag2s.$(SUFFIX) slag2d.$(SUFFIX) dlat2s.$(SUFFIX) \
+ dlansf.$(SUFFIX) dpftrf.$(SUFFIX) dpftri.$(SUFFIX) dpftrs.$(SUFFIX) dsfrk.$(SUFFIX) dtfsm.$(SUFFIX) dtftri.$(SUFFIX) dtfttp.$(SUFFIX) \
+ dtfttr.$(SUFFIX) dtpttf.$(SUFFIX) dtpttr.$(SUFFIX) dtrttf.$(SUFFIX) dtrttp.$(SUFFIX) \
+ dgejsv.$(SUFFIX) dgesvj.$(SUFFIX) dgsvj0.$(SUFFIX) dgsvj1.$(SUFFIX) \
+ dgeequb.$(SUFFIX) dsyequb.$(SUFFIX) dpoequb.$(SUFFIX) dgbequb.$(SUFFIX) \
+ dbbcsd.$(SUFFIX) dlapmr.$(SUFFIX) dorbdb.$(SUFFIX) dorcsd.$(SUFFIX) \
+ dgeqrt.$(SUFFIX) dgeqrt2.$(SUFFIX) dgeqrt3.$(SUFFIX) dgemqrt.$(SUFFIX) \
+ dtpqrt.$(SUFFIX) dtpqrt2.$(SUFFIX) dtpmqrt.$(SUFFIX) dtprfb.$(SUFFIX)
ifdef USEXBLAS
-DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \
- dla_gercond.o dla_gerpvgrw.o dsysvxx.o dsyrfsx.o \
- dla_syrfsx_extended.o dla_syamv.o dla_syrcond.o dla_syrpvgrw.o \
- dposvxx.o dporfsx.o dla_porfsx_extended.o dla_porcond.o \
- dla_porpvgrw.o dgbsvxx.o dgbrfsx.o dla_gbrfsx_extended.o \
- dla_gbamv.o dla_gbrcond.o dla_gbrpvgrw.o dla_lin_berr.o dlarscl2.o \
- dlascl2.o dla_wwaddw.o
+DXLASRC = dgesvxx.$(SUFFIX) dgerfsx.$(SUFFIX) dla_gerfsx_extended.$(SUFFIX) dla_geamv.$(SUFFIX) \
+ dla_gercond.$(SUFFIX) dla_gerpvgrw.$(SUFFIX) dsysvxx.$(SUFFIX) dsyrfsx.$(SUFFIX) \
+ dla_syrfsx_extended.$(SUFFIX) dla_syamv.$(SUFFIX) dla_syrcond.$(SUFFIX) dla_syrpvgrw.$(SUFFIX) \
+ dposvxx.$(SUFFIX) dporfsx.$(SUFFIX) dla_porfsx_extended.$(SUFFIX) dla_porcond.$(SUFFIX) \
+ dla_porpvgrw.$(SUFFIX) dgbsvxx.$(SUFFIX) dgbrfsx.$(SUFFIX) dla_gbrfsx_extended.$(SUFFIX) \
+ dla_gbamv.$(SUFFIX) dla_gbrcond.$(SUFFIX) dla_gbrpvgrw.$(SUFFIX) dla_lin_berr.$(SUFFIX) dlarscl2.$(SUFFIX) \
+ dlascl2.$(SUFFIX) dla_wwaddw.$(SUFFIX)
endif
ZLASRC = \
- zbdsqr.o zgbbrd.o zgbcon.o zgbequ.o zgbrfs.o zgbsv.o zgbsvx.o \
- zgbtf2.o zgbtrf.o zgbtrs.o zgebak.o zgebal.o zgebd2.o zgebrd.o \
- zgecon.o zgeequ.o zgees.o zgeesx.o zgeev.o zgeevx.o \
- zgegs.o zgegv.o zgehd2.o zgehrd.o zgelq2.o zgelqf.o \
- zgels.o zgelsd.o zgelss.o zgelsx.o zgelsy.o zgeql2.o zgeqlf.o zgeqp3.o \
- zgeqpf.o zgeqr2.o zgeqr2p.o zgeqrf.o zgeqrfp.o zgerfs.o zgerq2.o zgerqf.o \
- zgesc2.o zgesdd.o zgesv.o zgesvd.o zgesvx.o zgetc2.o zgetf2.o zgetrf.o \
- zgetri.o zgetrs.o \
- zggbak.o zggbal.o zgges.o zggesx.o zggev.o zggevx.o zggglm.o \
- zgghrd.o zgglse.o zggqrf.o zggrqf.o \
- zggsvd.o zggsvp.o \
- zgtcon.o zgtrfs.o zgtsv.o zgtsvx.o zgttrf.o zgttrs.o zgtts2.o zhbev.o \
- zhbevd.o zhbevx.o zhbgst.o zhbgv.o zhbgvd.o zhbgvx.o zhbtrd.o \
- zhecon.o zheev.o zheevd.o zheevr.o zheevx.o zhegs2.o zhegst.o \
- zhegv.o zhegvd.o zhegvx.o zherfs.o zhesv.o zhesvx.o zhetd2.o \
- zhetf2.o zhetrd.o \
- zhetrf.o zhetri.o zhetri2.o zhetri2x.o zheswapr.o \
- zhetrs.o zhetrs2.o zhgeqz.o zhpcon.o zhpev.o zhpevd.o \
- zhpevx.o zhpgst.o zhpgv.o zhpgvd.o zhpgvx.o zhprfs.o zhpsv.o \
- zhpsvx.o \
- zhptrd.o zhptrf.o zhptri.o zhptrs.o zhsein.o zhseqr.o zlabrd.o \
- zlacgv.o zlacon.o zlacn2.o zlacp2.o zlacpy.o zlacrm.o zlacrt.o zladiv.o \
- zlaed0.o zlaed7.o zlaed8.o \
- zlaein.o zlaesy.o zlaev2.o zlags2.o zlagtm.o \
- zlahef.o zlahqr.o \
- zlahrd.o zlahr2.o zlaic1.o zlals0.o zlalsa.o zlalsd.o zlangb.o zlange.o \
- zlangt.o zlanhb.o \
- zlanhe.o \
- zlanhp.o zlanhs.o zlanht.o zlansb.o zlansp.o zlansy.o zlantb.o \
- zlantp.o zlantr.o zlapll.o zlapmt.o zlaqgb.o zlaqge.o \
- zlaqhb.o zlaqhe.o zlaqhp.o zlaqp2.o zlaqps.o zlaqsb.o \
- zlaqr0.o zlaqr1.o zlaqr2.o zlaqr3.o zlaqr4.o zlaqr5.o \
- zlaqsp.o zlaqsy.o zlar1v.o zlar2v.o ilazlr.o ilazlc.o \
- zlarcm.o zlarf.o zlarfb.o \
- zlarfg.o zlarft.o zlarfgp.o \
- zlarfx.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \
- zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \
- zlassq.o zlaswp.o zlasyf.o \
- zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlatzm.o zlauu2.o \
- zlauum.o zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \
- zpbsvx.o zpbtf2.o zpbtrf.o zpbtrs.o zpocon.o zpoequ.o zporfs.o \
- zposv.o zposvx.o zpotf2.o zpotrf.o zpotri.o zpotrs.o zpstrf.o zpstf2.o \
- zppcon.o zppequ.o zpprfs.o zppsv.o zppsvx.o zpptrf.o zpptri.o zpptrs.o \
- zptcon.o zpteqr.o zptrfs.o zptsv.o zptsvx.o zpttrf.o zpttrs.o zptts2.o \
- zrot.o zspcon.o zspmv.o zspr.o zsprfs.o zspsv.o \
- zspsvx.o zsptrf.o zsptri.o zsptrs.o zdrscl.o zstedc.o \
- zstegr.o zstein.o zsteqr.o \
- zsycon.o zsymv.o \
- zsyr.o zsyrfs.o zsysv.o zsysvx.o zsytf2.o zsytrf.o zsytri.o zsytri2.o zsytri2x.o \
- zsyswapr.o zsytrs.o zsytrs2.o zsyconv.o \
- ztbcon.o ztbrfs.o ztbtrs.o ztgevc.o ztgex2.o \
- ztgexc.o ztgsen.o ztgsja.o ztgsna.o ztgsy2.o ztgsyl.o ztpcon.o \
- ztprfs.o ztptri.o \
- ztptrs.o ztrcon.o ztrevc.o ztrexc.o ztrrfs.o ztrsen.o ztrsna.o \
- ztrsyl.o ztrti2.o ztrtri.o ztrtrs.o ztzrqf.o ztzrzf.o zung2l.o \
- zung2r.o zungbr.o zunghr.o zungl2.o zunglq.o zungql.o zungqr.o zungr2.o \
- zungrq.o zungtr.o zunm2l.o zunm2r.o zunmbr.o zunmhr.o zunml2.o \
- zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \
- zunmtr.o zupgtr.o \
- zupmtr.o izmax1.o dzsum1.o zstemr.o \
- zcgesv.o zcposv.o zlag2c.o clag2z.o zlat2c.o \
- zhfrk.o ztfttp.o zlanhf.o zpftrf.o zpftri.o zpftrs.o ztfsm.o ztftri.o \
- ztfttr.o ztpttf.o ztpttr.o ztrttf.o ztrttp.o \
- zgeequb.o zgbequb.o zsyequb.o zpoequb.o zheequb.o \
- zbbcsd.o zlapmr.o zunbdb.o zuncsd.o \
- zgeqrt.o zgeqrt2.o zgeqrt3.o zgemqrt.o \
- ztpqrt.o ztpqrt2.o ztpmqrt.o ztprfb.o
+ zbdsqr.$(SUFFIX) zgbbrd.$(SUFFIX) zgbcon.$(SUFFIX) zgbequ.$(SUFFIX) zgbrfs.$(SUFFIX) zgbsv.$(SUFFIX) zgbsvx.$(SUFFIX) \
+ zgbtf2.$(SUFFIX) zgbtrf.$(SUFFIX) zgbtrs.$(SUFFIX) zgebak.$(SUFFIX) zgebal.$(SUFFIX) zgebd2.$(SUFFIX) zgebrd.$(SUFFIX) \
+ zgecon.$(SUFFIX) zgeequ.$(SUFFIX) zgees.$(SUFFIX) zgeesx.$(SUFFIX) zgeev.$(SUFFIX) zgeevx.$(SUFFIX) \
+ zgegs.$(SUFFIX) zgegv.$(SUFFIX) zgehd2.$(SUFFIX) zgehrd.$(SUFFIX) zgelq2.$(SUFFIX) zgelqf.$(SUFFIX) \
+ zgels.$(SUFFIX) zgelsd.$(SUFFIX) zgelss.$(SUFFIX) zgelsx.$(SUFFIX) zgelsy.$(SUFFIX) zgeql2.$(SUFFIX) zgeqlf.$(SUFFIX) zgeqp3.$(SUFFIX) \
+ zgeqpf.$(SUFFIX) zgeqr2.$(SUFFIX) zgeqr2p.$(SUFFIX) zgeqrf.$(SUFFIX) zgeqrfp.$(SUFFIX) zgerfs.$(SUFFIX) zgerq2.$(SUFFIX) zgerqf.$(SUFFIX) \
+ zgesc2.$(SUFFIX) zgesdd.$(SUFFIX) zgesv.$(SUFFIX) zgesvd.$(SUFFIX) zgesvx.$(SUFFIX) zgetc2.$(SUFFIX) \
+ zgetri.$(SUFFIX) \
+ zggbak.$(SUFFIX) zggbal.$(SUFFIX) zgges.$(SUFFIX) zggesx.$(SUFFIX) zggev.$(SUFFIX) zggevx.$(SUFFIX) zggglm.$(SUFFIX) \
+ zgghrd.$(SUFFIX) zgglse.$(SUFFIX) zggqrf.$(SUFFIX) zggrqf.$(SUFFIX) \
+ zggsvd.$(SUFFIX) zggsvp.$(SUFFIX) \
+ zgtcon.$(SUFFIX) zgtrfs.$(SUFFIX) zgtsv.$(SUFFIX) zgtsvx.$(SUFFIX) zgttrf.$(SUFFIX) zgttrs.$(SUFFIX) zgtts2.$(SUFFIX) zhbev.$(SUFFIX) \
+ zhbevd.$(SUFFIX) zhbevx.$(SUFFIX) zhbgst.$(SUFFIX) zhbgv.$(SUFFIX) zhbgvd.$(SUFFIX) zhbgvx.$(SUFFIX) zhbtrd.$(SUFFIX) \
+ zhecon.$(SUFFIX) zheev.$(SUFFIX) zheevd.$(SUFFIX) zheevr.$(SUFFIX) zheevx.$(SUFFIX) zhegs2.$(SUFFIX) zhegst.$(SUFFIX) \
+ zhegv.$(SUFFIX) zhegvd.$(SUFFIX) zhegvx.$(SUFFIX) zherfs.$(SUFFIX) zhesv.$(SUFFIX) zhesvx.$(SUFFIX) zhetd2.$(SUFFIX) \
+ zhetf2.$(SUFFIX) zhetrd.$(SUFFIX) \
+ zhetrf.$(SUFFIX) zhetri.$(SUFFIX) zhetri2.$(SUFFIX) zhetri2x.$(SUFFIX) zheswapr.$(SUFFIX) \
+ zhetrs.$(SUFFIX) zhetrs2.$(SUFFIX) zhgeqz.$(SUFFIX) zhpcon.$(SUFFIX) zhpev.$(SUFFIX) zhpevd.$(SUFFIX) \
+ zhpevx.$(SUFFIX) zhpgst.$(SUFFIX) zhpgv.$(SUFFIX) zhpgvd.$(SUFFIX) zhpgvx.$(SUFFIX) zhprfs.$(SUFFIX) zhpsv.$(SUFFIX) \
+ zhpsvx.$(SUFFIX) \
+ zhptrd.$(SUFFIX) zhptrf.$(SUFFIX) zhptri.$(SUFFIX) zhptrs.$(SUFFIX) zhsein.$(SUFFIX) zhseqr.$(SUFFIX) zlabrd.$(SUFFIX) \
+ zlacgv.$(SUFFIX) zlacon.$(SUFFIX) zlacn2.$(SUFFIX) zlacp2.$(SUFFIX) zlacpy.$(SUFFIX) zlacrm.$(SUFFIX) zlacrt.$(SUFFIX) zladiv.$(SUFFIX) \
+ zlaed0.$(SUFFIX) zlaed7.$(SUFFIX) zlaed8.$(SUFFIX) \
+ zlaein.$(SUFFIX) zlaesy.$(SUFFIX) zlaev2.$(SUFFIX) zlags2.$(SUFFIX) zlagtm.$(SUFFIX) \
+ zlahef.$(SUFFIX) zlahqr.$(SUFFIX) \
+ zlahrd.$(SUFFIX) zlahr2.$(SUFFIX) zlaic1.$(SUFFIX) zlals0.$(SUFFIX) zlalsa.$(SUFFIX) zlalsd.$(SUFFIX) zlangb.$(SUFFIX) zlange.$(SUFFIX) \
+ zlangt.$(SUFFIX) zlanhb.$(SUFFIX) \
+ zlanhe.$(SUFFIX) \
+ zlanhp.$(SUFFIX) zlanhs.$(SUFFIX) zlanht.$(SUFFIX) zlansb.$(SUFFIX) zlansp.$(SUFFIX) zlansy.$(SUFFIX) zlantb.$(SUFFIX) \
+ zlantp.$(SUFFIX) zlantr.$(SUFFIX) zlapll.$(SUFFIX) zlapmt.$(SUFFIX) zlaqgb.$(SUFFIX) zlaqge.$(SUFFIX) \
+ zlaqhb.$(SUFFIX) zlaqhe.$(SUFFIX) zlaqhp.$(SUFFIX) zlaqp2.$(SUFFIX) zlaqps.$(SUFFIX) zlaqsb.$(SUFFIX) \
+ zlaqr0.$(SUFFIX) zlaqr1.$(SUFFIX) zlaqr2.$(SUFFIX) zlaqr3.$(SUFFIX) zlaqr4.$(SUFFIX) zlaqr5.$(SUFFIX) \
+ zlaqsp.$(SUFFIX) zlaqsy.$(SUFFIX) zlar1v.$(SUFFIX) zlar2v.$(SUFFIX) ilazlr.$(SUFFIX) ilazlc.$(SUFFIX) \
+ zlarcm.$(SUFFIX) zlarf.$(SUFFIX) zlarfb.$(SUFFIX) \
+ zlarfg.$(SUFFIX) zlarft.$(SUFFIX) zlarfgp.$(SUFFIX) \
+ zlarfx.$(SUFFIX) zlargv.$(SUFFIX) zlarnv.$(SUFFIX) zlarrv.$(SUFFIX) zlartg.$(SUFFIX) zlartv.$(SUFFIX) \
+ zlarz.$(SUFFIX) zlarzb.$(SUFFIX) zlarzt.$(SUFFIX) zlascl.$(SUFFIX) zlaset.$(SUFFIX) zlasr.$(SUFFIX) \
+ zlassq.$(SUFFIX) zlasyf.$(SUFFIX) \
+ zlatbs.$(SUFFIX) zlatdf.$(SUFFIX) zlatps.$(SUFFIX) zlatrd.$(SUFFIX) zlatrs.$(SUFFIX) zlatrz.$(SUFFIX) zlatzm.$(SUFFIX) zlauu2.$(SUFFIX) \
+ zpbcon.$(SUFFIX) zpbequ.$(SUFFIX) zpbrfs.$(SUFFIX) zpbstf.$(SUFFIX) zpbsv.$(SUFFIX) \
+ zpbsvx.$(SUFFIX) zpbtf2.$(SUFFIX) zpbtrf.$(SUFFIX) zpbtrs.$(SUFFIX) zpocon.$(SUFFIX) zpoequ.$(SUFFIX) zporfs.$(SUFFIX) \
+ zposv.$(SUFFIX) zposvx.$(SUFFIX) zpotri.$(SUFFIX) zpotrs.$(SUFFIX) zpstrf.$(SUFFIX) zpstf2.$(SUFFIX) \
+ zppcon.$(SUFFIX) zppequ.$(SUFFIX) zpprfs.$(SUFFIX) zppsv.$(SUFFIX) zppsvx.$(SUFFIX) zpptrf.$(SUFFIX) zpptri.$(SUFFIX) zpptrs.$(SUFFIX) \
+ zptcon.$(SUFFIX) zpteqr.$(SUFFIX) zptrfs.$(SUFFIX) zptsv.$(SUFFIX) zptsvx.$(SUFFIX) zpttrf.$(SUFFIX) zpttrs.$(SUFFIX) zptts2.$(SUFFIX) \
+ zrot.$(SUFFIX) zspcon.$(SUFFIX) zspmv.$(SUFFIX) zspr.$(SUFFIX) zsprfs.$(SUFFIX) zspsv.$(SUFFIX) \
+ zspsvx.$(SUFFIX) zsptrf.$(SUFFIX) zsptri.$(SUFFIX) zsptrs.$(SUFFIX) zdrscl.$(SUFFIX) zstedc.$(SUFFIX) \
+ zstegr.$(SUFFIX) zstein.$(SUFFIX) zsteqr.$(SUFFIX) \
+ zsycon.$(SUFFIX) zsymv.$(SUFFIX) \
+ zsyr.$(SUFFIX) zsyrfs.$(SUFFIX) zsysv.$(SUFFIX) zsysvx.$(SUFFIX) zsytf2.$(SUFFIX) zsytrf.$(SUFFIX) zsytri.$(SUFFIX) zsytri2.$(SUFFIX) zsytri2x.$(SUFFIX) \
+ zsyswapr.$(SUFFIX) zsytrs.$(SUFFIX) zsytrs2.$(SUFFIX) zsyconv.$(SUFFIX) \
+ ztbcon.$(SUFFIX) ztbrfs.$(SUFFIX) ztbtrs.$(SUFFIX) ztgevc.$(SUFFIX) ztgex2.$(SUFFIX) \
+ ztgexc.$(SUFFIX) ztgsen.$(SUFFIX) ztgsja.$(SUFFIX) ztgsna.$(SUFFIX) ztgsy2.$(SUFFIX) ztgsyl.$(SUFFIX) ztpcon.$(SUFFIX) \
+ ztprfs.$(SUFFIX) ztptri.$(SUFFIX) \
+ ztptrs.$(SUFFIX) ztrcon.$(SUFFIX) ztrevc.$(SUFFIX) ztrexc.$(SUFFIX) ztrrfs.$(SUFFIX) ztrsen.$(SUFFIX) ztrsna.$(SUFFIX) \
+ ztrsyl.$(SUFFIX) ztrtrs.$(SUFFIX) ztzrqf.$(SUFFIX) ztzrzf.$(SUFFIX) zung2l.$(SUFFIX) \
+ zung2r.$(SUFFIX) zungbr.$(SUFFIX) zunghr.$(SUFFIX) zungl2.$(SUFFIX) zunglq.$(SUFFIX) zungql.$(SUFFIX) zungqr.$(SUFFIX) zungr2.$(SUFFIX) \
+ zungrq.$(SUFFIX) zungtr.$(SUFFIX) zunm2l.$(SUFFIX) zunm2r.$(SUFFIX) zunmbr.$(SUFFIX) zunmhr.$(SUFFIX) zunml2.$(SUFFIX) \
+ zunmlq.$(SUFFIX) zunmql.$(SUFFIX) zunmqr.$(SUFFIX) zunmr2.$(SUFFIX) zunmr3.$(SUFFIX) zunmrq.$(SUFFIX) zunmrz.$(SUFFIX) \
+ zunmtr.$(SUFFIX) zupgtr.$(SUFFIX) \
+ zupmtr.$(SUFFIX) izmax1.$(SUFFIX) dzsum1.$(SUFFIX) zstemr.$(SUFFIX) \
+ zcgesv.$(SUFFIX) zcposv.$(SUFFIX) zlag2c.$(SUFFIX) clag2z.$(SUFFIX) zlat2c.$(SUFFIX) \
+ zhfrk.$(SUFFIX) ztfttp.$(SUFFIX) zlanhf.$(SUFFIX) zpftrf.$(SUFFIX) zpftri.$(SUFFIX) zpftrs.$(SUFFIX) ztfsm.$(SUFFIX) ztftri.$(SUFFIX) \
+ ztfttr.$(SUFFIX) ztpttf.$(SUFFIX) ztpttr.$(SUFFIX) ztrttf.$(SUFFIX) ztrttp.$(SUFFIX) \
+ zgeequb.$(SUFFIX) zgbequb.$(SUFFIX) zsyequb.$(SUFFIX) zpoequb.$(SUFFIX) zheequb.$(SUFFIX) \
+ zbbcsd.$(SUFFIX) zlapmr.$(SUFFIX) zunbdb.$(SUFFIX) zuncsd.$(SUFFIX) \
+ zgeqrt.$(SUFFIX) zgeqrt2.$(SUFFIX) zgeqrt3.$(SUFFIX) zgemqrt.$(SUFFIX) \
+ ztpqrt.$(SUFFIX) ztpqrt2.$(SUFFIX) ztpmqrt.$(SUFFIX) ztprfb.$(SUFFIX)
ifdef USEXBLAS
-ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \
- zla_gercond_c.o zla_gercond_x.o zla_gerpvgrw.o zsysvxx.o zsyrfsx.o \
- zla_syrfsx_extended.o zla_syamv.o zla_syrcond_c.o zla_syrcond_x.o \
- zla_syrpvgrw.o zposvxx.o zporfsx.o zla_porfsx_extended.o \
- zla_porcond_c.o zla_porcond_x.o zla_porpvgrw.o zgbsvxx.o zgbrfsx.o \
- zla_gbrfsx_extended.o zla_gbamv.o zla_gbrcond_c.o zla_gbrcond_x.o \
- zla_gbrpvgrw.o zhesvxx.o zherfsx.o zla_herfsx_extended.o \
- zla_heamv.o zla_hercond_c.o zla_hercond_x.o zla_herpvgrw.o \
- zla_lin_berr.o zlarscl2.o zlascl2.o zla_wwaddw.o
+ZXLASRC = zgesvxx.$(SUFFIX) zgerfsx.$(SUFFIX) zla_gerfsx_extended.$(SUFFIX) zla_geamv.$(SUFFIX) \
+ zla_gercond_c.$(SUFFIX) zla_gercond_x.$(SUFFIX) zla_gerpvgrw.$(SUFFIX) zsysvxx.$(SUFFIX) zsyrfsx.$(SUFFIX) \
+ zla_syrfsx_extended.$(SUFFIX) zla_syamv.$(SUFFIX) zla_syrcond_c.$(SUFFIX) zla_syrcond_x.$(SUFFIX) \
+ zla_syrpvgrw.$(SUFFIX) zposvxx.$(SUFFIX) zporfsx.$(SUFFIX) zla_porfsx_extended.$(SUFFIX) \
+ zla_porcond_c.$(SUFFIX) zla_porcond_x.$(SUFFIX) zla_porpvgrw.$(SUFFIX) zgbsvxx.$(SUFFIX) zgbrfsx.$(SUFFIX) \
+ zla_gbrfsx_extended.$(SUFFIX) zla_gbamv.$(SUFFIX) zla_gbrcond_c.$(SUFFIX) zla_gbrcond_x.$(SUFFIX) \
+ zla_gbrpvgrw.$(SUFFIX) zhesvxx.$(SUFFIX) zherfsx.$(SUFFIX) zla_herfsx_extended.$(SUFFIX) \
+ zla_heamv.$(SUFFIX) zla_hercond_c.$(SUFFIX) zla_hercond_x.$(SUFFIX) zla_herpvgrw.$(SUFFIX) \
+ zla_lin_berr.$(SUFFIX) zlarscl2.$(SUFFIX) zlascl2.$(SUFFIX) zla_wwaddw.$(SUFFIX)
endif
ALLOBJ = $(SLASRC) $(DLASRC) $(DSLASRC) $(CLASRC) $(ZLASRC) $(ZCLASRC) \
$(SCLAUX) $(DZLAUX) $(ALLAUX)
+ALLOBJ_P = $(ALLOBJ:.$(SUFFIX)=.$(PSUFFIX))
+
ifdef USEXBLAS
ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
endif
all: ../$(LAPACKLIB)
+lapack_prof: ../$(LAPACKLIB_P)
+
../$(LAPACKLIB): $(ALLOBJ) $(ALLXOBJ)
$(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ) $(ALLXOBJ)
$(RANLIB) $@
+../$(LAPACKLIB_P): $(ALLOBJ_P)
+ $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ_P)
+ $(RANLIB) $@
+
single: $(SLASRC) $(DSLASRC) $(SXLASRC) $(SCLAUX) $(ALLAUX)
$(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \
$(SXLASRC) $(SCLAUX) $(ALLAUX) $(ALLXAUX)
@@ -451,15 +459,24 @@
@FRC=$(FRC)
clean:
- rm -f *.o
+ rm -f *.$(SUFFIX) *.$(PSUFFIX)
-.f.o:
+%.$(SUFFIX): %.f
$(FORTRAN) $(OPTS) -c $< -o $@
-slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
-dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
-sla_wwaddw.o: sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
-dla_wwaddw.o: dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
-cla_wwaddw.o: cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
-zla_wwaddw.o: zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
+%.$(PSUFFIX): %.f
+ $(FORTRAN) $(POPTS) -c $< -o $@
+slaruv.$(SUFFIX): slaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
+dlaruv.$(SUFFIX): dlaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
+sla_wwaddw.$(SUFFIX): sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
+dla_wwaddw.$(SUFFIX): dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
+cla_wwaddw.$(SUFFIX): cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
+zla_wwaddw.$(SUFFIX): zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
+
+slaruv.$(PSUFFIX): slaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
+dlaruv.$(PSUFFIX): dlaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
+sla_wwaddw.$(PSUFFIX): sla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
+dla_wwaddw.$(PSUFFIX): dla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
+cla_wwaddw.$(PSUFFIX): cla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
+zla_wwaddw.$(PSUFFIX): zla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
diff -ruN lapack-3.4.1.old/TESTING/EIG/Makefile lapack-3.4.1/TESTING/EIG/Makefile
--- lapack-3.4.1.old/TESTING/EIG/Makefile 2011-09-26 23:52:31 +0200
+++ lapack-3.4.1/TESTING/EIG/Makefile 2012-04-22 21:41:45 +0200
@@ -78,7 +78,7 @@
cget35.o cget36.o cget37.o cget38.o cget51.o cget52.o \
cget54.o cglmts.o cgqrts.o cgrqts.o cgsvts.o \
chbt21.o chet21.o chet22.o chpt21.o chst01.o \
- clarfy.o clarhs.o clatm4.o clctes.o clctsx.o clsets.o csbmv.o \
+ clarfy.o clarhs.o clatm4.o clctes.o clctsx.o clsets.o \
csgt01.o cslect.o \
cstt21.o cstt22.o cunt01.o cunt03.o
@@ -115,7 +115,7 @@
zget35.o zget36.o zget37.o zget38.o zget51.o zget52.o \
zget54.o zglmts.o zgqrts.o zgrqts.o zgsvts.o \
zhbt21.o zhet21.o zhet22.o zhpt21.o zhst01.o \
- zlarfy.o zlarhs.o zlatm4.o zlctes.o zlctsx.o zlsets.o zsbmv.o \
+ zlarfy.o zlarhs.o zlatm4.o zlctes.o zlctsx.o zlsets.o \
zsgt01.o zslect.o \
zstt21.o zstt22.o zunt01.o zunt03.o
@@ -129,22 +129,22 @@
../xeigtsts: $(SEIGTST) $(SCIGTST) $(AEIGTST) ../../$(LAPACKLIB); \
$(LOADER) $(LOADOPTS) -o xeigtsts \
$(SEIGTST) $(SCIGTST) $(AEIGTST) ../../$(TMGLIB) \
- ../../$(LAPACKLIB) $(BLASLIB) && mv xeigtsts $@
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB) && mv xeigtsts $@
../xeigtstc: $(CEIGTST) $(SCIGTST) $(AEIGTST) ../../$(LAPACKLIB); \
$(LOADER) $(LOADOPTS) -o xeigtstc \
$(CEIGTST) $(SCIGTST) $(AEIGTST) ../../$(TMGLIB) \
- ../../$(LAPACKLIB) $(BLASLIB) && mv xeigtstc $@
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB) && mv xeigtstc $@
../xeigtstd: $(DEIGTST) $(DZIGTST) $(AEIGTST) ../../$(LAPACKLIB); \
$(LOADER) $(LOADOPTS) -o xeigtstd \
$(DEIGTST) $(DZIGTST) $(AEIGTST) ../../$(TMGLIB) \
- ../../$(LAPACKLIB) $(BLASLIB) && mv xeigtstd $@
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB) && mv xeigtstd $@
../xeigtstz: $(ZEIGTST) $(DZIGTST) $(AEIGTST) ../../$(LAPACKLIB); \
$(LOADER) $(LOADOPTS) -o xeigtstz \
$(ZEIGTST) $(DZIGTST) $(AEIGTST) ../../$(TMGLIB) \
- ../../$(LAPACKLIB) $(BLASLIB) && mv xeigtstz $@
+ ../../$(LAPACKLIB) $(BLASLIB) $(CEXTRALIB) && mv xeigtstz $@
$(AEIGTST): $(FRC)
$(SCIGTST): $(FRC)
diff -ruN lapack-3.4.1.old/TESTING/LIN/Makefile lapack-3.4.1/TESTING/LIN/Makefile
--- lapack-3.4.1.old/TESTING/LIN/Makefile 2012-04-02 21:06:36 +0200
+++ lapack-3.4.1/TESTING/LIN/Makefile 2012-04-22 21:43:30 +0200
@@ -109,7 +109,7 @@
cqpt01.o cqrt01.o cqrt01p.o cqrt02.o cqrt03.o cqrt11.o \
cqrt12.o cqrt13.o cqrt14.o cqrt15.o cqrt16.o \
cqrt17.o crqt01.o crqt02.o crqt03.o crzt01.o crzt02.o \
- csbmv.o cspt01.o \
+ cspt01.o \
cspt02.o cspt03.o csyt01.o csyt02.o csyt03.o \
ctbt02.o ctbt03.o ctbt05.o ctbt06.o ctpt01.o \
ctpt02.o ctpt03.o ctpt05.o ctpt06.o ctrt01.o \
@@ -188,7 +188,7 @@
zqpt01.o zqrt01.o zqrt01p.o zqrt02.o zqrt03.o zqrt11.o \
zqrt12.o zqrt13.o zqrt14.o zqrt15.o zqrt16.o \
zqrt17.o zrqt01.o zrqt02.o zrqt03.o zrzt01.o zrzt02.o \
- zsbmv.o zspt01.o \
+ zspt01.o \
zspt02.o zspt03.o zsyt01.o zsyt02.o zsyt03.o \
ztbt02.o ztbt03.o ztbt05.o ztbt06.o ztpt01.o \
ztpt02.o ztpt03.o ztpt05.o ztpt06.o ztrt01.o \
@@ -214,7 +214,7 @@
zdrvab.o zdrvac.o zerrab.o zerrac.o zget08.o \
alaerh.o alahd.o aladhd.o alareq.o \
chkxer.o zget02.o zlarhs.o zlatb4.o \
- zsbmv.o xerbla.o zpot06.o zlaipd.o
+ xerbla.o zpot06.o zlaipd.o
SLINTSTRFP = schkrfp.o sdrvrfp.o sdrvrf1.o sdrvrf2.o sdrvrf3.o sdrvrf4.o serrrfp.o \
slatb4.o slarhs.o sget04.o spot01.o spot03.o spot02.o \
@@ -225,11 +225,11 @@
chkxer.o xerbla.o alaerh.o aladhd.o alahd.o alasvm.o
CLINTSTRFP = cchkrfp.o cdrvrfp.o cdrvrf1.o cdrvrf2.o cdrvrf3.o cdrvrf4.o cerrrfp.o \
- claipd.o clatb4.o clarhs.o csbmv.o cget04.o cpot01.o cpot03.o cpot02.o \
+ claipd.o clatb4.o clarhs.o cget04.o cpot01.o cpot03.o cpot02.o \
chkxer.o xerbla.o alaerh.o aladhd.o alahd.o alasvm.o
ZLINTSTRFP = zchkrfp.o zdrvrfp.o zdrvrf1.o zdrvrf2.o zdrvrf3.o zdrvrf4.o zerrrfp.o \
- zlatb4.o zlaipd.o zlarhs.o zsbmv.o zget04.o zpot01.o zpot03.o zpot02.o \
+ zlatb4.o zlaipd.o zlarhs.o zget04.o zpot01.o zpot03.o zpot02.o \
chkxer.o xerbla.o alaerh.o aladhd.o alahd.o alasvm.o
all: single double complex complex16 proto-single proto-double proto-complex proto-complex16
@@ -246,43 +246,43 @@
xlintsts : $(ALINTST) $(SLINTST) $(SCLNTST) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $(ALINTST) $(SCLNTST) $(SLINTST) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
xlintstc : $(ALINTST) $(CLINTST) $(SCLNTST) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $(ALINTST) $(SCLNTST) $(CLINTST) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
xlintstd : $(ALINTST) $(DLINTST) $(DZLNTST) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $^ \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
xlintstz : $(ALINTST) $(ZLINTST) $(DZLNTST) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $(ALINTST) $(DZLNTST) $(ZLINTST) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
xlintstds : $(DSLINTST) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $(DSLINTST) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
xlintstzc : $(ZCLINTST) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $(ZCLINTST) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
xlintstrfs : $(SLINTSTRFP) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $(SLINTSTRFP) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
xlintstrfd : $(DLINTSTRFP) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $(DLINTSTRFP) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
xlintstrfc : $(CLINTSTRFP) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $(CLINTSTRFP) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
xlintstrfz : $(ZLINTSTRFP) ../../$(LAPACKLIB)
$(LOADER) $(LOADOPTS) $(ZLINTSTRFP) \
- ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ $(CEXTRALIB)
../xlintsts: xlintsts
mv xlintsts $@
diff -ruN lapack-3.4.1.old/lapacke/src/Makefile lapack-3.4.1/lapacke/src/Makefile
--- lapack-3.4.1.old/lapacke/src/Makefile 2012-04-02 22:16:32 +0200
+++ lapack-3.4.1/lapacke/src/Makefile 2012-04-22 21:38:38 +0200
@@ -2040,19 +2040,21 @@
lapacke_zlagsy.o \
lapacke_zlagsy_work.o
-ALLOBJ = $(SRC_OBJ) $(MATGEN_OBJ)
+OBJ_FILES := $(SRC_OBJ)
-ifdef USEXBLAS
-ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
+ifdef LAPACKE_EXTENDED
+OBJ_FILES += $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
endif
-
-OBJ_FILES := $(C_FILES:.o=.o)
+ifdef LAPACKE_TESTING
+OBJ_FILES += $(MATGEN_OBJ)
+endif
all: ../../$(LAPACKELIB)
-../../$(LAPACKELIB): $(ALLOBJ) $(ALLXOBJ)
- $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ALLOBJ) $(ALLXOBJ)
+../../$(LAPACKELIB): $(OBJ_FILES)
+# http://hackage.haskell.org/trac/gtk2hs/ticket/1146
+ echo $(OBJ_FILES) | xargs -n 100 $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB)
$(RANLIB) ../../$(LAPACKELIB)
.c.o:

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,4 @@
#!/bin/bash
echo " Please read https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio "
make BINARY=64 CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran
make BINARY=64 CC=gcc FC=gfortran

View File

@@ -11,7 +11,7 @@ CUNIT_LIB=$(CUNIT_DIR)/lib/libcunit.a
CFLAGS+=-I$(CUNIT_DIR)/include
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o test_amax.o
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o test_amax.o test_fork.o
all : run_test

View File

@@ -63,4 +63,6 @@ void test_dsdot_n_1(void);
void test_samax(void);
void test_fork_safety(void);
#endif

View File

@@ -60,6 +60,14 @@ CU_TestInfo test_level1[]={
{"Testing dsdot with n == 1",test_dsdot_n_1},
{"Testing samax", test_samax},
#if !defined(USE_OPENMP) && !defined(OS_WINDOWS)
// The GNU OpenMP implementation libgomp is not fork-safe (as of 4.8.2):
// http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035
// Hence skip this test when OpenBLAS is built with OpenMP.
{"Testing fork safety", test_fork_safety},
#endif
CU_TEST_INFO_NULL,
};

123
utest/test_fork.c Normal file
View File

@@ -0,0 +1,123 @@
/*****************************************************************************
Copyright (c) 2014, Lab of Parallel Software and Computational Science,ISCAS
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the ISCAS nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
#ifndef OS_WINDOWS
#include "common_utest.h"
#include <sys/wait.h>
#include <cblas.h>
/*
 * Allocate n bytes with malloc(), terminating the process on failure.
 *
 * Returns the pointer obtained from malloc(). If malloc() yields NULL, a
 * message is written to stderr and the process exits with status 1, so
 * callers never receive NULL.
 */
void* xmalloc(size_t n)
{
    void *block = malloc(n);

    if (block != NULL) {
        return block;
    }

    fprintf(stderr, "You are about to die\n");
    exit(1);
}
/*
 * Multiply the n-by-n row-major matrices a and b with cblas_dgemm
 * (alpha = 1.0, beta = 0.0), writing the product into result, then assert
 * through CUnit that each of the n * n elements of result matches the
 * corresponding element of expected to within CHECK_EPS.
 */
void check_dgemm(double *a, double *b, double *result, double *expected, int n)
{
    int idx = 0;

    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n,
                1.0, a, n, b, n, 0.0, result, n);

    while (idx < n * n) {
        CU_ASSERT_DOUBLE_EQUAL(expected[idx], result[idx], CHECK_EPS);
        ++idx;
    }
}
/*
 * Check that DGEMM keeps working after fork(), including a nested fork.
 *
 * Flow:
 *   1. Fill two n-by-n matrices with ones and compute a reference product in
 *      the parent before forking.
 *   2. fork(); parent and child each recompute the product with check_dgemm()
 *      and compare it against the reference.
 *   3. The child forks once more and both of those processes repeat the check.
 *   4. Each waiting process reaps its own child with waitpid() and asserts
 *      that the child terminated normally with exit status 0.
 *
 * NOTE(review): the children always call exit(0) after check_dgemm(), so an
 * assertion failure raised inside a child is presumably not visible to the
 * parent; what the parent detects is a child that crashes or exits abnormally.
 */
void test_fork_safety(void)
{
    int n = 1000;
    int i;
    double *a, *b, *c, *d;
    size_t n_bytes;
    pid_t fork_pid;
    pid_t fork_pid_nested;

    n_bytes = sizeof(*a) * n * n;
    a = xmalloc(n_bytes);
    b = xmalloc(n_bytes);
    c = xmalloc(n_bytes);
    d = xmalloc(n_bytes);

    // Put ones in a and b
    for(i = 0; i < n * n; ++i) {
        a[i] = 1;
        b[i] = 1;
    }

    // Compute a DGEMM product in the parent process prior to forking to
    // ensure that the OpenBLAS thread pool is initialized.
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n,
                1.0, a, n, b, n, 0.0, c, n);

    fork_pid = fork();
    if (fork_pid == -1) {
        CU_FAIL("Failed to fork process.");
    } else if (fork_pid == 0) {
        // Compute a DGEMM product in the child process to check that the
        // thread pool has been properly reinitialized after the fork.
        check_dgemm(a, b, d, c, n);

        // Nested fork to check that the pthread_atfork protection can work
        // recursively
        fork_pid_nested = fork();
        if (fork_pid_nested == -1) {
            CU_FAIL("Failed to fork process.");
            exit(1);
        } else if (fork_pid_nested == 0) {
            check_dgemm(a, b, d, c, n);
            exit(0);
        } else {
            check_dgemm(a, b, d, c, n);
            int child_status = 0;
            // Reap exactly the nested child rather than "any" child.
            pid_t wait_pid = waitpid(fork_pid_nested, &child_status, 0);
            CU_ASSERT(wait_pid == fork_pid_nested);
            // WEXITSTATUS is only meaningful when WIFEXITED is true; without
            // this guard a child killed by a signal would be read as exit 0.
            CU_ASSERT(WIFEXITED (child_status) && WEXITSTATUS (child_status) == 0);
            exit(0);
        }
    } else {
        check_dgemm(a, b, d, c, n);
        // Wait for the child to finish and check the exit code.
        int child_status = 0;
        pid_t wait_pid = waitpid(fork_pid, &child_status, 0);
        CU_ASSERT(wait_pid == fork_pid);
        // Require a normal exit before trusting the exit status value.
        CU_ASSERT(WIFEXITED (child_status) && WEXITSTATUS (child_status) == 0);
    }

    // Only the original parent (or a failed outer fork) reaches this point;
    // release the work buffers so the test does not leak between runs.
    free(a);
    free(b);
    free(c);
    free(d);
}
#endif