Compare commits
528 Commits
piledriver
...
v0.2.14
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d0c51c4de9 | ||
|
|
a3491e1e88 | ||
|
|
e81a5d61e4 | ||
|
|
c674fa32be | ||
|
|
e34911a73d | ||
|
|
76dcaf2281 | ||
|
|
770fac92eb | ||
|
|
e95d64333a | ||
|
|
75c40bcc48 | ||
|
|
b62f9f4120 | ||
|
|
b6438dedea | ||
|
|
cdefdb21cd | ||
|
|
ea7f9dacf4 | ||
|
|
bf5dbb7e2a | ||
|
|
39cc6b21d3 | ||
|
|
771b18ae9c | ||
|
|
cfa9392ffa | ||
|
|
1ccd57ce80 | ||
|
|
65a847cd36 | ||
|
|
07ff001981 | ||
|
|
b17ccb4c5c | ||
|
|
63c6fcfa0a | ||
|
|
29cb47fc06 | ||
|
|
4e6c4046f7 | ||
|
|
229ce2ccd1 | ||
|
|
ef75be0e51 | ||
|
|
5344f335a8 | ||
|
|
5cb5af9333 | ||
|
|
41aad0407f | ||
|
|
f8f2e84659 | ||
|
|
34633fef01 | ||
|
|
ddf983d643 | ||
|
|
17b9db20f1 | ||
|
|
0dc559ed30 | ||
|
|
9566f5fdb0 | ||
|
|
4319769b79 | ||
|
|
e9d9a8eae3 | ||
|
|
cbb3ab80e7 | ||
|
|
cd9868b1b4 | ||
|
|
eb738148fe | ||
|
|
587e16fba3 | ||
|
|
4de7b9ae47 | ||
|
|
887aed634d | ||
|
|
6261342de3 | ||
|
|
1e566223ed | ||
|
|
113b48ca22 | ||
|
|
3e81c99b6b | ||
|
|
ec85c4a51d | ||
|
|
97de657d38 | ||
|
|
71966eba6c | ||
|
|
a359979e17 | ||
|
|
7a6a141bc4 | ||
|
|
b8ff6892f6 | ||
|
|
8fe7a9ce6f | ||
|
|
bc5fff7085 | ||
|
|
51ce5ef447 | ||
|
|
1943ea91a8 | ||
|
|
37aee1f9b1 | ||
|
|
f5424fc9de | ||
|
|
0cf29ba6d2 | ||
|
|
50e18033e6 | ||
|
|
551b55d1c7 | ||
|
|
271ceb8bae | ||
|
|
5f846be2e4 | ||
|
|
fe7dcf98f3 | ||
|
|
2fb02626da | ||
|
|
a85c2785ae | ||
|
|
4806715c97 | ||
|
|
58c90d5937 | ||
|
|
2987bc7b40 | ||
|
|
695e0fa649 | ||
|
|
cbb23c46c2 | ||
|
|
0b4602b753 | ||
|
|
7e4e195e82 | ||
|
|
ac5a7e1c1b | ||
|
|
f1b9a4a1ca | ||
|
|
ae6b7caf32 | ||
|
|
f446d2368a | ||
|
|
dab4edd069 | ||
|
|
9d7057366d | ||
|
|
7f234f8ed1 | ||
|
|
9e829ce98f | ||
|
|
d49fd33885 | ||
|
|
f0f9b25bb6 | ||
|
|
7aae4a62e7 | ||
|
|
7a911569b8 | ||
|
|
466bfb8b86 | ||
|
|
70d1ba09b2 | ||
|
|
d293b78b64 | ||
|
|
9912dbbcf9 | ||
|
|
01bc462e8e | ||
|
|
3300f5ebff | ||
|
|
59e2c20557 | ||
|
|
b7c9566eea | ||
|
|
6df1b0be81 | ||
|
|
2ac1e076c1 | ||
|
|
9908b6031c | ||
|
|
8f100a14f2 | ||
|
|
53b5726b04 | ||
|
|
1a352b24e6 | ||
|
|
5194818d4b | ||
|
|
8a39cdb1c1 | ||
|
|
fd2478c9e2 | ||
|
|
0a1390f2d8 | ||
|
|
a8b0812feb | ||
|
|
a0fb68ab42 | ||
|
|
44c11165d5 | ||
|
|
564be4eb72 | ||
|
|
107c3ea7d5 | ||
|
|
bb8d698335 | ||
|
|
e0192a6914 | ||
|
|
bced4594bb | ||
|
|
cafba99b6b | ||
|
|
ac8f232b2a | ||
|
|
f98e1244c4 | ||
|
|
be95700b30 | ||
|
|
4aa534ae93 | ||
|
|
1cba8e7b11 | ||
|
|
d13e92f07e | ||
|
|
baa46e4fba | ||
|
|
faab7a181d | ||
|
|
8109d8232c | ||
|
|
debc6d1a05 | ||
|
|
e73a0113ec | ||
|
|
44f2bf9bae | ||
|
|
a057e5434d | ||
|
|
cd34e9701b | ||
|
|
7794766d3c | ||
|
|
658939faaa | ||
|
|
f511807fc0 | ||
|
|
c4d9d4e5f8 | ||
|
|
7c0a94ff47 | ||
|
|
cbbc80aad3 | ||
|
|
2be5c7a640 | ||
|
|
80f7786875 | ||
|
|
553e275407 | ||
|
|
7b3932b3f3 | ||
|
|
75207b1148 | ||
|
|
274828fa50 | ||
|
|
5ae1731fe6 | ||
|
|
c8eaf3ae2d | ||
|
|
3a7ab47ee9 | ||
|
|
cf5544b417 | ||
|
|
d143f84dd2 | ||
|
|
7794237475 | ||
|
|
a64fe9bcc9 | ||
|
|
2021d0f9d6 | ||
|
|
6df7a88930 | ||
|
|
53de943690 | ||
|
|
7f910010a0 | ||
|
|
3a5d8dbff9 | ||
|
|
2a60c6d4b0 | ||
|
|
0fc560ba23 | ||
|
|
d1800397f5 | ||
|
|
f4ff889491 | ||
|
|
210bec9111 | ||
|
|
f3b50dcf5b | ||
|
|
93eaba959d | ||
|
|
9570e56965 | ||
|
|
d7f91f8b4f | ||
|
|
53f1277b6b | ||
|
|
bc99faef1b | ||
|
|
848c0f16f7 | ||
|
|
e2fc8c8c2c | ||
|
|
53e6dbf6ca | ||
|
|
868f8a8756 | ||
|
|
db7e6366cd | ||
|
|
2702323f7d | ||
|
|
20cd850125 | ||
|
|
5fa6158731 | ||
|
|
84badf8086 | ||
|
|
c8cc4a0d22 | ||
|
|
3885eebdb8 | ||
|
|
ee74445155 | ||
|
|
9d2ace8bac | ||
|
|
b55f997302 | ||
|
|
29125864b3 | ||
|
|
e45c960c2c | ||
|
|
55e81da379 | ||
|
|
ac76b6267f | ||
|
|
f1b96c4846 | ||
|
|
16d6be852d | ||
|
|
53ec5789e2 | ||
|
|
95a707ced3 | ||
|
|
5d97b0754c | ||
|
|
8a9e868919 | ||
|
|
7e404de3de | ||
|
|
e4472ad850 | ||
|
|
fb0b4552a5 | ||
|
|
6f73ffc114 | ||
|
|
c8b0645266 | ||
|
|
ec05ff3f64 | ||
|
|
f6f9122660 | ||
|
|
8247f38dc1 | ||
|
|
ef6374196d | ||
|
|
f824c2b751 | ||
|
|
4ba4ab623f | ||
|
|
4f39447c05 | ||
|
|
74c9465672 | ||
|
|
a7126c2ce4 | ||
|
|
a69dd3fbc5 | ||
|
|
101dd08173 | ||
|
|
493d4fe7e5 | ||
|
|
0a22816e70 | ||
|
|
c3cd6e7e32 | ||
|
|
11eab4c019 | ||
|
|
4568d32b6b | ||
|
|
c1a6374c6f | ||
|
|
dc05937313 | ||
|
|
2470129132 | ||
|
|
8c582d362d | ||
|
|
11e34ddd1b | ||
|
|
9528f0d9ee | ||
|
|
b06550519e | ||
|
|
6093ee5363 | ||
|
|
07c66b1960 | ||
|
|
58b075daef | ||
|
|
09fcd3a341 | ||
|
|
726ad085cb | ||
|
|
6fe416976d | ||
|
|
dbc2eff029 | ||
|
|
462b4885ff | ||
|
|
aa54fe064c | ||
|
|
006ef3ea01 | ||
|
|
60f17628cc | ||
|
|
c9bad1403a | ||
|
|
2f8927376f | ||
|
|
d945a2b06d | ||
|
|
ca6c8d06ce | ||
|
|
7aa43c8928 | ||
|
|
891b960854 | ||
|
|
95a8caa2f3 | ||
|
|
5c0d0ecbde | ||
|
|
8c05b8105b | ||
|
|
c80084a98f | ||
|
|
2bab92961f | ||
|
|
9175b8bd5f | ||
|
|
793f2d43b0 | ||
|
|
a4dde45f87 | ||
|
|
7fa7ea3e1e | ||
|
|
3fbc13eb65 | ||
|
|
db6917303f | ||
|
|
c2fdeb6c22 | ||
|
|
f7eb81a846 | ||
|
|
edc329883c | ||
|
|
793175be3a | ||
|
|
83c4ba8d32 | ||
|
|
271af406f3 | ||
|
|
f5f50b3563 | ||
|
|
651dd22d7d | ||
|
|
f329f77bd0 | ||
|
|
7c611a2f95 | ||
|
|
296564e369 | ||
|
|
27af6e35d3 | ||
|
|
a183ad1df4 | ||
|
|
799a0eabbd | ||
|
|
ca63503e61 | ||
|
|
4f83217df6 | ||
|
|
5087096711 | ||
|
|
21f7768b26 | ||
|
|
46bc4fd50c | ||
|
|
1cc02b4337 | ||
|
|
6e223db7fc | ||
|
|
1d33547222 | ||
|
|
3ea4dadd30 | ||
|
|
1b10ff129a | ||
|
|
125610d23b | ||
|
|
e213a42cde | ||
|
|
e4663be46a | ||
|
|
11637b6926 | ||
|
|
80bf3e6a35 | ||
|
|
6acbafe45b | ||
|
|
5392d11b04 | ||
|
|
c0fe95fb72 | ||
|
|
d9d4077c93 | ||
|
|
02eb72ac42 | ||
|
|
c06f9986d4 | ||
|
|
2cce125c79 | ||
|
|
b3938fe371 | ||
|
|
e6668dd83b | ||
|
|
c8a4a56177 | ||
|
|
3c5732615d | ||
|
|
f20c0f9819 | ||
|
|
134fa320e6 | ||
|
|
a79df1ff49 | ||
|
|
7ceb25d7b3 | ||
|
|
21b5347fbe | ||
|
|
f2eb480738 | ||
|
|
c94762bb56 | ||
|
|
51413925bd | ||
|
|
b985cea65d | ||
|
|
d286daa2ba | ||
|
|
bcb115b55b | ||
|
|
3dd094f17a | ||
|
|
339ab34c4c | ||
|
|
7424e2b609 | ||
|
|
73594cff73 | ||
|
|
880597b301 | ||
|
|
9c835431d0 | ||
|
|
1d4ffddf69 | ||
|
|
b0e7810a6b | ||
|
|
2b92a8c499 | ||
|
|
274b8dc91a | ||
|
|
74b237ca22 | ||
|
|
c353abd38c | ||
|
|
0acce17979 | ||
|
|
2016a685e6 | ||
|
|
1b9a6aac30 | ||
|
|
e27433ab6a | ||
|
|
7961404a40 | ||
|
|
cedc1f4b14 | ||
|
|
0884b73c69 | ||
|
|
9bd9472ae9 | ||
|
|
2e2473f390 | ||
|
|
c4a423a642 | ||
|
|
f9991fd5f6 | ||
|
|
47688e24e9 | ||
|
|
61ef0c3419 | ||
|
|
698e77dba4 | ||
|
|
2081f6e8ff | ||
|
|
dc6b809f15 | ||
|
|
0f08684649 | ||
|
|
552119c484 | ||
|
|
94d3cfaa10 | ||
|
|
13348b2137 | ||
|
|
783a7d2202 | ||
|
|
50e99a52ea | ||
|
|
9964ed2f79 | ||
|
|
d5b976f92d | ||
|
|
f7267d9b0e | ||
|
|
e0c080a28c | ||
|
|
e80b144932 | ||
|
|
02a504c0b8 | ||
|
|
be94db096c | ||
|
|
b079df9ef4 | ||
|
|
aee61456a4 | ||
|
|
01a119abfc | ||
|
|
1fad2b759f | ||
|
|
e1e83a1b71 | ||
|
|
da3d70420a | ||
|
|
1127f5a2d7 | ||
|
|
0ae4cc2803 | ||
|
|
99efbbbad5 | ||
|
|
22e5aee2dd | ||
|
|
249917700d | ||
|
|
7a8949e0ce | ||
|
|
b82108f899 | ||
|
|
8373ad4ec2 | ||
|
|
35d37e124f | ||
|
|
d8ba46efdb | ||
|
|
a15f22a1f6 | ||
|
|
b94ea89f52 | ||
|
|
35f668bb14 | ||
|
|
4ebbf758f5 | ||
|
|
8615d6ec87 | ||
|
|
6c2ead30f0 | ||
|
|
f41f03ab83 | ||
|
|
365e8de346 | ||
|
|
578d1b6219 | ||
|
|
a6ae079b17 | ||
|
|
d10db52edb | ||
|
|
dabab2b5f4 | ||
|
|
aa2709c4e0 | ||
|
|
9d6f2b594e | ||
|
|
a13bcc1716 | ||
|
|
d2c82d7543 | ||
|
|
0517672dd0 | ||
|
|
15d5dfa92c | ||
|
|
d83373db61 | ||
|
|
88b6bf251a | ||
|
|
4a2ab7460b | ||
|
|
86d8c8978b | ||
|
|
316df0e821 | ||
|
|
438002204d | ||
|
|
23203d52c1 | ||
|
|
73545a79cd | ||
|
|
a19d209005 | ||
|
|
8602816536 | ||
|
|
d52863cfd7 | ||
|
|
c6361d63c2 | ||
|
|
53bfa51ee0 | ||
|
|
ff9cfca24c | ||
|
|
a86d349a51 | ||
|
|
7b277f0110 | ||
|
|
faeab93df0 | ||
|
|
f773f492f3 | ||
|
|
21a6b5f79e | ||
|
|
cee257f384 | ||
|
|
7bfb3011e8 | ||
|
|
8c8f596238 | ||
|
|
bff575d0b1 | ||
|
|
faf3ac0aad | ||
|
|
a40116de25 | ||
|
|
b31ec99372 | ||
|
|
0ac073fa94 | ||
|
|
25e899b60b | ||
|
|
219bcb119d | ||
|
|
5664445543 | ||
|
|
89da450800 | ||
|
|
c26bbee489 | ||
|
|
ced13574a0 | ||
|
|
fe858873af | ||
|
|
a8d4d1c4d3 | ||
|
|
c4ccb3fbb2 | ||
|
|
a748d3a75d | ||
|
|
a5ab231ad4 | ||
|
|
dbaeea7b59 | ||
|
|
10a16bd690 | ||
|
|
406f5bd22b | ||
|
|
a0ae53966f | ||
|
|
0d75f3b6a2 | ||
|
|
abad6f66d6 | ||
|
|
2ff66e661d | ||
|
|
5e55034922 | ||
|
|
9a9e810239 | ||
|
|
45be9ac111 | ||
|
|
9f201558c9 | ||
|
|
d4237cb7f3 | ||
|
|
d2a8ff4b04 | ||
|
|
f331cb1a76 | ||
|
|
9ed981c5dc | ||
|
|
aaa9d7fbf8 | ||
|
|
ebc95e6f11 | ||
|
|
61a2c50e8e | ||
|
|
4f98f8c9b3 | ||
|
|
536875d463 | ||
|
|
65f2fba4c3 | ||
|
|
eea6f51df9 | ||
|
|
6fc4646709 | ||
|
|
ac029f81b3 | ||
|
|
c0cf875a82 | ||
|
|
b6d904838e | ||
|
|
5379eff022 | ||
|
|
aaddb05411 | ||
|
|
e52532a9fe | ||
|
|
e826a5a6af | ||
|
|
165d5436b5 | ||
|
|
409b52255c | ||
|
|
5953972a5a | ||
|
|
d751224ea4 | ||
|
|
4a5938b5cc | ||
|
|
d18bc5468f | ||
|
|
8877c6db51 | ||
|
|
c38379c9dd | ||
|
|
a0b07c1440 | ||
|
|
43fbdb7a5a | ||
|
|
777cebc8c7 | ||
|
|
aa5c73e20f | ||
|
|
5e5ef28ca0 | ||
|
|
650ed34336 | ||
|
|
189ca1bcee | ||
|
|
4c1caa7454 | ||
|
|
7bb19cf90e | ||
|
|
2a94aaaf2e | ||
|
|
5e4b4f6712 | ||
|
|
47e8950e77 | ||
|
|
f45f2c8465 | ||
|
|
10780ae650 | ||
|
|
9bae50f700 | ||
|
|
0758c1a374 | ||
|
|
564ff395f6 | ||
|
|
7fb78a5f01 | ||
|
|
8204ab4aa8 | ||
|
|
48d1325784 | ||
|
|
57bbc586ef | ||
|
|
bfef3c5dd1 | ||
|
|
d972f4a60a | ||
|
|
eebce01cf2 | ||
|
|
e2c39a4a8e | ||
|
|
1e8e6faa7e | ||
|
|
c7eb901496 | ||
|
|
2ed03ea0a2 | ||
|
|
de00e2937a | ||
|
|
e187b5e9d0 | ||
|
|
0947fc1c89 | ||
|
|
4d61607c9e | ||
|
|
781bfb6e66 | ||
|
|
79a82ba7f1 | ||
|
|
d63bd7fa5e | ||
|
|
e265c4ec86 | ||
|
|
0732238213 | ||
|
|
5f3b68b4d4 | ||
|
|
2424af62fd | ||
|
|
6b252033ae | ||
|
|
320c805905 | ||
|
|
e673848a9b | ||
|
|
a35a1a9ae7 | ||
|
|
793509a3b5 | ||
|
|
020f36f970 | ||
|
|
9d0cc399ac | ||
|
|
025fc914cc | ||
|
|
43bb633096 | ||
|
|
187237b622 | ||
|
|
66198faab6 | ||
|
|
47b22763f8 | ||
|
|
4d42368214 | ||
|
|
3e068e78e2 | ||
|
|
1140c489c9 | ||
|
|
804a306313 | ||
|
|
9db0fb8b02 | ||
|
|
692b14cecd | ||
|
|
322a178430 | ||
|
|
f80f29e256 | ||
|
|
2c556f093a | ||
|
|
3b027d2528 | ||
|
|
57526cae99 | ||
|
|
5de5ef118c | ||
|
|
b161ac29e3 | ||
|
|
b20ee6924a | ||
|
|
49bd98f410 | ||
|
|
a14f98ca7c | ||
|
|
138a841390 | ||
|
|
046e4013cb | ||
|
|
dd2d3e61ab | ||
|
|
3617c22a56 | ||
|
|
f9daebba0a | ||
|
|
9a557e90da | ||
|
|
2d557eb1e0 | ||
|
|
a789b77b75 | ||
|
|
75acf96d94 | ||
|
|
8c7687b419 | ||
|
|
3e0a7b931c | ||
|
|
306d9f2e35 | ||
|
|
7b8604ea29 | ||
|
|
ab69443bd4 | ||
|
|
b263e096af | ||
|
|
05bb391c3a | ||
|
|
0ab080987d |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -21,8 +21,10 @@ lapack-netlib/TESTING/testing_results.txt
|
||||
lib.grd
|
||||
nohup.out
|
||||
config.h
|
||||
config_kernel.h
|
||||
Makefile.conf
|
||||
Makefile.conf_last
|
||||
Makefile_kernel.conf
|
||||
config_last.h
|
||||
getarch
|
||||
getarch_2nd
|
||||
@@ -41,6 +43,8 @@ ctest/xzcblat2
|
||||
ctest/xzcblat3
|
||||
exports/linktest.c
|
||||
exports/linux.def
|
||||
kernel/setparam_*.c
|
||||
kernel/kernel_*.h
|
||||
test/CBLAT2.SUMM
|
||||
test/CBLAT3.SUMM
|
||||
test/DBLAT2.SUMM
|
||||
|
||||
@@ -11,7 +11,7 @@ env:
|
||||
|
||||
before_install:
|
||||
- sudo apt-get update -qq
|
||||
- sudo apt-get install -qq gfortran
|
||||
- sudo apt-get install -qq gfortran
|
||||
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
|
||||
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi
|
||||
|
||||
|
||||
@@ -10,15 +10,28 @@
|
||||
* Optimize BLAS3 on ICT Loongson 3A.
|
||||
* Optimize BLAS3 on Intel Sandy Bridge.
|
||||
|
||||
* Werner Saar <wernsaar@googlemail.com>
|
||||
* [2013-03-04] Optimize AVX and FMA4 DGEMM on AMD Bulldozer
|
||||
* [2013-04-27] Optimize AVX and FMA4 TRSM on AMD Bulldozer
|
||||
* [2013-06-09] Optimize AVX and FMA4 SGEMM on AMD Bulldozer
|
||||
* [2013-06-11] Optimize AVX and FMA4 ZGEMM on AMD Bulldozer
|
||||
* [2013-06-12] Optimize AVX and FMA4 CGEMM on AMD Bulldozer
|
||||
* [2013-06-16] Optimize dgemv_n kernel on AMD Bulldozer
|
||||
* [2013-06-20] Optimize ddot, daxpy kernel on AMD Bulldozer
|
||||
* [2013-06-21] Optimize dcopy kernel on AMD Bulldozer
|
||||
* Porting and Optimization on ARM Cortex-A9
|
||||
* Optimization on AMD Piledriver
|
||||
* Optimization on Intel Haswell
|
||||
|
||||
## Previous Developers
|
||||
|
||||
* Zaheer Chothia <zaheer.chothia@gmail.com>
|
||||
* Improve the compatibility about complex number
|
||||
* Build LAPACKE: C interface to LAPACK
|
||||
* Improve the windows build.
|
||||
|
||||
## Previous Developers
|
||||
|
||||
* Chen Shaohu <huhumartinwar@gmail.com>
|
||||
* Optimize GEMV on the Loongson 3A processor.
|
||||
* Optimize GEMV on the Loongson 3A processor.
|
||||
|
||||
* Luo Wen
|
||||
* Intern. Test Level-2 BLAS.
|
||||
@@ -40,11 +53,11 @@ In chronological order:
|
||||
* [2012-05-19] Fix building bug on FreeBSD and NetBSD.
|
||||
|
||||
* Sylvestre Ledru <https://github.com/sylvestre>
|
||||
* [2012-07-01] Improve the detection of sparc. Fix building bug under
|
||||
* [2012-07-01] Improve the detection of sparc. Fix building bug under
|
||||
Hurd and kfreebsd.
|
||||
|
||||
* Jameson Nash <https://github.com/vtjnash>
|
||||
* [2012-08-20] Provide support for passing CFLAGS, FFLAGS, PFLAGS, FPFLAGS to
|
||||
* [2012-08-20] Provide support for passing CFLAGS, FFLAGS, PFLAGS, FPFLAGS to
|
||||
make on the command line.
|
||||
|
||||
* Alexander Nasonov <alnsn@yandex.ru>
|
||||
@@ -52,16 +65,7 @@ In chronological order:
|
||||
|
||||
* Sébastien Villemot <sebastien@debian.org>
|
||||
* [2012-11-14] Fix compilation with TARGET=GENERIC. Patch applied to Debian package.
|
||||
|
||||
* Werner Saar <wernsaar@googlemail.com>
|
||||
* [2013-03-04] Optimize AVX and FMA4 DGEMM on AMD Bulldozer
|
||||
* [2013-04-27] Optimize AVX and FMA4 TRSM on AMD Bulldozer
|
||||
* [2013-06-09] Optimize AVX and FMA4 SGEMM on AMD Bulldozer
|
||||
* [2013-06-11] Optimize AVX and FMA4 ZGEMM on AMD Bulldozer
|
||||
* [2013-06-12] Optimize AVX and FMA4 CGEMM on AMD Bulldozer
|
||||
* [2013-06-16] Optimize dgemv_n kernel on AMD Bulldozer
|
||||
* [2013-06-20] Optimize ddot, daxpy kernel on AMD Bulldozer
|
||||
* [2013-06-21] Optimize dcopy kernel on AMD Bulldozer
|
||||
* [2013-08-28] Avoid failure on qemu guests declaring an Athlon CPU without 3dnow!
|
||||
|
||||
* Kang-Che Sung <Explorer09@gmail.com>
|
||||
* [2013-05-17] Fix typo in the document. Re-order the architecture list in getarch.c.
|
||||
@@ -76,15 +80,52 @@ In chronological order:
|
||||
* [2013-06-30] Add Intel Haswell support (using sandybridge optimizations).
|
||||
|
||||
* grisuthedragon <https://github.com/grisuthedragon>
|
||||
* [2013-07-11] create openblas_get_parallel to retrieve information which parallelization
|
||||
* [2013-07-11] create openblas_get_parallel to retrieve information which parallelization
|
||||
model is used by OpenBLAS.
|
||||
|
||||
* Elliot Saba <staticfloat@gmail.com>
|
||||
* [2013-07-22] Add in return value for `interface/trtri.c`
|
||||
|
||||
* Sébastien Fabbro <bicatali@gentoo.org>
|
||||
* [2013-07-24] Modify makefile to respect user's LDFLAGS
|
||||
* [2013-07-24] Add stack markings for GNU as arch-independent for assembler files
|
||||
|
||||
* Viral B. Shah <viral@mayin.org>
|
||||
* [2013-08-21] Patch LAPACK XLASD4.f as discussed in JuliaLang/julia#2340
|
||||
|
||||
* Lars Buitinck <https://github.com/larsmans>
|
||||
* [2013-08-28] get rid of the generated cblas_noconst.h file
|
||||
* [2013-08-28] Missing threshold in gemm.c
|
||||
* [2013-08-28] fix default prefix handling in makefiles
|
||||
|
||||
* yieldthought <https://github.com/yieldthought>
|
||||
* [2013-10-08] Remove -Wl,--retain-symbols-file from dynamic link line to fix tool support
|
||||
|
||||
* Keno Fischer <https://github.com/loladiro>
|
||||
* [2013-10-23] Use FC instead of CC to link the dynamic library on OS X
|
||||
|
||||
* Christopher Meng <cickumqt@gmail.com>
|
||||
* [2013-12-09] Add DESTDIR support for easier building on RPM based distros.
|
||||
Use install command instead of cp to install files with permissions control.
|
||||
|
||||
* Lucas Beyer <lucasb.eyer.be@gmail.com>
|
||||
* [2013-12-10] Added support for NO_SHARED in make install.
|
||||
|
||||
* carlkl <https://github.com/carlkl>
|
||||
* [2013-12-13] Fixed LAPACKE building bug on Windows
|
||||
|
||||
* Isaac Dunham <https://github.com/idunham>
|
||||
* [2014-08-03] Fixed link error on Linux/musl
|
||||
|
||||
* Dave Nuechterlein
|
||||
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
|
||||
ARMv8 support.
|
||||
|
||||
* Dan Kortschak
|
||||
* [2015-01-07] Added test for drotmg bug #484.
|
||||
|
||||
* Ton van den Heuvel <https://github.com/ton>
|
||||
* [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity().
|
||||
|
||||
* [Your name or handle] <[email or website]>
|
||||
* [Date] [Brief summary of your changes]
|
||||
|
||||
205
Changelog.txt
205
Changelog.txt
@@ -1,4 +1,133 @@
|
||||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.2.14
|
||||
24-Mar-2015
|
||||
common:
|
||||
* Improve OpenBLASConfig.cmake. (#474, #475. Thanks, xantares.)
|
||||
* Improve ger and gemv for small matrices by stack allocation.
|
||||
e.g. make -DMAX_STACK_ALLOC=2048 (#482. Thanks, Jerome Robert.)
|
||||
* Introduce openblas_get_num_threads and openblas_get_num_procs.
|
||||
(#497. Thanks, Erik Schnetter.)
|
||||
* Add ATLAS-style ?geadd function. (#509. Thanks, Martin Köhler.)
|
||||
* Fix c/zsyr bug with negative incx. (#492.)
|
||||
* Fix race condition during shutdown causing a crash in
|
||||
gotoblas_set_affinity(). (#508. Thanks, Ton van den Heuvel.)
|
||||
|
||||
x86/x86-64:
|
||||
* Support AMD Streamroller.
|
||||
|
||||
ARM:
|
||||
* Add Cortex-A9 and Cortex-A15 targets.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.13
|
||||
3-Dec-2014
|
||||
common:
|
||||
* Add SYMBOLPREFIX and SYMBOLSUFFIX makefile options
|
||||
for adding a prefix or suffix to all exported symbol names
|
||||
in the shared library.(#459, Thanks Tony Kelman)
|
||||
* Provide OpenBLASConfig.cmake at installation.
|
||||
* Fix Fortran compiler detection on FreeBSD.
|
||||
(#470, Thanks Mike Nolta)
|
||||
|
||||
|
||||
x86/x86-64:
|
||||
* Add generic kernel files for x86-64. make TARGET=GENERIC
|
||||
* Fix a bug of sgemm kernel on Intel Sandy Bridge.
|
||||
* Fix c_check bug on some amd64 systems. (#471, Thanks Mike Nolta)
|
||||
|
||||
ARM:
|
||||
* Support APM's X-Gene 1 AArch64 processors.
|
||||
Optimize trmm and sgemm. (#465, Thanks Dave Nuechterlein)
|
||||
|
||||
====================================================================
|
||||
Version 0.2.12
|
||||
13-Oct-2014
|
||||
common:
|
||||
* Added CBLAS interface for ?omatcopy and ?imatcopy.
|
||||
* Enable ?gemm3m functions.
|
||||
* Added benchmark for ?gemm3m.
|
||||
* Optimized multithreading lower limits.
|
||||
* Disabled SYMM3M and HEMM3M functions
|
||||
because of segment violations.
|
||||
|
||||
x86/x86-64:
|
||||
* Improved axpy and symv performance on AMD Bulldozer.
|
||||
* Improved gemv performance on modern Intel and AMD CPUs.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.11
|
||||
18-Aug-2014
|
||||
common:
|
||||
* Added some benchmark codes.
|
||||
* Fix link error on Linux/musl.(Thanks Isaac Dunham)
|
||||
|
||||
x86/x86-64:
|
||||
* Improved s/c/zgemm performance for Intel Haswell.
|
||||
* Improved s/d/c/zgemv performance.
|
||||
* Support the big numa machine.(EXPERIMENT)
|
||||
|
||||
ARM:
|
||||
* Fix detection when cpuinfo uses "Processor". (Thanks Isaiah)
|
||||
|
||||
====================================================================
|
||||
Version 0.2.10
|
||||
16-Jul-2014
|
||||
common:
|
||||
* Added BLAS extensions as following.
|
||||
s/d/c/zaxpby, s/d/c/zimatcopy, s/d/c/zomatcopy.
|
||||
* Added OPENBLAS_CORETYPE environment for dynamic_arch. (a86d34)
|
||||
* Added NO_AVX2 flag for old binutils. (#401)
|
||||
* Support outputing the CPU corename on runtime.(#407)
|
||||
* Patched LAPACK to fix bug 114, 117, 118.
|
||||
(http://www.netlib.org/lapack/bug_list.html)
|
||||
* Disabled ?gemm3m for a work-around fix. (#400)
|
||||
x86/x86-64:
|
||||
* Fixed lots of bugs for optimized kernels on sandybridge,Haswell,
|
||||
bulldozer, and piledriver.
|
||||
https://github.com/xianyi/OpenBLAS/wiki/Fixed-optimized-kernels-To-do-List
|
||||
|
||||
ARM:
|
||||
* Improved LAPACK testing.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.9
|
||||
10-Jun-2014
|
||||
common:
|
||||
* Improved the result for LAPACK testing. (#372)
|
||||
* Installed DLL to prefix/bin instead of prefix/lib. (#366)
|
||||
* Build import library on Windows.(#374)
|
||||
x86/x86-64:
|
||||
* To improve LAPACK testing, we fallback some kernels. (#372)
|
||||
https://github.com/xianyi/OpenBLAS/wiki/Fixed-optimized-kernels-To-do-List
|
||||
|
||||
====================================================================
|
||||
Version 0.2.9.rc2
|
||||
06-Mar-2014
|
||||
common:
|
||||
* Added OPENBLAS_VERBOSE environment variable.(#338)
|
||||
* Make OpenBLAS thread-pool resilient to fork via pthread_atfork.
|
||||
(#294, Thank Olivier Grisel)
|
||||
* Rewrote rotmg
|
||||
* Fixed sdsdot bug.
|
||||
x86/x86-64:
|
||||
* Detect Intel Haswell for new Macbook.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.9.rc1
|
||||
13-Jan-2013
|
||||
common:
|
||||
* Update LAPACK to 3.5.0 version
|
||||
* Fixed compatiable issues with Clang and Pathscale compilers.
|
||||
|
||||
x86/x86-64:
|
||||
* Optimization on Intel Haswell.
|
||||
* Enable optimization kernels on AMD Bulldozer and Piledriver.
|
||||
|
||||
ARM:
|
||||
* Support ARMv6 and ARMv7 ISA.
|
||||
* Optimization on ARM Cortex-A9.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.8
|
||||
01-Aug-2013
|
||||
@@ -17,25 +146,25 @@ Version 0.2.7
|
||||
common:
|
||||
* Support LSB (Linux Standard Base) 4.1.
|
||||
e.g. make CC=lsbcc
|
||||
* Include LAPACK 3.4.2 source codes to the repo.
|
||||
* Include LAPACK 3.4.2 source codes to the repo.
|
||||
Avoid downloading at compile time.
|
||||
* Add NO_PARALLEL_MAKE flag to disable parallel make.
|
||||
* Create openblas_get_parallel to retrieve information which
|
||||
* Create openblas_get_parallel to retrieve information which
|
||||
parallelization model is used by OpenBLAS. (Thank grisuthedragon)
|
||||
* Detect LLVM/Clang compiler. The default compiler is Clang on Mac OS X.
|
||||
* Change LIBSUFFIX from .lib to .a on windows.
|
||||
* A walk round for dtrti_U single thread bug. Replace it with LAPACK codes. (#191)
|
||||
* A work-around for dtrti_U single thread bug. Replace it with LAPACK codes. (#191)
|
||||
|
||||
x86/x86-64:
|
||||
* Optimize c/zgemm, trsm, dgemv_n, ddot, daxpy, dcopy on
|
||||
* Optimize c/zgemm, trsm, dgemv_n, ddot, daxpy, dcopy on
|
||||
AMD Bulldozer. (Thank Werner Saar)
|
||||
* Add Intel Haswell support (using Sandybridge optimizations).
|
||||
(Thank Dan Luu)
|
||||
* Add AMD Piledriver support (using Bulldozer optimizations).
|
||||
* Fix the computational error in zgemm avx kernel on
|
||||
* Fix the computational error in zgemm avx kernel on
|
||||
Sandybridge. (#237)
|
||||
* Fix the overflow bug in gemv.
|
||||
* Fix the overflow bug in multi-threaded BLAS3, getrf when NUM_THREADS
|
||||
* Fix the overflow bug in multi-threaded BLAS3, getrf when NUM_THREADS
|
||||
is very large.(#214, #221, #246).
|
||||
MIPS64:
|
||||
* Support loongcc (Open64 based) compiler for ICT Loongson 3A/B.
|
||||
@@ -72,7 +201,7 @@ common:
|
||||
* Fixed NetBSD build. (#155)
|
||||
* Fixed compilation with TARGET=GENERIC. (#160)
|
||||
x86/x86-64:
|
||||
* Restore the original CPU affinity when calling
|
||||
* Restore the original CPU affinity when calling
|
||||
openblas_set_num_threads(1) (#153)
|
||||
* Fixed a SEGFAULT bug in dgemv_t when m is very large.(#154)
|
||||
MIPS64:
|
||||
@@ -82,13 +211,13 @@ Version 0.2.4
|
||||
8-Oct-2012
|
||||
common:
|
||||
* Upgraded LAPACK to 3.4.2 version. (#145)
|
||||
* Provided support for passing CFLAGS, FFLAGS, PFLAGS,
|
||||
* Provided support for passing CFLAGS, FFLAGS, PFLAGS,
|
||||
FPFLAGS to make. (#137)
|
||||
* f77blas.h:compatibility for compilers without C99 complex
|
||||
* f77blas.h:compatibility for compilers without C99 complex
|
||||
number support. (#141)
|
||||
x86/x86-64:
|
||||
* Added NO_AVX flag. Check OS supporting AVX on runtime. (#139)
|
||||
* Fixed zdot incompatibility ABI issue with GCC 4.7 on
|
||||
* Fixed zdot incompatibility ABI issue with GCC 4.7 on
|
||||
Windows 32-bit. (#140)
|
||||
MIPS64:
|
||||
* Fixed the generation of shared library bug.
|
||||
@@ -98,14 +227,14 @@ Version 0.2.3
|
||||
20-Aug-2012
|
||||
common:
|
||||
* Fixed LAPACK unstable bug about ?laswp. (#130)
|
||||
* Fixed the shared library bug about unloading the library on
|
||||
* Fixed the shared library bug about unloading the library on
|
||||
Linux (#132).
|
||||
* Fixed the compilation failure on BlueGene/P (TARGET=PPC440FP2)
|
||||
Please use gcc and IBM xlf. (#134)
|
||||
x86/x86-64:
|
||||
* Supported goto_set_num_threads and openblas_set_num_threads
|
||||
* Supported goto_set_num_threads and openblas_set_num_threads
|
||||
APIs in Windows. They can set the number of threads on runtime.
|
||||
|
||||
|
||||
====================================================================
|
||||
Version 0.2.2
|
||||
6-July-2012
|
||||
@@ -153,14 +282,14 @@ x86/x86_64:
|
||||
* Auto-detect Intel Sandy Bridge Core i7-3xxx & Xeon E7 Westmere-EX.
|
||||
* Test alpha=Nan in dscale.
|
||||
* Fixed a SEGFAULT bug in samax on x86 windows.
|
||||
|
||||
|
||||
====================================================================
|
||||
Version 0.1.0
|
||||
23-Mar-2012
|
||||
common:
|
||||
* Set soname of shared library on Linux.
|
||||
* Added LIBNAMESUFFIX flag in Makefile.rule. The user can use
|
||||
this flag to control the library name, e.g. libopenblas.a,
|
||||
* Added LIBNAMESUFFIX flag in Makefile.rule. The user can use
|
||||
this flag to control the library name, e.g. libopenblas.a,
|
||||
libopenblas_ifort.a or libopenblas_omp.a.
|
||||
* Added GEMM_MULTITHREAD_THRESHOLD flag in Makefile.rule.
|
||||
The lib use single thread in GEMM function with small matrices.
|
||||
@@ -191,7 +320,7 @@ x86/x86_64:
|
||||
Version 0.1 alpha2.4
|
||||
18-Sep-2011
|
||||
common:
|
||||
* Fixed a bug about installation. The header file "fblas77.h"
|
||||
* Fixed a bug about installation. The header file "fblas77.h"
|
||||
works fine now.
|
||||
* Fixed #61 a building bug about setting TARGET and DYNAMIC_ARCH.
|
||||
* Try to handle absolute path of shared library in OSX. (#57)
|
||||
@@ -200,16 +329,16 @@ common:
|
||||
$(PREFIX)/lib
|
||||
|
||||
x86/x86_64:
|
||||
* Fixed #58 zdot/xdot SEGFAULT bug with GCC-4.6 on x86. According
|
||||
to i386 calling convention, The callee should remove the first
|
||||
hidden parameter.Thank Mr. John for this patch.
|
||||
* Fixed #58 zdot/xdot SEGFAULT bug with GCC-4.6 on x86. According
|
||||
to i386 calling convention, The callee should remove the first
|
||||
hidden parameter.Thank Mr. John for this patch.
|
||||
|
||||
====================================================================
|
||||
Version 0.1 alpha2.3
|
||||
5-Sep-2011
|
||||
|
||||
x86/x86_64:
|
||||
* Added DTB_ENTRIES into dynamic arch setting parameters. Now,
|
||||
* Added DTB_ENTRIES into dynamic arch setting parameters. Now,
|
||||
it can read DTB_ENTRIES on runtime. (Refs issue #55 on github)
|
||||
|
||||
====================================================================
|
||||
@@ -217,7 +346,7 @@ Version 0.1 alpha2.2
|
||||
14-Jul-2011
|
||||
|
||||
common:
|
||||
* Fixed a building bug when DYNAMIC_ARCH=1 & INTERFACE64=1.
|
||||
* Fixed a building bug when DYNAMIC_ARCH=1 & INTERFACE64=1.
|
||||
(Refs issue #44 on github)
|
||||
|
||||
====================================================================
|
||||
@@ -225,7 +354,7 @@ Version 0.1 alpha2.1
|
||||
28-Jun-2011
|
||||
|
||||
common:
|
||||
* Stop the build and output the error message when detecting
|
||||
* Stop the build and output the error message when detecting
|
||||
fortran compiler failed. (Refs issue #42 on github)
|
||||
|
||||
====================================================================
|
||||
@@ -233,16 +362,16 @@ Version 0.1 alpha2
|
||||
23-Jun-2011
|
||||
|
||||
common:
|
||||
* Fixed blasint undefined bug in <cblas.h> file. Other software
|
||||
* Fixed blasint undefined bug in <cblas.h> file. Other software
|
||||
could include this header successfully(Refs issue #13 on github)
|
||||
* Fixed the SEGFAULT bug on 64 cores. On SMP server, the number
|
||||
of CPUs or cores should be less than or equal to 64.(Refs issue #14
|
||||
* Fixed the SEGFAULT bug on 64 cores. On SMP server, the number
|
||||
of CPUs or cores should be less than or equal to 64.(Refs issue #14
|
||||
on github)
|
||||
* Support "void goto_set_num_threads(int num_threads)" and "void
|
||||
openblas_set_num_threads(int num_threads)" when USE_OPENMP=1
|
||||
* Added extern "C" to support C++. Thank Tasio for the patch(Refs
|
||||
* Added extern "C" to support C++. Thank Tasio for the patch(Refs
|
||||
issue #21 on github)
|
||||
* Provided an error message when the arch is not supported.(Refs
|
||||
* Provided an error message when the arch is not supported.(Refs
|
||||
issue #19 on github)
|
||||
* Fixed issue #23. Fixed a bug of f_check script about generating link flags.
|
||||
* Added openblas_set_num_threads for Fortran.
|
||||
@@ -257,10 +386,10 @@ x86/x86_64:
|
||||
* Fixed #28 a wrong result of dsdot on x86_64.
|
||||
* Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6.
|
||||
* Fixed #33 ztrmm bug on Nehalem.
|
||||
* Walk round #27 the low performance axpy issue with small imput size & multithreads.
|
||||
* Work-around #27 the low performance axpy issue with small imput size & multithreads.
|
||||
|
||||
MIPS64:
|
||||
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64.
|
||||
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64.
|
||||
* Optimized single/double precision BLAS Level3 on Loongson3A/MIPS64. (Refs #2)
|
||||
* Optimized single/double precision axpy function on Loongson3A/MIPS64. (Refs #3)
|
||||
|
||||
@@ -269,9 +398,9 @@ Version 0.1 alpha1
|
||||
20-Mar-2011
|
||||
|
||||
common:
|
||||
* Support "make NO_LAPACK=1" to build the library without
|
||||
* Support "make NO_LAPACK=1" to build the library without
|
||||
LAPACK functions.
|
||||
* Fixed randomly SEGFAULT when nodemask==NULL with above Linux 2.6.34.
|
||||
* Fixed randomly SEGFAULT when nodemask==NULL with above Linux 2.6.34.
|
||||
Thank Mr.Ei-ji Nakama providing this patch. (Refs issue #12 on github)
|
||||
* Added DEBUG=1 rule in Makefile.rule to build debug version.
|
||||
* Disable compiling quad precision in reference BLAS library(netlib BLAS).
|
||||
@@ -280,15 +409,15 @@ common:
|
||||
* Imported GotoBLAS2 1.13 BSD version
|
||||
|
||||
x86/x86_64:
|
||||
* On x86 32bits, fixed a bug in zdot_sse2.S line 191. This would casue
|
||||
zdotu & zdotc failures.Instead,Walk around it. (Refs issue #8 #9 on github)
|
||||
* Modified ?axpy functions to return same netlib BLAS results
|
||||
* On x86 32bits, fixed a bug in zdot_sse2.S line 191. This would casue
|
||||
zdotu & zdotc failures. Instead, work-around it. (Refs issue #8 #9 on github)
|
||||
* Modified ?axpy functions to return same netlib BLAS results
|
||||
when incx==0 or incy==0 (Refs issue #7 on github)
|
||||
* Modified ?swap functions to return same netlib BLAS results
|
||||
* Modified ?swap functions to return same netlib BLAS results
|
||||
when incx==0 or incy==0 (Refs issue #6 on github)
|
||||
* Modified ?rot functions to return same netlib BLAS results
|
||||
* Modified ?rot functions to return same netlib BLAS results
|
||||
when incx==0 or incy==0 (Refs issue #4 on github)
|
||||
* Detect Intel Westmere,Intel Clarkdale and Intel Arrandale
|
||||
* Detect Intel Westmere,Intel Clarkdale and Intel Arrandale
|
||||
to use Nehalem codes.
|
||||
* Fixed a typo bug about compiling dynamic ARCH library.
|
||||
MIPS64:
|
||||
|
||||
@@ -83,7 +83,7 @@
|
||||
4. Suported precision
|
||||
|
||||
Now x86/x86_64 version support 80bit FP precision in addition to
|
||||
normal double presicion and single precision. Currently only
|
||||
normal double presicion and single precision. Currently only
|
||||
gfortran supports 80bit FP with "REAL*10".
|
||||
|
||||
|
||||
|
||||
@@ -32,9 +32,9 @@
|
||||
|
||||
GotoBLAS2 build complete.
|
||||
|
||||
OS ... Linux
|
||||
Architecture ... x86_64
|
||||
BINARY ... 64bit
|
||||
OS ... Linux
|
||||
Architecture ... x86_64
|
||||
BINARY ... 64bit
|
||||
C compiler ... GCC (command line : gcc)
|
||||
Fortran compiler ... PATHSCALE (command line : pathf90)
|
||||
Library Name ... libgoto_barcelonap-r1.27.a (Multi threaded; Max
|
||||
|
||||
@@ -56,7 +56,7 @@
|
||||
|
||||
1.6 Q I use OpenMP compiler. How can I use GotoBLAS2 with it?
|
||||
|
||||
A Please understand that OpenMP is a compromised method to use
|
||||
A Please understand that OpenMP is a compromised method to use
|
||||
thread. If you want to use OpenMP based code with GotoBLAS2, you
|
||||
should enable "USE_OPENMP=1" in Makefile.rule.
|
||||
|
||||
|
||||
@@ -9,10 +9,10 @@
|
||||
|
||||
If you want to allocate 64 large pages,
|
||||
|
||||
$shell> echo 0 > /pros/sys/vm/nr_hugepages # need to be reset
|
||||
$shell> echo 65 > /pros/sys/vm/nr_hugepages # add 1 extra page
|
||||
$shell> echo 3355443200 > /pros/sys/kernel/shmmax # just large number
|
||||
$shell> echo 3355443200 > /pros/sys/kernel/shmall
|
||||
$shell> echo 0 > /proc/sys/vm/nr_hugepages # need to be reset
|
||||
$shell> echo 65 > /proc/sys/vm/nr_hugepages # add 1 extra page
|
||||
$shell> echo 3355443200 > /proc/sys/kernel/shmmax # just large number
|
||||
$shell> echo 3355443200 > /proc/sys/kernel/shmall
|
||||
|
||||
Also may add a few lines into /etc/security/limits.conf file.
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
F) Other aarchitecture which doesn't have Large TLB enhancement
|
||||
|
||||
If you have root permission, please install device driver which
|
||||
located in drivers/mapper.
|
||||
located in drivers/mapper.
|
||||
|
||||
$shell> cd drivers/mapper
|
||||
$shell> make
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
probably you created too many threads or process. Basically GotoBLAS
|
||||
assumes that available cores that you specify are exclusively for
|
||||
BLAS computation. Even one small thread/process conflicts with BLAS
|
||||
threads, performance will become worse.
|
||||
threads, performance will become worse.
|
||||
|
||||
The best solution is to reduce your number of threads or insert
|
||||
some synchronization mechanism and suspend your threads until BLAS
|
||||
@@ -19,4 +19,4 @@
|
||||
|
||||
|
||||
Anyway, if you see any weird performance loss, it means your code or
|
||||
algorithm is not optimal.
|
||||
algorithm is not optimal.
|
||||
|
||||
27
LICENSE
27
LICENSE
@@ -1,4 +1,4 @@
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -12,17 +12,18 @@ met:
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
84
Makefile
84
Makefile
@@ -4,7 +4,7 @@ include ./Makefile.system
|
||||
BLASDIRS = interface driver/level2 driver/level3 driver/others
|
||||
|
||||
ifneq ($(DYNAMIC_ARCH), 1)
|
||||
BLASDIRS += kernel
|
||||
BLASDIRS += kernel
|
||||
endif
|
||||
|
||||
ifdef UTEST_CHECK
|
||||
@@ -23,7 +23,7 @@ endif
|
||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
|
||||
|
||||
.PHONY : all libs netlib test ctest shared install
|
||||
.NOTPARALLEL : all libs prof lapack-test install
|
||||
.NOTPARALLEL : all libs prof lapack-test install blas-test
|
||||
|
||||
all :: libs netlib tests shared
|
||||
@echo
|
||||
@@ -36,9 +36,13 @@ ifndef BINARY64
|
||||
else
|
||||
@echo " BINARY ... 64bit "
|
||||
endif
|
||||
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
@echo " Use 64 bits int (equivalent to \"-i8\" in Fortran) "
|
||||
endif
|
||||
endif
|
||||
|
||||
@echo " C compiler ... $(C_COMPILER) (command line : $(CC))"
|
||||
ifndef NOFORTRAN
|
||||
@echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))"
|
||||
@@ -57,7 +61,7 @@ endif
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
@echo
|
||||
@echo " Use OpenMP in the multithreading. Becasue of ignoring OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS flags, "
|
||||
@echo " Use OpenMP in the multithreading. Because of ignoring OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS flags, "
|
||||
@echo " you should use OMP_NUM_THREADS environment variable to control the number of threads."
|
||||
@echo
|
||||
endif
|
||||
@@ -128,6 +132,11 @@ ifeq ($(CORE), UNKOWN)
|
||||
endif
|
||||
ifeq ($(NOFORTRAN), 1)
|
||||
$(error OpenBLAS: Detecting fortran compiler failed. Please install fortran compiler, e.g. gfortran, ifort, openf90.)
|
||||
endif
|
||||
ifeq ($(NO_STATIC), 1)
|
||||
ifeq ($(NO_SHARED), 1)
|
||||
$(error OpenBLAS: neither static nor shared are enabled.)
|
||||
endif
|
||||
endif
|
||||
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
@for d in $(SUBDIRS) ; \
|
||||
@@ -144,7 +153,7 @@ endif
|
||||
ifeq ($(EXPRECISION), 1)
|
||||
@echo "#define EXPRECISION">> config_last.h
|
||||
endif
|
||||
##
|
||||
##
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
@$(MAKE) -C kernel commonlibs || exit 1
|
||||
@for d in $(DYNAMIC_CORE) ; \
|
||||
@@ -178,7 +187,7 @@ blas :
|
||||
fi; \
|
||||
done
|
||||
|
||||
hpl :
|
||||
hpl :
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
for d in $(BLASDIRS) ../laswp exports ; \
|
||||
do if test -d $$d; then \
|
||||
@@ -201,12 +210,13 @@ hpl_p :
|
||||
done
|
||||
|
||||
ifeq ($(NO_LAPACK), 1)
|
||||
netlib :
|
||||
netlib :
|
||||
|
||||
else
|
||||
netlib : lapack_prebuild
|
||||
ifndef NOFORTRAN
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
endif
|
||||
ifndef NO_LAPACKE
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib
|
||||
@@ -230,46 +240,28 @@ ifndef NOFORTRAN
|
||||
-@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKELIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifeq ($(FC), gfortran)
|
||||
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifdef SMP
|
||||
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
else
|
||||
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
|
||||
lapack-3.4.2 : lapack-3.4.2.tgz
|
||||
ifndef NOFORTRAN
|
||||
ifndef NO_LAPACK
|
||||
@if test `$(MD5SUM) $< | $(AWK) '{print $$1}'` = 61bf1a8a4469d4bdb7604f5897179478; then \
|
||||
echo $(TAR) zxf $< ;\
|
||||
$(TAR) zxf $< && (cd $(NETLIB_LAPACK_DIR); $(PATCH) -p1 < ../patch.for_lapack-3.4.2) ;\
|
||||
rm -f $(NETLIB_LAPACK_DIR)/lapacke/make.inc ;\
|
||||
else \
|
||||
rm -rf $(NETLIB_LAPACK_DIR) ;\
|
||||
echo " Cannot download lapack-3.4.2.tgz or the MD5 check sum is wrong (Please use orignal)."; \
|
||||
exit 1; \
|
||||
fi
|
||||
endif
|
||||
endif
|
||||
|
||||
LAPACK_URL=http://www.netlib.org/lapack/lapack-3.4.2.tgz
|
||||
|
||||
lapack-3.4.2.tgz :
|
||||
ifndef NOFORTRAN
|
||||
#http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Darwin NetBSD))
|
||||
curl -O $(LAPACK_URL);
|
||||
else
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
fetch $(LAPACK_URL);
|
||||
else
|
||||
wget -O $@ $(LAPACK_URL);
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
large.tgz :
|
||||
large.tgz :
|
||||
ifndef NOFORTRAN
|
||||
if [ ! -a $< ]; then
|
||||
-wget http://www.netlib.org/lapack/timing/large.tgz;
|
||||
@@ -287,17 +279,20 @@ lapack-timing : large.tgz timing.tgz
|
||||
ifndef NOFORTRAN
|
||||
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
|
||||
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )
|
||||
make -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
make -C $(NETLIB_LAPACK_DIR)/TIMING
|
||||
endif
|
||||
|
||||
|
||||
lapack-test :
|
||||
$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintsts xlintstz xlintstzc
|
||||
@rm -f $(NETLIB_LAPACK_DIR)/TESTING/*.out
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING
|
||||
$(GREP) failed $(NETLIB_LAPACK_DIR)/TESTING/*.out
|
||||
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
|
||||
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||
|
||||
blas-test:
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
|
||||
make -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out)
|
||||
|
||||
|
||||
dummy :
|
||||
|
||||
@@ -323,4 +318,5 @@ endif
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) clean
|
||||
@rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h
|
||||
@rm -f *.grd Makefile.conf_last config_last.h
|
||||
@(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out testing_results.txt)
|
||||
@echo Done.
|
||||
|
||||
@@ -50,7 +50,7 @@ endif
|
||||
|
||||
ifndef SMP
|
||||
LIBCXML = -lcxml -lots -lm
|
||||
LIBATLAS = -L/usr/lib/atlas3.7.8 -lf77blas -latlas -lm
|
||||
LIBATLAS = -L/usr/lib/atlas3.7.8 -lf77blas -latlas -lm
|
||||
else
|
||||
LIBCXML = -lcxmlp -lots -lm
|
||||
LIBATLAS = -L/usr/lib/atlas3.7.8p -llapack -lptcblas -lptf77blas -latlas -lpthread -lm
|
||||
|
||||
11
Makefile.arm
11
Makefile.arm
@@ -1,3 +1,8 @@
|
||||
# ifeq logical or
|
||||
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
|
||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV7)
|
||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
@@ -10,3 +15,9 @@ FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
endif
|
||||
|
||||
|
||||
ifeq ($(CORE), ARMV5)
|
||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
endif
|
||||
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ LIBMLIB = ../../level1/others/libmisc.a -L/opt/intel/fc/ia64/9.1.040/lib -L/opt
|
||||
LIBSCSL = -L/opt/scsl/1.4.1.0/lib -Wl,-rpath,/opt/scsl/1.4.1.0/lib -lscs
|
||||
|
||||
ifndef SMP
|
||||
LIBATLAS = -L/usr/lib/atlas3.6.0 -lf77blas -latlas -lm
|
||||
LIBATLAS = -L/usr/lib/atlas3.6.0 -lf77blas -latlas -lm
|
||||
else
|
||||
LIBATLAS = -L$(HOME)/misc/lib -L/usr/lib/atlas3.6.0p -llapack -lptcblas -lptf77blas -latlas -lpthread -lm
|
||||
endif
|
||||
|
||||
@@ -7,7 +7,10 @@ PREFIX ?= /opt/OpenBLAS
|
||||
|
||||
OPENBLAS_INCLUDE_DIR := $(PREFIX)/include
|
||||
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib
|
||||
OPENBLAS_BINARY_DIR := $(PREFIX)/bin
|
||||
OPENBLAS_BUILD_DIR := $(CURDIR)
|
||||
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
|
||||
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
|
||||
|
||||
.PHONY : install
|
||||
.NOTPARALLEL : install
|
||||
@@ -19,11 +22,13 @@ install : lib.grd
|
||||
@-mkdir -p $(DESTDIR)$(PREFIX)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_BINARY_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||
@echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
#for inc
|
||||
#for inc
|
||||
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@awk '{print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@awk 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@@ -41,19 +46,21 @@ ifndef NO_CBLAS
|
||||
endif
|
||||
|
||||
ifndef NO_LAPACKE
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
|
||||
endif
|
||||
|
||||
#for install static library
|
||||
#for install static library
|
||||
ifndef NO_STATIC
|
||||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@install -pm644 $(LIBNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
#for install shared library
|
||||
endif
|
||||
#for install shared library
|
||||
ifndef NO_SHARED
|
||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
ifeq ($(OSNAME), Linux)
|
||||
@@ -72,18 +79,38 @@ ifeq ($(OSNAME), NetBSD)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@-cp $(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-install_name_tool -id $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)
|
||||
@-ln -fs $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).dylib
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
||||
@-cp $(LIBDLLNAME).a $(OPENBLAS_LIBRARY_DIR)
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
||||
endif
|
||||
endif
|
||||
|
||||
#Generating OpenBLASConfig.cmake
|
||||
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
ifndef NO_SHARED
|
||||
#ifeq logical or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
else
|
||||
#only static
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
@echo Install OK!
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ FLAMEPATH = $(HOME)/flame/lib
|
||||
#ifeq ($(CORE), CELL)
|
||||
#CELL_SDK_ROOT = /opt/IBM/cell-sdk-1.1/sysroot/usr
|
||||
#SPU_CC = spu-gcc
|
||||
#EXTRALIB += -lspe
|
||||
#EXTRALIB += -lspe
|
||||
#endif
|
||||
|
||||
ifeq ($(OSNAME), Linux)
|
||||
@@ -38,7 +38,7 @@ ASFLAGS = -a32
|
||||
endif
|
||||
endif
|
||||
|
||||
# CCOMMON_OPT += -maltivec -mabi=altivec
|
||||
# CCOMMON_OPT += -maltivec -mabi=altivec
|
||||
|
||||
LIBFLAME = -L$(FLAMEPATH) -llapack2flame -lflame-lapack -lflame-base $(LIBS)
|
||||
|
||||
@@ -57,7 +57,7 @@ endif
|
||||
|
||||
LIBVECLIB = -framework VecLib
|
||||
ifndef SMP
|
||||
LIBATLAS = -L/usr/lib/atlas3.7.11 -lf77blas -latlas -lg2c -lm
|
||||
LIBATLAS = -L/usr/lib/atlas3.7.11 -lf77blas -latlas -lg2c -lm
|
||||
LIBESSL = -lessl $(ESSLPATH) ../../level1/others/libmisc.a -lm
|
||||
else
|
||||
LIBATLAS = -L/usr/lib/atlas3.7.11p -lptf77blas -latlas -lm -lpthread
|
||||
@@ -73,7 +73,7 @@ endif
|
||||
LIBVECLIB = /System/Library/Frameworks/vecLib.framework/Versions/Current/vecLib
|
||||
|
||||
ifndef SMP
|
||||
LIBATLAS = -L/usr/lib64/atlas3.7.11 -lf77blas -latlas -lg2c -lm
|
||||
LIBATLAS = -L/usr/lib64/atlas3.7.11 -lf77blas -latlas -lg2c -lm
|
||||
LIBESSL = -lessl $(ESSLPATH) -lm
|
||||
else
|
||||
LIBATLAS = -L/usr/lib64/atlas3.7.11p -lptf77blas -latlas -lm -lpthread
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
#
|
||||
# Beginning of user configuration
|
||||
# Beginning of user configuration
|
||||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.2.8
|
||||
VERSION = 0.2.14
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
# is libopenblas_$(LIBNAMESUFFIX).so.0.
|
||||
# LIBNAMESUFFIX = omp
|
||||
|
||||
@@ -25,9 +25,20 @@ VERSION = 0.2.8
|
||||
# FC = gfortran
|
||||
|
||||
# Even you can specify cross compiler. Meanwhile, please set HOSTCC.
|
||||
|
||||
# cross compiler for Windows
|
||||
# CC = x86_64-w64-mingw32-gcc
|
||||
# FC = x86_64-w64-mingw32-gfortran
|
||||
|
||||
# cross compiler for 32bit ARM
|
||||
# CC = arm-linux-gnueabihf-gcc
|
||||
# FC = arm-linux-gnueabihf-gfortran
|
||||
|
||||
# cross compiler for 64bit ARM
|
||||
# CC = aarch64-linux-gnu-gcc
|
||||
# FC = aarch64-linux-gnu-gfortran
|
||||
|
||||
|
||||
# If you use the cross compiler, please set this host compiler.
|
||||
# HOSTCC = gcc
|
||||
|
||||
@@ -48,17 +59,20 @@ VERSION = 0.2.8
|
||||
# automatically detected by the the script.
|
||||
# NUM_THREADS = 24
|
||||
|
||||
# if you don't need to install the static library, please comment it in.
|
||||
# NO_STATIC = 1
|
||||
|
||||
# if you don't need generate the shared library, please comment it in.
|
||||
# NO_SHARED = 1
|
||||
|
||||
# If you don't need CBLAS interface, please comment it in.
|
||||
# NO_CBLAS = 1
|
||||
|
||||
# If you only want CBLAS interface without installing Fortran compiler,
|
||||
# If you only want CBLAS interface without installing Fortran compiler,
|
||||
# please comment it in.
|
||||
# ONLY_CBLAS = 1
|
||||
|
||||
# If you don't need LAPACK, please comment it in.
|
||||
# If you don't need LAPACK, please comment it in.
|
||||
# If you set NO_LAPACK=1, the library automatically sets NO_LAPACKE=1.
|
||||
# NO_LAPACK = 1
|
||||
|
||||
@@ -76,15 +90,21 @@ VERSION = 0.2.8
|
||||
# Unfortunately most of kernel won't give us high quality buffer.
|
||||
# BLAS tries to find the best region before entering main function,
|
||||
# but it will consume time. If you don't like it, you can disable one.
|
||||
# NO_WARMUP = 1
|
||||
NO_WARMUP = 1
|
||||
|
||||
# If you want to disable CPU/Memory affinity on Linux.
|
||||
# NO_AFFINITY = 1
|
||||
NO_AFFINITY = 1
|
||||
|
||||
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
|
||||
# if you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus
|
||||
# BIGNUMA = 1
|
||||
|
||||
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
|
||||
# and OS. However, the performance is low.
|
||||
# NO_AVX = 1
|
||||
|
||||
# Don't use Haswell optimizations if binutils is too old (e.g. RHEL6)
|
||||
# NO_AVX2 = 1
|
||||
|
||||
# Don't use parallel make.
|
||||
# NO_PARALLEL_MAKE = 1
|
||||
|
||||
@@ -109,8 +129,8 @@ VERSION = 0.2.8
|
||||
# If you need to synchronize FP CSR between threads (for x86/x86_64 only).
|
||||
# CONSISTENT_FPCSR = 1
|
||||
|
||||
# If any gemm arguement m, n or k is less or equal this threshold, gemm will be execute
|
||||
# with single thread. You can use this flag to avoid the overhead of multi-threading
|
||||
# If any gemm arguement m, n or k is less or equal this threshold, gemm will be execute
|
||||
# with single thread. You can use this flag to avoid the overhead of multi-threading
|
||||
# in small matrix sizes. The default value is 4.
|
||||
# GEMM_MULTITHREAD_THRESHOLD = 4
|
||||
|
||||
@@ -125,16 +145,33 @@ VERSION = 0.2.8
|
||||
# The installation directory.
|
||||
# PREFIX = /opt/OpenBLAS
|
||||
|
||||
# Common Optimization Flag;
|
||||
# Common Optimization Flag;
|
||||
# The default -O2 is enough.
|
||||
# COMMON_OPT = -O2
|
||||
|
||||
# gfortran option for LAPACK
|
||||
# enable this flag only on 64bit Linux and if you need a thread safe lapack library
|
||||
# FCOMMON_OPT = -frecursive
|
||||
|
||||
# Profiling flags
|
||||
COMMON_PROF = -pg
|
||||
|
||||
# Build Debug version
|
||||
# DEBUG = 1
|
||||
|
||||
# Improve GEMV and GER for small matrices by stack allocation.
|
||||
# For details, https://github.com/xianyi/OpenBLAS/pull/482
|
||||
#
|
||||
# End of user configuration
|
||||
# MAX_STACK_ALLOC=2048
|
||||
|
||||
# Add a prefix or suffix to all exported symbol names in the shared library.
|
||||
# Avoid conflicts with other BLAS libraries, especially when using
|
||||
# 64 bit integer interfaces in OpenBLAS.
|
||||
# For details, https://github.com/xianyi/OpenBLAS/pull/459
|
||||
#
|
||||
# SYMBOLPREFIX=
|
||||
# SYMBOLSUFFIX=
|
||||
|
||||
#
|
||||
# End of user configuration
|
||||
#
|
||||
|
||||
@@ -27,7 +27,7 @@ LIBNAME = $(LIBPREFIX).a
|
||||
|
||||
ifndef SMP
|
||||
LIBCXML = -L/opt/SUNWspro/lib/v9
|
||||
LIBATLAS = -L$(HOME)/misc/lib -lf77blas -latlas -lm
|
||||
LIBATLAS = -L$(HOME)/misc/lib -lf77blas -latlas -lm
|
||||
else
|
||||
LIBCXML = -lcxmlp -lots -lm
|
||||
endif
|
||||
|
||||
166
Makefile.system
166
Makefile.system
@@ -35,7 +35,7 @@ include $(TOPDIR)/$(MAKEFILE_RULE)
|
||||
endif
|
||||
|
||||
#
|
||||
# Beginning of system configuration
|
||||
# Beginning of system configuration
|
||||
#
|
||||
|
||||
ifndef HOSTCC
|
||||
@@ -46,25 +46,79 @@ ifdef TARGET
|
||||
GETARCH_FLAGS := -DFORCE_$(TARGET)
|
||||
endif
|
||||
|
||||
# Force fallbacks for 32bit
|
||||
|
||||
ifeq ($(BINARY), 32)
|
||||
ifeq ($(TARGET), HASWELL)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET), SANDYBRIDGE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET), BULLDOZER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET), PILEDRIVER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET), STEAMROLLER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
#TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1.
|
||||
#
|
||||
ifdef TARGET_CORE
|
||||
GETARCH_FLAGS := -DFORCE_$(TARGET_CORE)
|
||||
endif
|
||||
|
||||
# Force fallbacks for 32bit
|
||||
|
||||
ifeq ($(BINARY), 32)
|
||||
ifeq ($(TARGET_CORE), HASWELL)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), SANDYBRIDGE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), BULLDOZER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), PILEDRIVER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), STEAMROLLER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
|
||||
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
GETARCH_FLAGS += -DUSE64BITINT
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef GEMM_MULTITHREAD_THRESHOLD
|
||||
GEMM_MULTITHREAD_THRESHOLD=4
|
||||
endif
|
||||
GETARCH_FLAGS += -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD)
|
||||
GETARCH_FLAGS += -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD)
|
||||
|
||||
ifeq ($(NO_AVX), 1)
|
||||
GETARCH_FLAGS += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifeq ($(BINARY), 32)
|
||||
GETARCH_FLAGS += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifeq ($(NO_AVX2), 1)
|
||||
GETARCH_FLAGS += -DNO_AVX2
|
||||
endif
|
||||
|
||||
ifeq ($(DEBUG), 1)
|
||||
GETARCH_FLAGS += -g
|
||||
endif
|
||||
@@ -138,6 +192,8 @@ LD = $(CROSS_SUFFIX)ld
|
||||
RANLIB = $(CROSS_SUFFIX)ranlib
|
||||
NM = $(CROSS_SUFFIX)nm
|
||||
DLLWRAP = $(CROSS_SUFFIX)dllwrap
|
||||
OBJCOPY = $(CROSS_SUFFIX)objcopy
|
||||
OBJCONV = $(CROSS_SUFFIX)objconv
|
||||
|
||||
#
|
||||
# OS dependent settings
|
||||
@@ -158,6 +214,7 @@ endif
|
||||
|
||||
ifeq ($(OSNAME), Linux)
|
||||
EXTRALIB += -lm
|
||||
NO_EXPRECISION = 1
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), AIX)
|
||||
@@ -185,14 +242,14 @@ GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
|
||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||
ifeq ($(GCCVERSIONGT4), 1)
|
||||
# GCC Majar version > 4
|
||||
# It is compatible with MSVC ABI.
|
||||
# It is compatible with MSVC ABI.
|
||||
CCOMMON_OPT += -DMS_ABI
|
||||
endif
|
||||
|
||||
ifeq ($(GCCVERSIONGTEQ4), 1)
|
||||
ifeq ($(GCCMINORVERSIONGTEQ7), 1)
|
||||
# GCC Version >=4.7
|
||||
# It is compatible with MSVC ABI.
|
||||
# It is compatible with MSVC ABI.
|
||||
CCOMMON_OPT += -DMS_ABI
|
||||
endif
|
||||
endif
|
||||
@@ -254,6 +311,10 @@ ifdef SANITY_CHECK
|
||||
CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU)
|
||||
endif
|
||||
|
||||
ifdef MAX_STACK_ALLOC
|
||||
CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC)
|
||||
endif
|
||||
|
||||
#
|
||||
# Architecture dependent settings
|
||||
#
|
||||
@@ -272,7 +333,7 @@ FCOMMON_OPT += -m128bit-long-double
|
||||
endif
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
EXPRECISION = 1
|
||||
CCOMMON_OPT += -DEXPRECISION
|
||||
CCOMMON_OPT += -DEXPRECISION
|
||||
FCOMMON_OPT += -m128bit-long-double
|
||||
endif
|
||||
endif
|
||||
@@ -301,7 +362,14 @@ ifeq ($(C_COMPILER), INTEL)
|
||||
CCOMMON_OPT += -wd981
|
||||
endif
|
||||
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
|
||||
#check
|
||||
ifeq ($(USE_THREAD), 0)
|
||||
$(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.)
|
||||
endif
|
||||
|
||||
# ifeq logical or. GCC or LSB
|
||||
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
|
||||
CCOMMON_OPT += -fopenmp
|
||||
@@ -335,15 +403,15 @@ ifeq ($(DYNAMIC_ARCH), 1)
|
||||
ifeq ($(ARCH), x86)
|
||||
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
|
||||
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
ifneq ($(NO_AVX), 1)
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86_64)
|
||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
ifneq ($(NO_AVX), 1)
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER
|
||||
endif
|
||||
ifneq ($(NO_AVX2), 1)
|
||||
DYNAMIC_CORE += HASWELL
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -413,12 +481,12 @@ endif
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
CCOMMON_OPT += -march=mips64
|
||||
FCOMMON_OPT += -march=mips64
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), LOONGSON3B)
|
||||
ifeq ($(CORE), LOONGSON3B)
|
||||
CCOMMON_OPT += -march=mips64
|
||||
FCOMMON_OPT += -march=mips64
|
||||
endif
|
||||
@@ -488,7 +556,7 @@ CCOMMON_OPT += -DF_INTERFACE_GFORT
|
||||
FCOMMON_OPT += -Wall
|
||||
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
EXTRALIB += -lgfortran
|
||||
EXTRALIB += -lgfortran
|
||||
endif
|
||||
ifdef NO_BINARY_MODE
|
||||
ifeq ($(ARCH), mips64)
|
||||
@@ -502,8 +570,10 @@ else
|
||||
ifdef BINARY64
|
||||
FCOMMON_OPT += -m64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -fdefault-integer-8
|
||||
endif
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -m32
|
||||
endif
|
||||
@@ -516,8 +586,10 @@ endif
|
||||
ifeq ($(F_COMPILER), INTEL)
|
||||
CCOMMON_OPT += -DF_INTERFACE_INTEL
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
ifdef USE_OPENMP
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
@@ -536,8 +608,10 @@ CCOMMON_OPT += -DF_INTERFACE_IBM
|
||||
ifdef BINARY64
|
||||
FCOMMON_OPT += -q64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -qintsize=8
|
||||
endif
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -q32
|
||||
endif
|
||||
@@ -551,8 +625,10 @@ CCOMMON_OPT += -DF_INTERFACE_PGI
|
||||
COMMON_PROF += -DPGICOMPILER
|
||||
ifdef BINARY64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
FCOMMON_OPT += -tp p7-64
|
||||
else
|
||||
FCOMMON_OPT += -tp p7
|
||||
@@ -566,9 +642,11 @@ ifeq ($(F_COMPILER), PATHSCALE)
|
||||
CCOMMON_OPT += -DF_INTERFACE_PATHSCALE
|
||||
ifdef BINARY64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq ($(ARCH), mips64)
|
||||
ifndef BINARY64
|
||||
@@ -593,9 +671,11 @@ ifeq ($(F_COMPILER), OPEN64)
|
||||
CCOMMON_OPT += -DF_INTERFACE_OPEN64
|
||||
ifdef BINARY64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifndef BINARY64
|
||||
@@ -603,11 +683,11 @@ FCOMMON_OPT += -n32
|
||||
else
|
||||
FCOMMON_OPT += -n64
|
||||
endif
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
FCOMMON_OPT += -loongson3 -static
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), LOONGSON3B)
|
||||
ifeq ($(CORE), LOONGSON3B)
|
||||
FCOMMON_OPT += -loongson3 -static
|
||||
endif
|
||||
|
||||
@@ -633,11 +713,11 @@ CCOMMON_OPT += -n32
|
||||
else
|
||||
CCOMMON_OPT += -n64
|
||||
endif
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
CCOMMON_OPT += -loongson3 -static
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), LOONGSON3B)
|
||||
ifeq ($(CORE), LOONGSON3B)
|
||||
CCOMMON_OPT += -loongson3 -static
|
||||
endif
|
||||
|
||||
@@ -681,21 +761,23 @@ endif
|
||||
|
||||
ifdef BINARY64
|
||||
ifdef INTERFACE64
|
||||
CCOMMON_OPT +=
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
CCOMMON_OPT +=
|
||||
#-DUSE64BITINT
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(NEED_PIC), 1)
|
||||
ifeq ($(C_COMPILER), IBM)
|
||||
CCOMMON_OPT += -qpic=large
|
||||
CCOMMON_OPT += -qpic=large
|
||||
else
|
||||
CCOMMON_OPT += -fPIC
|
||||
CCOMMON_OPT += -fPIC
|
||||
endif
|
||||
ifeq ($(F_COMPILER), SUN)
|
||||
FCOMMON_OPT += -pic
|
||||
else
|
||||
FCOMMON_OPT += -fPIC
|
||||
FCOMMON_OPT += -fPIC
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -717,6 +799,14 @@ ifeq ($(NO_AVX), 1)
|
||||
CCOMMON_OPT += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86)
|
||||
CCOMMON_OPT += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifeq ($(NO_AVX2), 1)
|
||||
CCOMMON_OPT += -DNO_AVX2
|
||||
endif
|
||||
|
||||
ifdef SMP
|
||||
CCOMMON_OPT += -DSMP_SERVER
|
||||
|
||||
@@ -732,6 +822,10 @@ ifeq ($(USE_OPENMP), 1)
|
||||
CCOMMON_OPT += -DUSE_OPENMP
|
||||
endif
|
||||
|
||||
ifeq ($(BIGNUMA), 1)
|
||||
CCOMMON_OPT += -DBIGNUMA
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
ifeq ($(NO_WARMUP), 1)
|
||||
@@ -769,6 +863,14 @@ else
|
||||
LIBPREFIX = libopenblas_$(LIBNAMESUFFIX)
|
||||
endif
|
||||
|
||||
ifndef SYMBOLPREFIX
|
||||
SYMBOLPREFIX =
|
||||
endif
|
||||
|
||||
ifndef SYMBOLSUFFIX
|
||||
SYMBOLSUFFIX =
|
||||
endif
|
||||
|
||||
KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
|
||||
|
||||
include $(TOPDIR)/Makefile.$(ARCH)
|
||||
@@ -846,19 +948,6 @@ ifeq ($(DEBUG), 1)
|
||||
COMMON_OPT += -g
|
||||
endif
|
||||
|
||||
ifndef COMMON_OPT
|
||||
ifeq ($(ARCH), arm)
|
||||
COMMON_OPT = -O3
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef COMMON_OPT
|
||||
ifeq ($(ARCH), arm64)
|
||||
COMMON_OPT = -O3
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
ifndef COMMON_OPT
|
||||
COMMON_OPT = -O2
|
||||
endif
|
||||
@@ -872,14 +961,23 @@ override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
|
||||
#MAKEOVERRIDES =
|
||||
|
||||
#For LAPACK Fortran codes.
|
||||
#Disable -fopenmp for LAPACK Fortran codes on Windows.
|
||||
ifdef OS_WINDOWS
|
||||
LAPACK_FFLAGS := $(filter-out -fopenmp -mp -openmp -xopenmp=parallel,$(FFLAGS))
|
||||
LAPACK_FPFLAGS := $(filter-out -fopenmp -mp -openmp -xopenmp=parallel,$(FPFLAGS))
|
||||
else
|
||||
LAPACK_FFLAGS := $(FFLAGS)
|
||||
LAPACK_FPFLAGS := $(FPFLAGS)
|
||||
endif
|
||||
|
||||
LAPACK_CFLAGS = $(CFLAGS)
|
||||
LAPACK_CFLAGS += -DHAVE_LAPACK_CONFIG_H
|
||||
LAPACK_CFLAGS += -DHAVE_LAPACK_CONFIG_H
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
LAPACK_CFLAGS += -DLAPACK_ILP64
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef OS_WINDOWS
|
||||
LAPACK_CFLAGS += -DOPENBLAS_OS_WINDOWS
|
||||
endif
|
||||
|
||||
@@ -57,7 +57,7 @@ commonlibs :: $(COMMONOBJS)
|
||||
commonprof :: $(COMMONOBJS_P)
|
||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME_P) $^
|
||||
|
||||
quick :
|
||||
quick :
|
||||
$(MAKE) -C $(TOPDIR) libs
|
||||
|
||||
bms.$(SUFFIX):bm.c $(TOPDIR)/../bench/bmcommon.h
|
||||
@@ -386,7 +386,7 @@ kbench_rank_k: kbench_rank_k.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(OBJS) $(LIBS
|
||||
smallbench: smallbench.$(SUFFIX) $(BLASOBJS) $(COMMONOBJS) $(OBJS) $(LIBS)
|
||||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
smallbench.mkl: smallbench.$(SUFFIX)
|
||||
smallbench.mkl: smallbench.$(SUFFIX)
|
||||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
bench.sun: bench.$(SUFFIX) $(OBJS)
|
||||
@@ -410,7 +410,7 @@ bench.acml: bench.$(SUFFIX) $(OBJS)
|
||||
bench.flame: bench.$(SUFFIX) $(OBJS)
|
||||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
kbench.mkl: kbench.$(SUFFIX) $(OBJS)
|
||||
kbench.mkl: kbench.$(SUFFIX) $(OBJS)
|
||||
$(CC) -static -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
bench.mkl: bench.$(SUFFIX) $(OBJS)
|
||||
@@ -537,10 +537,10 @@ params.$(SUFFIX):param.c $(TOPDIR)/../bench/bmcommon.h
|
||||
$(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
|
||||
paramd.$(SUFFIX):param.c $(TOPDIR)/../bench/bmcommon.h
|
||||
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
|
||||
paramq.$(SUFFIX):param.c $(TOPDIR)/../bench/bmcommon.h
|
||||
$(CC) $(CFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
$(CC) $(CFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
|
||||
paramc.$(SUFFIX):paramz.c $(TOPDIR)/../bench/bmcommon.h
|
||||
$(CC) $(CFLAGS) -UDOUBLE -DCOMPLEX -c $< -o $(@F)
|
||||
@@ -555,10 +555,10 @@ params-ex.$(SUFFIX):param-ex.c $(TOPDIR)/../bench/bmcommon.h
|
||||
$(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
|
||||
paramd-ex.$(SUFFIX):param-ex.c $(TOPDIR)/../bench/bmcommon.h
|
||||
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
|
||||
paramq-ex.$(SUFFIX):param-ex.c $(TOPDIR)/../bench/bmcommon.h
|
||||
$(CC) $(CFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
$(CC) $(CFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
|
||||
paramc-ex.$(SUFFIX):paramz-ex.c $(TOPDIR)/../bench/bmcommon.h
|
||||
$(CC) $(CFLAGS) -UDOUBLE -DCOMPLEX -c $< -o $(@F)
|
||||
|
||||
@@ -14,7 +14,7 @@ endif
|
||||
# LIBMKL = -L$(MKLPATH)/32 -lmkl_lapack -lmkl_ia32 -lguide -lpthread -lm
|
||||
|
||||
ifndef SMP
|
||||
LIBATLAS = -L$(ATLAS) -lf77blas -latlas -lg2c -lm
|
||||
LIBATLAS = -L$(ATLAS) -lf77blas -latlas -lg2c -lm
|
||||
else
|
||||
LIBATLAS = -L$(ATLAS) -lptf77blas -latlas -lpthread -lg2c -lm
|
||||
endif
|
||||
@@ -50,7 +50,7 @@ LIBSUNPERF = -L/opt/SUNWspro/lib/sse2 -Wl,-R,/opt/SUNWspro/lib/sse2 -lsunperf
|
||||
LIBVECLIB = /System/Library/Frameworks/vecLib.framework/Versions/Current/vecLib
|
||||
|
||||
ifndef SMP
|
||||
LIBATLAS = -L$(ATLASPATH)/32 -lcblas -lf77blas -latlas -lm
|
||||
LIBATLAS = -L$(ATLASPATH)/32 -lcblas -lf77blas -latlas -lm
|
||||
else
|
||||
LIBATLAS = -L$(ATLASPATH)/32 -lptf77blas -lptatlas -lpthread -lm
|
||||
endif
|
||||
|
||||
@@ -28,7 +28,7 @@ endif
|
||||
|
||||
|
||||
ifndef SMP
|
||||
LIBATLAS = -L$(ATLASPATH)64 -llapack -lcblas -lf77blas -latlas -lm
|
||||
LIBATLAS = -L$(ATLASPATH)64 -llapack -lcblas -lf77blas -latlas -lm
|
||||
else
|
||||
LIBATLAS = -L$(ATLASPATH)64 -llapack -lptcblas -lptf77blas -latlas -lpthread -lm
|
||||
endif
|
||||
|
||||
32
README.md
32
README.md
@@ -3,7 +3,7 @@
|
||||
[](https://travis-ci.org/xianyi/OpenBLAS)
|
||||
|
||||
## Introduction
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
|
||||
Please read the documents on OpenBLAS wiki pages <http://github.com/xianyi/OpenBLAS/wiki>.
|
||||
|
||||
@@ -55,16 +55,24 @@ Please read GotoBLAS_01Readme.txt
|
||||
|
||||
#### x86/x86-64:
|
||||
- **Intel Xeon 56xx (Westmere)**: Used GotoBLAS2 Nehalem codes.
|
||||
- **Intel Sandy Bridge**: Optimized Level-3 BLAS with AVX on x86-64.
|
||||
- **Intel Haswell**: Optimized Level-3 BLAS with AVX on x86-64 (identical to Sandy Bridge).
|
||||
- **Intel Sandy Bridge**: Optimized Level-3 and Level-2 BLAS with AVX on x86-64.
|
||||
- **Intel Haswell**: Optimized Level-3 and Level-2 BLAS with AVX2 and FMA on x86-64.
|
||||
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
|
||||
- **AMD Bulldozer**: x86-64 S/DGEMM AVX kernels. (Thank Werner Saar)
|
||||
- **AMD PILEDRIVER**: Used Bulldozer codes.
|
||||
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thank Werner Saar)
|
||||
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
|
||||
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
|
||||
|
||||
#### MIPS64:
|
||||
- **ICT Loongson 3A**: Optimized Level-3 BLAS and the part of Level-1,2.
|
||||
- **ICT Loongson 3B**: Experimental
|
||||
|
||||
#### ARM:
|
||||
- **ARMV6**: Optimized BLAS for vfpv2 and vfpv3-d16 ( e.g. BCM2835, Cortex M0+ )
|
||||
- **ARMV7**: Optimized BLAS for vfpv3-d32 ( e.g. Cortex A8, A9 and A15 )
|
||||
|
||||
#### ARM64:
|
||||
- **ARMV8**: Experimental
|
||||
|
||||
### Support OS:
|
||||
- **GNU/Linux**
|
||||
- **MingWin/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
|
||||
@@ -74,7 +82,7 @@ Please read GotoBLAS_01Readme.txt
|
||||
## Usages
|
||||
Link with libopenblas.a or -lopenblas for shared library.
|
||||
|
||||
### Set the number of threads with environment variables.
|
||||
### Set the number of threads with environment variables.
|
||||
|
||||
Examples:
|
||||
|
||||
@@ -84,7 +92,7 @@ Examples:
|
||||
|
||||
export GOTO_NUM_THREADS=4
|
||||
|
||||
or
|
||||
or
|
||||
|
||||
export OMP_NUM_THREADS=4
|
||||
|
||||
@@ -92,7 +100,7 @@ The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS.
|
||||
|
||||
If you compile this lib with USE_OPENMP=1, you should set OMP_NUM_THREADS environment variable. OpenBLAS ignores OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS with USE_OPENMP=1.
|
||||
|
||||
### Set the number of threads on runtime.
|
||||
### Set the number of threads on runtime.
|
||||
|
||||
We provided the below functions to control the number of threads on runtime.
|
||||
|
||||
@@ -116,12 +124,12 @@ Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD ve
|
||||
* Please read [Faq](https://github.com/xianyi/OpenBLAS/wiki/Faq) at first.
|
||||
* Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD.
|
||||
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture. The Clang 3.0 will generate the wrong AVX binary code.
|
||||
* The number of CPUs/Cores should less than or equal to 256.
|
||||
* On Linux, OpenBLAS sets the processor affinity by default. This may cause [the conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html). You can build the library with NO_AFFINITY=1.
|
||||
* On Loongson 3A. make test would be failed because of pthread_create error. The error code is EAGAIN. However, it will be OK when you run the same testcase on shell.
|
||||
* The number of CPUs/Cores should less than or equal to 256. On Linux x86_64(amd64), there is experimental support for up to 1024 CPUs/Cores and 128 numa nodes if you build the library with BIGNUMA=1.
|
||||
* OpenBLAS does not set processor affinity by default. On Linux, you can enable processor affinity by commenting the line NO_AFFINITY=1 in Makefile.rule. But this may cause [the conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
|
||||
* On Loongson 3A. make test would be failed because of pthread_create error. The error code is EAGAIN. However, it will be OK when you run the same testcase on shell.
|
||||
|
||||
## Contributing
|
||||
1. [Check for open issues](https://github.com/xianyi/OpenBLAS/issues) or open a fresh issue to start a discussion around a feature idea or a bug.
|
||||
1. [Check for open issues](https://github.com/xianyi/OpenBLAS/issues) or open a fresh issue to start a discussion around a feature idea or a bug.
|
||||
1. Fork the [OpenBLAS](https://github.com/xianyi/OpenBLAS) repository to start making your changes.
|
||||
1. Write a test which shows that the bug was fixed or that the feature works as expected.
|
||||
1. Send a pull request. Make sure to add yourself to `CONTRIBUTORS.md`.
|
||||
|
||||
@@ -19,6 +19,7 @@ PENRYN
|
||||
DUNNINGTON
|
||||
NEHALEM
|
||||
SANDYBRIDGE
|
||||
HASWELL
|
||||
ATOM
|
||||
|
||||
b)AMD CPU:
|
||||
@@ -30,6 +31,8 @@ SHANGHAI
|
||||
ISTANBUL
|
||||
BOBCAT
|
||||
BULLDOZER
|
||||
PILEDRIVER
|
||||
STEAMROLLER
|
||||
|
||||
c)VIA CPU:
|
||||
SSE_GENERIC
|
||||
@@ -59,3 +62,12 @@ ITANIUM2
|
||||
SPARC
|
||||
SPARCV7
|
||||
|
||||
6.ARM CPU:
|
||||
CORTEXA15
|
||||
CORTEXA9
|
||||
ARMV7
|
||||
ARMV6
|
||||
ARMV5
|
||||
|
||||
7.ARM 64-bit CPU:
|
||||
ARMV8
|
||||
|
||||
1327
benchmark/Makefile
1327
benchmark/Makefile
File diff suppressed because it is too large
Load Diff
201
benchmark/axpy.c
Normal file
201
benchmark/axpy.c
Normal file
@@ -0,0 +1,201 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef AXPY
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define AXPY BLASFUNC(zaxpy)
|
||||
#else
|
||||
#define AXPY BLASFUNC(caxpy)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define AXPY BLASFUNC(daxpy)
|
||||
#else
|
||||
#define AXPY BLASFUNC(saxpy)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
AXPY (&m, alpha, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -78,29 +78,29 @@ int gettimeofday(struct timeval *tv, void *tz){
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static __inline double getmflops(int ratio, int m, double secs){
|
||||
|
||||
|
||||
double mm = (double)m;
|
||||
double mulflops, addflops;
|
||||
|
||||
@@ -117,9 +117,13 @@ static __inline double getmflops(int ratio, int m, double secs){
|
||||
}
|
||||
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
#ifndef COMPLEX
|
||||
char *trans[] = {"T", "N"};
|
||||
#else
|
||||
char *trans[] = {"C", "N"};
|
||||
#endif
|
||||
char *uplo[] = {"U", "L"};
|
||||
FLOAT alpha[] = {1.0, 0.0};
|
||||
FLOAT beta [] = {0.0, 0.0};
|
||||
@@ -137,7 +141,7 @@ int MAIN__(int argc, char *argv[]){
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
@@ -148,17 +152,17 @@ int MAIN__(int argc, char *argv[]){
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
|
||||
|
||||
fprintf(stderr, "M = %6d : ", (int)m);
|
||||
|
||||
|
||||
for (uplos = 0; uplos < 2; uplos ++) {
|
||||
|
||||
|
||||
#ifndef COMPLEX
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
@@ -219,11 +223,11 @@ int MAIN__(int argc, char *argv[]){
|
||||
fprintf(stderr, "Info = %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
maxerr = 0.;
|
||||
|
||||
|
||||
if (!(uplos & 1)) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i <= j; i++) {
|
||||
@@ -247,8 +251,8 @@ int MAIN__(int argc, char *argv[]){
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stderr,
|
||||
|
||||
fprintf(stderr,
|
||||
#ifdef XDOUBLE
|
||||
" %Le %10.3f MFlops", maxerr,
|
||||
#else
|
||||
@@ -269,4 +273,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
195
benchmark/dot.c
Normal file
195
benchmark/dot.c
Normal file
@@ -0,0 +1,195 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef DOT
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define DOT BLASFUNC(ddot)
|
||||
#else
|
||||
#define DOT BLASFUNC(sdot)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT result;
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
result = DOT (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
260
benchmark/geev.c
Normal file
260
benchmark/geev.c
Normal file
@@ -0,0 +1,260 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef GEEV
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
#define GEEV BLASFUNC(qgeev)
|
||||
#elif defined(DOUBLE)
|
||||
#define GEEV BLASFUNC(dgeev)
|
||||
#else
|
||||
#define GEEV BLASFUNC(sgeev)
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
#define GEEV BLASFUNC(xgeev)
|
||||
#elif defined(DOUBLE)
|
||||
#define GEEV BLASFUNC(zgeev)
|
||||
#else
|
||||
#define GEEV BLASFUNC(cgeev)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef COMPLEX
|
||||
extern void GEEV( char* jobvl, char* jobvr, blasint* n, FLOAT* a,
|
||||
blasint* lda, FLOAT* wr, FLOAT* wi, FLOAT* vl, blasint* ldvl,
|
||||
FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, blasint* info );
|
||||
#else
|
||||
extern void GEEV( char* jobvl, char* jobvr, blasint* n, FLOAT* a,
|
||||
blasint* lda, FLOAT* wr, FLOAT* vl, blasint* ldvl,
|
||||
FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, FLOAT *rwork, blasint* info );
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
|
||||
FLOAT wkopt[4];
|
||||
char job='V';
|
||||
char *p;
|
||||
|
||||
blasint m, i, j, info,lwork;
|
||||
double factor = 26.33;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_JOB"))) job=*p;
|
||||
|
||||
if ( job == 'N' ) factor = 10.0;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Job=%c\n", from, to, step,job);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( vl = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( vr = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( wr = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( wi = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( rwork = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
for(j = 0; j < to; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
lwork = -1;
|
||||
m=to;
|
||||
#ifndef COMPLEX
|
||||
GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
|
||||
#else
|
||||
GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
|
||||
#endif
|
||||
|
||||
lwork = (blasint)wkopt[0];
|
||||
if (( work = (FLOAT *)malloc(sizeof(FLOAT) * lwork * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE FLops Time Lwork\n");
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
lwork = -1;
|
||||
#ifndef COMPLEX
|
||||
GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
|
||||
#else
|
||||
GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
|
||||
#endif
|
||||
|
||||
lwork = (blasint)wkopt[0];
|
||||
#ifndef COMPLEX
|
||||
GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, work, &lwork, &info);
|
||||
#else
|
||||
GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
|
||||
#endif
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info) {
|
||||
fprintf(stderr, "failed to compute eigenvalues .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops : %10.2f Sec : %d\n",
|
||||
COMPSIZE * COMPSIZE * factor * (double)m * (double)m * (double)m / time1 * 1.e-6,time1,lwork);
|
||||
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
225
benchmark/gemm.c
Normal file
225
benchmark/gemm.c
Normal file
@@ -0,0 +1,225 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef GEMM
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define GEMM BLASFUNC(dgemm)
|
||||
#else
|
||||
#define GEMM BLASFUNC(sgemm)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define GEMM BLASFUNC(zgemm)
|
||||
#else
|
||||
#define GEMM BLASFUNC(cgemm)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char trans='N';
|
||||
blasint m, n, i, j;
|
||||
int loops = 1;
|
||||
int has_param_n=0;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step=%d : Trans=%c\n", from, to, step, trans);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if ( p != NULL )
|
||||
loops = atoi(p);
|
||||
|
||||
if ((p = getenv("OPENBLAS_PARAM_N"))) {
|
||||
n = atoi(p);
|
||||
has_param_n=1;
|
||||
}
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
if ( has_param_n == 1 && n <= m )
|
||||
n=n;
|
||||
else
|
||||
n=m;
|
||||
|
||||
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m, (int)n);
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
212
benchmark/gemm3m.c
Normal file
212
benchmark/gemm3m.c
Normal file
@@ -0,0 +1,212 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef GEMM
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define GEMM BLASFUNC(dgemm)
|
||||
#else
|
||||
#define GEMM BLASFUNC(sgemm)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define GEMM BLASFUNC(zgemm3m)
|
||||
#else
|
||||
#define GEMM BLASFUNC(cgemm3m)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char trans='N';
|
||||
blasint m, i, j;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step=%d : Trans=%c\n", from, to, step, trans);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if ( p != NULL )
|
||||
loops = atoi(p);
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
269
benchmark/gemv.c
Normal file
269
benchmark/gemv.c
Normal file
@@ -0,0 +1,269 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef GEMV
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define GEMV BLASFUNC(dgemv)
|
||||
#else
|
||||
#define GEMV BLASFUNC(sgemv)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define GEMV BLASFUNC(zgemv)
|
||||
#else
|
||||
#define GEMV BLASFUNC(cgemv)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char trans='N';
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
blasint n=0;
|
||||
int has_param_n = 0;
|
||||
int has_param_m = 0;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
|
||||
int tomax = to;
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
if ((p = getenv("OPENBLAS_PARAM_N"))) {
|
||||
n = atoi(p);
|
||||
if ((n>0)) has_param_n = 1;
|
||||
if ( n > tomax ) tomax = n;
|
||||
}
|
||||
if ( has_param_n == 0 )
|
||||
if ((p = getenv("OPENBLAS_PARAM_M"))) {
|
||||
m = atoi(p);
|
||||
if ((m>0)) has_param_m = 1;
|
||||
if ( m > tomax ) tomax = m;
|
||||
}
|
||||
|
||||
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Trans = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,trans,inc_x,inc_y,loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * tomax * tomax * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * tomax * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * tomax * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
if (has_param_m == 0)
|
||||
{
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
timeg=0;
|
||||
if ( has_param_n == 0 ) n = m;
|
||||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < n * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < n * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
for(n = from; n <= to; n += step)
|
||||
{
|
||||
timeg=0;
|
||||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < n * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < n * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
218
benchmark/ger.c
Normal file
218
benchmark/ger.c
Normal file
@@ -0,0 +1,218 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef GER
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define GER BLASFUNC(dger)
|
||||
#else
|
||||
#define GER BLASFUNC(sger)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
blasint n=0;
|
||||
int has_param_n = 0;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_PARAM_N"))) {
|
||||
n = atoi(p);
|
||||
if ((n>0) && (n<=to)) has_param_n = 1;
|
||||
}
|
||||
|
||||
if ( has_param_n == 1 )
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d N = %d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,n,inc_x,inc_y,loops);
|
||||
else
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
if ( has_param_n == 0 ) n = m;
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < n * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GER (&m, &n, alpha, x, &inc_x, y, &inc_y, a , &m);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
234
benchmark/getri.c
Normal file
234
benchmark/getri.c
Normal file
@@ -0,0 +1,234 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
#undef GETRF
|
||||
#undef GETRI
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
#define GETRF BLASFUNC(qgetrf)
|
||||
#define GETRI BLASFUNC(qgetri)
|
||||
#elif defined(DOUBLE)
|
||||
#define GETRF BLASFUNC(dgetrf)
|
||||
#define GETRI BLASFUNC(dgetri)
|
||||
#else
|
||||
#define GETRF BLASFUNC(sgetrf)
|
||||
#define GETRI BLASFUNC(sgetri)
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
#define GETRF BLASFUNC(xgetrf)
|
||||
#define GETRI BLASFUNC(xgetri)
|
||||
#elif defined(DOUBLE)
|
||||
#define GETRF BLASFUNC(zgetrf)
|
||||
#define GETRI BLASFUNC(zgetri)
|
||||
#else
|
||||
#define GETRF BLASFUNC(cgetrf)
|
||||
#define GETRI BLASFUNC(cgetri)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
extern void GETRI(blasint *m, FLOAT *a, blasint *lda, blasint *ipiv, FLOAT *work, blasint *lwork, blasint *info);
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*work;
|
||||
FLOAT wkopt[4];
|
||||
blasint *ipiv;
|
||||
blasint m, i, j, info,lwork;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
for(j = 0; j < to; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
lwork = -1;
|
||||
m=to;
|
||||
|
||||
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);
|
||||
|
||||
lwork = (blasint)wkopt[0];
|
||||
if (( work = (FLOAT *)malloc(sizeof(FLOAT) * lwork * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE FLops Time Lwork\n");
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
GETRF (&m, &m, a, &m, ipiv, &info);
|
||||
|
||||
if (info) {
|
||||
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
lwork = -1;
|
||||
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);
|
||||
|
||||
lwork = (blasint)wkopt[0];
|
||||
GETRI(&m, a, &m, ipiv, work, &lwork, &info);
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info) {
|
||||
fprintf(stderr, "failed compute inverse matrix .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops : %10.2f Sec : %d\n",
|
||||
COMPSIZE * COMPSIZE * (4.0/3.0 * (double)m * (double)m *(double)m - (double)m *(double)m + 5.0/3.0* (double)m) / time1 * 1.e-6,time1,lwork);
|
||||
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
192
benchmark/hemm.c
Normal file
192
benchmark/hemm.c
Normal file
@@ -0,0 +1,192 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HEMM
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HEMM BLASFUNC(zhemm)
|
||||
#else
|
||||
#define HEMM BLASFUNC(chemm)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char side='L';
|
||||
char uplo='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c\n", from, to, step,side,uplo);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
208
benchmark/hemv.c
Normal file
208
benchmark/hemv.c
Normal file
@@ -0,0 +1,208 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HEMV
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HEMV BLASFUNC(zhemv)
|
||||
#else
|
||||
#define HEMV BLASFUNC(chemv)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char uplo='L';
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m,(int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HEMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
191
benchmark/her2k.c
Normal file
191
benchmark/her2k.c
Normal file
@@ -0,0 +1,191 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HER2K
|
||||
#ifdef DOUBLE
|
||||
#define HER2K BLASFUNC(zher2k)
|
||||
#else
|
||||
#define HER2K BLASFUNC(cher2k)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
char trans='N';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
189
benchmark/herk.c
Normal file
189
benchmark/herk.c
Normal file
@@ -0,0 +1,189 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HERK
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HERK BLASFUNC(zherk)
|
||||
#else
|
||||
#define HERK BLASFUNC(cherk)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
char trans='N';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);
|
||||
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -83,22 +83,22 @@ int gettimeofday(struct timeval *tv, void *tz){
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -137,7 +137,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
blasint *ipiv;
|
||||
@@ -154,7 +154,7 @@ int MAIN__(int argc, char *argv[]){
|
||||
struct timeval start, stop;
|
||||
double time1, time2;
|
||||
|
||||
argc--;argv++;
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
@@ -165,15 +165,15 @@ int MAIN__(int argc, char *argv[]){
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
@@ -181,7 +181,7 @@ int MAIN__(int argc, char *argv[]){
|
||||
fprintf(stderr, " SIZE Residual Decompose Solve Total\n");
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
@@ -189,9 +189,9 @@ int MAIN__(int argc, char *argv[]){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (i = 0; i < m * COMPSIZE; ++i) b[i] = 0.;
|
||||
|
||||
|
||||
for (j = 0; j < m; ++j) {
|
||||
for (i = 0; i < m * COMPSIZE; ++i) {
|
||||
b[i] += a[i + j * m * COMPSIZE];
|
||||
@@ -208,7 +208,7 @@ int MAIN__(int argc, char *argv[]){
|
||||
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
@@ -221,7 +221,7 @@ int MAIN__(int argc, char *argv[]){
|
||||
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
time2 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
maxerr = 0.;
|
||||
@@ -239,7 +239,7 @@ int MAIN__(int argc, char *argv[]){
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
#ifdef XDOUBLE
|
||||
fprintf(stderr," %Le ", maxerr);
|
||||
#else
|
||||
@@ -247,7 +247,7 @@ int MAIN__(int argc, char *argv[]){
|
||||
#endif
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.2f MFlops %10.2f MFlops\n",
|
||||
" %10.2f MFlops %10.2f MFlops %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. / 3. * (double)m * (double)m * (double)m / time1 * 1.e-6,
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / time2 * 1.e-6,
|
||||
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m) / (time1 + time2) * 1.e-6);
|
||||
@@ -270,4 +270,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
60
benchmark/plot-filter.sh
Executable file
60
benchmark/plot-filter.sh
Executable file
@@ -0,0 +1,60 @@
|
||||
#!/bin/sh
|
||||
# **********************************************************************************
|
||||
# Copyright (c) 2014, The OpenBLAS Project
|
||||
# All rights reserved.
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# 3. Neither the name of the OpenBLAS project nor the names of
|
||||
# its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# **********************************************************************************
|
||||
|
||||
# ************************************************************************
|
||||
# sample filter for data output from benchmark programs
|
||||
#
|
||||
# usage example:
|
||||
# ./dgemm.goto 2>&1|./plotfilter.sh >OpenBLAS
|
||||
# ************************************************************************
|
||||
|
||||
if [ $# -eq 1 ]
|
||||
then
|
||||
arg1=$1
|
||||
else
|
||||
arg1=0
|
||||
fi
|
||||
|
||||
case $arg1 in
|
||||
|
||||
L)
|
||||
# Linpack Benchmark
|
||||
awk '/MFlops/ { print $1,int($8) }'|tail --lines=+2
|
||||
;;
|
||||
|
||||
C)
|
||||
# Cholesky Benchmark
|
||||
awk '/MFlops/ { print $3,int($9) }'|tail --lines=+2
|
||||
;;
|
||||
|
||||
|
||||
*)
|
||||
awk '/MFlops/ { print $1,int($3) }'|tail --lines=+2
|
||||
;;
|
||||
esac
|
||||
|
||||
42
benchmark/plot-header
Normal file
42
benchmark/plot-header
Normal file
@@ -0,0 +1,42 @@
|
||||
# **********************************************************************************
|
||||
# Copyright (c) 2014, The OpenBLAS Project
|
||||
# All rights reserved.
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# 3. Neither the name of the OpenBLAS project nor the names of
|
||||
# its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# **********************************************************************************
|
||||
|
||||
set term x11 font sans;
|
||||
set ylabel "MFlops";
|
||||
set xlabel "Size";
|
||||
set grid xtics;
|
||||
set grid ytics;
|
||||
set key left;
|
||||
set timestamp "generated on %Y-%m-%d by `whoami`"
|
||||
set title "Dtrsm\nUPLO=U TRANS=N SIDE=L\nBulldozer 1 Thread"
|
||||
plot 'OpenBLAS' smooth bezier, 'ACML' smooth bezier, 'MKL' smooth bezier;
|
||||
set output "print.png";
|
||||
show title;
|
||||
show plot;
|
||||
show output;
|
||||
|
||||
|
||||
282
benchmark/potrf.c
Normal file
282
benchmark/potrf.c
Normal file
@@ -0,0 +1,282 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
double fabs(double);
|
||||
|
||||
#undef POTRF
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
#define POTRF BLASFUNC(qpotrf)
|
||||
#define POTRS BLASFUNC(qpotrs)
|
||||
#define POTRI BLASFUNC(qpotri)
|
||||
#define SYRK BLASFUNC(qsyrk)
|
||||
#elif defined(DOUBLE)
|
||||
#define POTRF BLASFUNC(dpotrf)
|
||||
#define POTRS BLASFUNC(dpotrs)
|
||||
#define POTRI BLASFUNC(dpotri)
|
||||
#define SYRK BLASFUNC(dsyrk)
|
||||
#else
|
||||
#define POTRF BLASFUNC(spotrf)
|
||||
#define POTRS BLASFUNC(spotrs)
|
||||
#define POTRI BLASFUNC(spotri)
|
||||
#define SYRK BLASFUNC(ssyrk)
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
#define POTRF BLASFUNC(xpotrf)
|
||||
#define POTRS BLASFUNC(xpotrs)
|
||||
#define POTRI BLASFUNC(xpotri)
|
||||
#define SYRK BLASFUNC(xherk)
|
||||
#elif defined(DOUBLE)
|
||||
#define POTRF BLASFUNC(zpotrf)
|
||||
#define POTRS BLASFUNC(zpotrs)
|
||||
#define POTRI BLASFUNC(zpotri)
|
||||
#define SYRK BLASFUNC(zherk)
|
||||
#else
|
||||
#define POTRF BLASFUNC(cpotrf)
|
||||
#define POTRS BLASFUNC(cpotrs)
|
||||
#define POTRI BLASFUNC(cpotri)
|
||||
#define SYRK BLASFUNC(cherk)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// extern void POTRI(char *uplo, blasint *m, FLOAT *a, blasint *lda, blasint *info);
|
||||
// extern void POTRS(char *uplo, blasint *m, blasint *n, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb, blasint *info);
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
#ifndef COMPLEX
|
||||
char *trans[] = {"T", "N"};
|
||||
#else
|
||||
char *trans[] = {"C", "N"};
|
||||
#endif
|
||||
char *uplo[] = {"U", "L"};
|
||||
FLOAT alpha[] = {1.0, 0.0};
|
||||
FLOAT beta [] = {0.0, 0.0};
|
||||
|
||||
FLOAT *a, *b;
|
||||
|
||||
char *p;
|
||||
char btest = 'F';
|
||||
|
||||
blasint m, i, j, info, uplos=0;
|
||||
double flops;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO")))
|
||||
if (*p == 'L') uplos=1;
|
||||
|
||||
if ((p = getenv("OPENBLAS_TEST"))) btest=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c\n", from, to, step,*uplo[uplos]);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
|
||||
#ifndef COMPLEX
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) a[i + j * m] = 0.;
|
||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[i + j * m] = 0.;
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) {
|
||||
a[(i + j * m) * 2 + 0] = 0.;
|
||||
a[(i + j * m) * 2 + 1] = 0.;
|
||||
}
|
||||
|
||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[(j + j * m) * 2 + 1] = 0.;
|
||||
|
||||
for(i = j + 1; i < m; i++) {
|
||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) {
|
||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[(j + j * m) * 2 + 1] = 0.;
|
||||
|
||||
for(i = j + 1; i < m; i++) {
|
||||
a[(i + j * m) * 2 + 0] = 0.;
|
||||
a[(i + j * m) * 2 + 1] = 0.;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
POTRF(uplo[uplos], &m, b, &m, &info);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info != 0) {
|
||||
fprintf(stderr, "Potrf info = %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6;
|
||||
|
||||
if ( btest == 'S' )
|
||||
{
|
||||
|
||||
for(j = 0; j < to; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info != 0) {
|
||||
fprintf(stderr, "Potrs info = %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6;
|
||||
|
||||
}
|
||||
|
||||
if ( btest == 'I' )
|
||||
{
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
POTRI(uplo[uplos], &m, b, &m, &info);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info != 0) {
|
||||
fprintf(stderr, "Potri info = %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6;
|
||||
}
|
||||
|
||||
fprintf(stderr, "%8d : %10.2f MFlops : %10.3f Sec : Test=%c\n",m,flops ,time1,btest);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
203
benchmark/symm.c
Normal file
203
benchmark/symm.c
Normal file
@@ -0,0 +1,203 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SYMM
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYMM BLASFUNC(dsymm)
|
||||
#else
|
||||
#define SYMM BLASFUNC(ssymm)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYMM BLASFUNC(zsymm)
|
||||
#else
|
||||
#define SYMM BLASFUNC(csymm)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char side='L';
|
||||
char uplo='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c\n", from, to, step,side,uplo);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SYMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
218
benchmark/symv.c
Normal file
218
benchmark/symv.c
Normal file
@@ -0,0 +1,218 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SYMV
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYMV BLASFUNC(dsymv)
|
||||
#else
|
||||
#define SYMV BLASFUNC(ssymv)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYMV BLASFUNC(zsymv)
|
||||
#else
|
||||
#define SYMV BLASFUNC(csymv)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char uplo='L';
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m,(int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SYMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
203
benchmark/syr2k.c
Normal file
203
benchmark/syr2k.c
Normal file
@@ -0,0 +1,203 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SYR2K
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYR2K BLASFUNC(dsyr2k)
|
||||
#else
|
||||
#define SYR2K BLASFUNC(ssyr2k)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYR2K BLASFUNC(zsyr2k)
|
||||
#else
|
||||
#define SYR2K BLASFUNC(csyr2k)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
char trans='N';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SYR2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
199
benchmark/syrk.c
Normal file
199
benchmark/syrk.c
Normal file
@@ -0,0 +1,199 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SYRK
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYRK BLASFUNC(dsyrk)
|
||||
#else
|
||||
#define SYRK BLASFUNC(ssyrk)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYRK BLASFUNC(zsyrk)
|
||||
#else
|
||||
#define SYRK BLASFUNC(csyrk)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
char trans='N';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);
|
||||
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SYRK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
42
benchmark/tplot-header
Normal file
42
benchmark/tplot-header
Normal file
@@ -0,0 +1,42 @@
|
||||
# **********************************************************************************
|
||||
# Copyright (c) 2014, The OpenBLAS Project
|
||||
# All rights reserved.
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# 3. Neither the name of the OpenBLAS project nor the names of
|
||||
# its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# **********************************************************************************
|
||||
|
||||
set term x11 font sans;
|
||||
set ylabel "MFlops";
|
||||
set xlabel "Size";
|
||||
set grid xtics;
|
||||
set grid ytics;
|
||||
set key left;
|
||||
set timestamp "generated on %Y-%m-%d by `whoami`"
|
||||
set title "Sgemv\nTRANS=T\nBulldozer"
|
||||
plot '1-THREAD' smooth bezier, '2-THREADS' smooth bezier, '4-THREADS' smooth bezier;
|
||||
set output "print.png";
|
||||
show title;
|
||||
show plot;
|
||||
show output;
|
||||
|
||||
|
||||
202
benchmark/trmm.c
Normal file
202
benchmark/trmm.c
Normal file
@@ -0,0 +1,202 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef TRMM
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define TRMM BLASFUNC(dtrmm)
|
||||
#else
|
||||
#define TRMM BLASFUNC(strmm)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define TRMM BLASFUNC(ztrmm)
|
||||
#else
|
||||
#define TRMM BLASFUNC(ctrmm)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char side ='L';
|
||||
char uplo ='U';
|
||||
char trans='N';
|
||||
char diag ='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c\n", from, to, step,side,uplo,trans,diag);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
TRMM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
202
benchmark/trsm.c
Normal file
202
benchmark/trsm.c
Normal file
@@ -0,0 +1,202 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef TRSM
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define TRSM BLASFUNC(dtrsm)
|
||||
#else
|
||||
#define TRSM BLASFUNC(strsm)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define TRSM BLASFUNC(ztrsm)
|
||||
#else
|
||||
#define TRSM BLASFUNC(ctrsm)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char side ='L';
|
||||
char uplo ='U';
|
||||
char trans='N';
|
||||
char diag ='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c\n", from, to, step,side,uplo,trans,diag);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
23
c_check
23
c_check
@@ -3,6 +3,7 @@
|
||||
# Checking cross compile
|
||||
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
|
||||
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
|
||||
$hostarch = "x86_64" if ($hostarch eq "amd64");
|
||||
|
||||
$binary = $ENV{"BINARY"};
|
||||
|
||||
@@ -80,6 +81,10 @@ if (($architecture eq "mips32") || ($architecture eq "mips64")) {
|
||||
$defined = 1;
|
||||
}
|
||||
|
||||
if (($architecture eq "arm") || ($architecture eq "arm64")) {
|
||||
$defined = 1;
|
||||
}
|
||||
|
||||
if ($architecture eq "alpha") {
|
||||
$defined = 1;
|
||||
$binary = 64;
|
||||
@@ -180,9 +185,9 @@ $linker_a = "";
|
||||
|
||||
{
|
||||
$link = `$compiler_name -c ctest2.c -o ctest2.o 2>&1 && $compiler_name $openmp -v ctest2.o -o ctest2 2>&1 && rm -f ctest2.o ctest2 ctest2.exe`;
|
||||
|
||||
|
||||
$link =~ s/\-Y\sP\,/\-Y/g;
|
||||
|
||||
|
||||
@flags = split(/[\s\,\n]/, $link);
|
||||
# remove leading and trailing quotes from each flag.
|
||||
@flags = map {s/^['"]|['"]$//g; $_} @flags;
|
||||
@@ -193,15 +198,15 @@ $linker_a = "";
|
||||
&& ($flags !~ /^-LIST:/)
|
||||
&& ($flags !~ /^-LANG:/)
|
||||
) {
|
||||
$linker_L .= $flags . " "
|
||||
$linker_L .= $flags . " "
|
||||
}
|
||||
|
||||
if ($flags =~ /^\-Y/) {
|
||||
$linker_L .= "-Wl,". $flags . " "
|
||||
$linker_L .= "-Wl,". $flags . " "
|
||||
}
|
||||
|
||||
|
||||
if (
|
||||
($flags =~ /^\-l/)
|
||||
($flags =~ /^\-l/)
|
||||
&& ($flags !~ /gfortranbegin/)
|
||||
&& ($flags !~ /frtbegin/)
|
||||
&& ($flags !~ /pathfstart/)
|
||||
@@ -213,7 +218,7 @@ $linker_a = "";
|
||||
&& ($flags !~ /advapi32/)
|
||||
&& ($flags !~ /shell32/)
|
||||
) {
|
||||
$linker_l .= $flags . " "
|
||||
$linker_l .= $flags . " "
|
||||
}
|
||||
|
||||
$linker_a .= $flags . " " if $flags =~ /\.a$/;
|
||||
@@ -250,9 +255,9 @@ print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64;
|
||||
print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
|
||||
|
||||
if ($os eq "LINUX") {
|
||||
|
||||
|
||||
# @pthread = split(/\s+/, `nm /lib/libpthread.so* | grep _pthread_create`);
|
||||
|
||||
|
||||
# if ($pthread[2] ne "") {
|
||||
# print CONFFILE "#define PTHREAD_CREATE_FUNC $pthread[2]\n";
|
||||
# } else {
|
||||
|
||||
58
cblas.h
58
cblas.h
@@ -13,17 +13,26 @@ extern "C" {
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
/*Get the number of threads on runtime.*/
|
||||
int openblas_get_num_threads(void);
|
||||
|
||||
/*Get the number of physical processors (cores).*/
|
||||
int openblas_get_num_procs(void);
|
||||
|
||||
/*Get the build configure on runtime.*/
|
||||
char* openblas_get_config(void);
|
||||
|
||||
/*Get the CPU corename on runtime.*/
|
||||
char* openblas_get_corename(void);
|
||||
|
||||
/* Get the parallelization type which is used by OpenBLAS */
|
||||
int openblas_get_parallel(void);
|
||||
int openblas_get_parallel(void);
|
||||
/* OpenBLAS is compiled for sequential use */
|
||||
#define OPENBLAS_SEQUENTIAL 0
|
||||
/* OpenBLAS is compiled using normal threading model */
|
||||
#define OPENBLAS_THREAD 1
|
||||
#define OPENBLAS_THREAD 1
|
||||
/* OpenBLAS is compiled using OpenMP threading model */
|
||||
#define OPENBLAS_OPENMP 2
|
||||
#define OPENBLAS_OPENMP 2
|
||||
|
||||
|
||||
/*
|
||||
@@ -240,8 +249,13 @@ void cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLA
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_cgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_cgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
|
||||
void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
@@ -305,6 +319,44 @@ void cblas_zher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBL
|
||||
|
||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
||||
|
||||
/*** BLAS extensions ***/
|
||||
|
||||
void cblas_saxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_daxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_caxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST float *beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_zaxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST double *beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_somatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, OPENBLAS_CONST float *a,
|
||||
OPENBLAS_CONST blasint clda, float *b, OPENBLAS_CONST blasint cldb);
|
||||
void cblas_domatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, OPENBLAS_CONST double *a,
|
||||
OPENBLAS_CONST blasint clda, double *b, OPENBLAS_CONST blasint cldb);
|
||||
void cblas_comatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float* calpha, OPENBLAS_CONST float* a,
|
||||
OPENBLAS_CONST blasint clda, float*b, OPENBLAS_CONST blasint cldb);
|
||||
void cblas_zomatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double* calpha, OPENBLAS_CONST double* a,
|
||||
OPENBLAS_CONST blasint clda, double *b, OPENBLAS_CONST blasint cldb);
|
||||
|
||||
void cblas_simatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, float *a,
|
||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
|
||||
void cblas_dimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, double *a,
|
||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
|
||||
void cblas_cimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float* calpha, float* a,
|
||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
|
||||
void cblas_zimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double* calpha, double* a,
|
||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
|
||||
|
||||
void cblas_sgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float cbeta,
|
||||
float *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_dgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double cbeta,
|
||||
double *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float *calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float *cbeta,
|
||||
float *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta,
|
||||
double *c, OPENBLAS_CONST blasint cldc);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
@@ -13,17 +13,23 @@ extern "C" {
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
/*Get the number of threads on runtime.*/
|
||||
int openblas_get_num_threads(void);
|
||||
|
||||
/*Get the number of physical processors (cores).*/
|
||||
int openblas_get_num_procs(void);
|
||||
|
||||
/*Get the build configure on runtime.*/
|
||||
char* openblas_get_config(void);
|
||||
|
||||
/* Get the parallelization type which is used by OpenBLAS */
|
||||
int openblas_get_parallel(void);
|
||||
int openblas_get_parallel(void);
|
||||
/* OpenBLAS is compiled for sequential use */
|
||||
#define OPENBLAS_SEQUENTIAL 0
|
||||
/* OpenBLAS is compiled using normal threading model */
|
||||
#define OPENBLAS_THREAD 1
|
||||
#define OPENBLAS_THREAD 1
|
||||
/* OpenBLAS is compiled using OpenMP threading model */
|
||||
#define OPENBLAS_OPENMP 2
|
||||
#define OPENBLAS_OPENMP 2
|
||||
|
||||
|
||||
#define CBLAS_INDEX size_t
|
||||
@@ -231,8 +237,12 @@ void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS
|
||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_cgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
void cblas_zgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
@@ -296,6 +306,43 @@ void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANS
|
||||
|
||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
||||
|
||||
/*** BLAS extensions ***/
|
||||
|
||||
void cblas_saxpby(blasint n, float alpha, float *x, blasint incx,float beta, float *y, blasint incy);
|
||||
|
||||
void cblas_daxpby(blasint n, double alpha, double *x, blasint incx,double beta, double *y, blasint incy);
|
||||
|
||||
void cblas_caxpby(blasint n, float *alpha, float *x, blasint incx,float *beta, float *y, blasint incy);
|
||||
|
||||
void cblas_zaxpby(blasint n, double *alpha, double *x, blasint incx,double *beta, double *y, blasint incy);
|
||||
|
||||
void cblas_somatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float calpha, float *a,
|
||||
blasint clda, float *b, blasint cldb);
|
||||
void cblas_domatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double calpha, double *a,
|
||||
blasint clda, double *b, blasint cldb);
|
||||
void cblas_comatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, void* calpha, void* a,
|
||||
blasint clda, void *b, blasint cldb);
|
||||
void cblas_zomatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, void* calpha, void* a,
|
||||
blasint clda, void *b, blasint cldb);
|
||||
|
||||
void cblas_simatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float calpha, float *a,
|
||||
blasint clda, blasint cldb);
|
||||
void cblas_dimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double calpha, double *a,
|
||||
blasint clda, blasint cldb);
|
||||
void cblas_cimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float* calpha, float* a,
|
||||
blasint clda, blasint cldb);
|
||||
void cblas_zimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double* calpha, double* a,
|
||||
blasint clda, blasint cldb);
|
||||
|
||||
void cblas_sgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float calpha, float *a, blasint clda, float cbeta,
|
||||
float *c, blasint cldc);
|
||||
void cblas_dgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double calpha, double *a, blasint clda, double cbeta,
|
||||
double *c, blasint cldc);
|
||||
void cblas_cgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float *calpha, float *a, blasint clda, float *cbeta,
|
||||
float *c, blasint cldc);
|
||||
void cblas_zgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double *calpha, double *a, blasint clda, double *cbeta,
|
||||
double *c, blasint cldc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
58
common.h
58
common.h
@@ -310,15 +310,31 @@ typedef int blasint;
|
||||
#define YIELDING SwitchToThread()
|
||||
#endif
|
||||
|
||||
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8)
|
||||
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
|
||||
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
|
||||
#endif
|
||||
|
||||
#ifdef PILEDRIVER
|
||||
#ifdef BULLDOZER
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef PILEDRIVER
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
#ifdef STEAMROLLER
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
#endif
|
||||
*/
|
||||
|
||||
#ifndef YIELDING
|
||||
#define YIELDING sched_yield()
|
||||
#endif
|
||||
@@ -380,6 +396,15 @@ please https://github.com/xianyi/OpenBLAS/issues/246
|
||||
#include "common_arm64.h"
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
#ifdef OS_WINDOWS
|
||||
typedef char env_var_t[MAX_PATH];
|
||||
#define readenv(p, n) GetEnvironmentVariable((n), (p), sizeof(p))
|
||||
#else
|
||||
typedef char* env_var_t;
|
||||
#define readenv(p, n) ((p)=getenv(n))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#include "common_linux.h"
|
||||
@@ -507,14 +532,21 @@ static __inline void blas_unlock(volatile BLASULONG *address){
|
||||
*address = 0;
|
||||
}
|
||||
|
||||
static __inline int readenv(char *env) {
|
||||
|
||||
char *p;
|
||||
|
||||
p = getenv(env);
|
||||
|
||||
if (p == NULL) return 0; else return atoi(p);
|
||||
#ifdef OS_WINDOWS
|
||||
static __inline int readenv_atoi(char *env) {
|
||||
env_var_t p;
|
||||
return readenv(p,env) ? 0 : atoi(p);
|
||||
}
|
||||
#else
|
||||
static __inline int readenv_atoi(char *env) {
|
||||
char *p;
|
||||
if (( p = getenv(env) ))
|
||||
return (atoi(p));
|
||||
else
|
||||
return(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
|
||||
@@ -523,7 +555,7 @@ static __inline void compinv(FLOAT *b, FLOAT ar, FLOAT ai){
|
||||
|
||||
#ifndef UNIT
|
||||
FLOAT ratio, den;
|
||||
|
||||
|
||||
if (
|
||||
#ifdef XDOUBLE
|
||||
(fabsl(ar)) >= (fabsl(ai))
|
||||
@@ -549,7 +581,7 @@ static __inline void compinv(FLOAT *b, FLOAT ar, FLOAT ai){
|
||||
b[0] = ONE;
|
||||
b[1] = ZERO;
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -679,13 +711,13 @@ extern int gotoblas_profile;
|
||||
#define PRINT_DEBUG_CNAME
|
||||
#define PRINT_DEBUG_NAME
|
||||
#else
|
||||
#define PRINT_DEBUG_CNAME if (readenv("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_CNAME)
|
||||
#define PRINT_DEBUG_NAME if (readenv("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
|
||||
#define PRINT_DEBUG_CNAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_CNAME)
|
||||
#define PRINT_DEBUG_NAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
||||
|
||||
32
common_arm.h
32
common_arm.h
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -13,21 +13,21 @@ met:
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
@@ -94,7 +94,7 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||
"mov %0 , r3 \n\t"
|
||||
: "=r"(ret), "=r"(address)
|
||||
: "1"(address)
|
||||
: "memory", "r2" , "r3"
|
||||
: "memory", "r2" , "r3"
|
||||
|
||||
|
||||
);
|
||||
@@ -143,7 +143,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||
.func REALNAME ;\
|
||||
REALNAME:
|
||||
|
||||
#define EPILOGUE
|
||||
#define EPILOGUE
|
||||
|
||||
#define PROFCODE
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -13,21 +13,21 @@ met:
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
@@ -94,7 +94,7 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||
"mov %0 , r3 \n\t"
|
||||
: "=r"(ret), "=r"(address)
|
||||
: "1"(address)
|
||||
: "memory", "r2" , "r3"
|
||||
: "memory", "r2" , "r3"
|
||||
|
||||
|
||||
);
|
||||
@@ -119,9 +119,9 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||
}
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("str d1, %0" : "=m"(res) : : "memory")
|
||||
#else
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("str s1, %0" : "=m"(res) : : "memory")
|
||||
#endif
|
||||
|
||||
#define GET_IMAGE_CANCEL
|
||||
@@ -138,12 +138,11 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||
|
||||
#define PROLOGUE \
|
||||
.arm ;\
|
||||
.global REALNAME ;\
|
||||
.func REALNAME ;\
|
||||
REALNAME:
|
||||
|
||||
#define EPILOGUE
|
||||
#define EPILOGUE
|
||||
|
||||
#define PROFCODE
|
||||
|
||||
|
||||
25
common_c.h
25
common_c.h
@@ -209,6 +209,19 @@
|
||||
#define CNEG_TCOPY cneg_tcopy
|
||||
#define CLASWP_NCOPY claswp_ncopy
|
||||
|
||||
#define CAXPBY_K caxpby_k
|
||||
|
||||
#define COMATCOPY_K_CN comatcopy_k_cn
|
||||
#define COMATCOPY_K_RN comatcopy_k_rn
|
||||
#define COMATCOPY_K_CT comatcopy_k_ct
|
||||
#define COMATCOPY_K_RT comatcopy_k_rt
|
||||
#define COMATCOPY_K_CNC comatcopy_k_cnc
|
||||
#define COMATCOPY_K_RNC comatcopy_k_rnc
|
||||
#define COMATCOPY_K_CTC comatcopy_k_ctc
|
||||
#define COMATCOPY_K_RTC comatcopy_k_rtc
|
||||
|
||||
#define CGEADD_K cgeadd_k
|
||||
|
||||
#else
|
||||
|
||||
#define CAMAX_K gotoblas -> camax_k
|
||||
@@ -380,6 +393,18 @@
|
||||
#define CNEG_TCOPY gotoblas -> cneg_tcopy
|
||||
#define CLASWP_NCOPY gotoblas -> claswp_ncopy
|
||||
|
||||
#define CAXPBY_K gotoblas -> caxpby_k
|
||||
|
||||
#define COMATCOPY_K_CN gotoblas -> comatcopy_k_cn
|
||||
#define COMATCOPY_K_RN gotoblas -> comatcopy_k_rn
|
||||
#define COMATCOPY_K_CT gotoblas -> comatcopy_k_ct
|
||||
#define COMATCOPY_K_RT gotoblas -> comatcopy_k_rt
|
||||
#define COMATCOPY_K_CNC gotoblas -> comatcopy_k_cnc
|
||||
#define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc
|
||||
#define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc
|
||||
#define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc
|
||||
#define CGEADD_K gotoblas -> cgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
#define CGEMM_NN cgemm_nn
|
||||
|
||||
15
common_d.h
15
common_d.h
@@ -144,6 +144,13 @@
|
||||
#define DNEG_TCOPY dneg_tcopy
|
||||
#define DLASWP_NCOPY dlaswp_ncopy
|
||||
|
||||
#define DAXPBY_K daxpby_k
|
||||
#define DOMATCOPY_K_CN domatcopy_k_cn
|
||||
#define DOMATCOPY_K_RN domatcopy_k_rn
|
||||
#define DOMATCOPY_K_CT domatcopy_k_ct
|
||||
#define DOMATCOPY_K_RT domatcopy_k_rt
|
||||
#define DGEADD_K dgeadd_k
|
||||
|
||||
#else
|
||||
|
||||
#define DAMAX_K gotoblas -> damax_k
|
||||
@@ -255,6 +262,14 @@
|
||||
#define DNEG_TCOPY gotoblas -> dneg_tcopy
|
||||
#define DLASWP_NCOPY gotoblas -> dlaswp_ncopy
|
||||
|
||||
#define DAXPBY_K gotoblas -> daxpby_k
|
||||
#define DOMATCOPY_K_CN gotoblas -> domatcopy_k_cn
|
||||
#define DOMATCOPY_K_RN gotoblas -> domatcopy_k_rn
|
||||
#define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct
|
||||
#define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt
|
||||
|
||||
#define DGEADD_K gotoblas -> dgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
#define DGEMM_NN dgemm_nn
|
||||
|
||||
@@ -58,10 +58,10 @@
|
||||
static __inline void blas_lock(volatile unsigned long *address){
|
||||
|
||||
unsigned long ret;
|
||||
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
|
||||
__asm__ __volatile__ ("mov ar.ccv=r0\n;;\n"
|
||||
"cmpxchg4.acq %0=[%2],%1,ar.ccv\n"
|
||||
: "=r"(ret) : "r"(1), "r"(address)
|
||||
|
||||
@@ -238,17 +238,17 @@ void BLASFUNC(xgeru)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(xgerc)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(dgemv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(dgemv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(qgemv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(qgemv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
void BLASFUNC(cgemv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(cgemv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(zgemv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(zgemv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(xgemv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(xgemv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(strsv) (char *, char *, char *, blasint *, float *, blasint *,
|
||||
@@ -257,24 +257,24 @@ void BLASFUNC(dtrsv) (char *, char *, char *, blasint *, double *, blasint *,
|
||||
double *, blasint *);
|
||||
void BLASFUNC(qtrsv) (char *, char *, char *, blasint *, xdouble *, blasint *,
|
||||
xdouble *, blasint *);
|
||||
void BLASFUNC(ctrsv) (char *, char *, char *, blasint *, float *, blasint *,
|
||||
void BLASFUNC(ctrsv) (char *, char *, char *, blasint *, float *, blasint *,
|
||||
float *, blasint *);
|
||||
void BLASFUNC(ztrsv) (char *, char *, char *, blasint *, double *, blasint *,
|
||||
void BLASFUNC(ztrsv) (char *, char *, char *, blasint *, double *, blasint *,
|
||||
double *, blasint *);
|
||||
void BLASFUNC(xtrsv) (char *, char *, char *, blasint *, xdouble *, blasint *,
|
||||
void BLASFUNC(xtrsv) (char *, char *, char *, blasint *, xdouble *, blasint *,
|
||||
xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(strmv) (char *, char *, char *, blasint *, float *, blasint *,
|
||||
float *, blasint *);
|
||||
void BLASFUNC(dtrmv) (char *, char *, char *, blasint *, double *, blasint *,
|
||||
void BLASFUNC(dtrmv) (char *, char *, char *, blasint *, double *, blasint *,
|
||||
double *, blasint *);
|
||||
void BLASFUNC(qtrmv) (char *, char *, char *, blasint *, xdouble *, blasint *,
|
||||
void BLASFUNC(qtrmv) (char *, char *, char *, blasint *, xdouble *, blasint *,
|
||||
xdouble *, blasint *);
|
||||
void BLASFUNC(ctrmv) (char *, char *, char *, blasint *, float *, blasint *,
|
||||
void BLASFUNC(ctrmv) (char *, char *, char *, blasint *, float *, blasint *,
|
||||
float *, blasint *);
|
||||
void BLASFUNC(ztrmv) (char *, char *, char *, blasint *, double *, blasint *,
|
||||
void BLASFUNC(ztrmv) (char *, char *, char *, blasint *, double *, blasint *,
|
||||
double *, blasint *);
|
||||
void BLASFUNC(xtrmv) (char *, char *, char *, blasint *, xdouble *, blasint *,
|
||||
void BLASFUNC(xtrmv) (char *, char *, char *, blasint *, xdouble *, blasint *,
|
||||
xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(stpsv) (char *, char *, char *, blasint *, float *, float *, blasint *);
|
||||
@@ -305,24 +305,24 @@ void BLASFUNC(ctbsv) (char *, char *, char *, blasint *, blasint *, float *, bl
|
||||
void BLASFUNC(ztbsv) (char *, char *, char *, blasint *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(xtbsv) (char *, char *, char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(ssymv) (char *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(ssymv) (char *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(dsymv) (char *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(dsymv) (char *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(qsymv) (char *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(qsymv) (char *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
void BLASFUNC(csymv) (char *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(csymv) (char *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(zsymv) (char *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(zsymv) (char *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(xsymv) (char *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(xsymv) (char *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(sspmv) (char *, blasint *, float *, float *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(dspmv) (char *, blasint *, double *, double *,
|
||||
void BLASFUNC(dspmv) (char *, blasint *, double *, double *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(qspmv) (char *, blasint *, xdouble *, xdouble *,
|
||||
void BLASFUNC(qspmv) (char *, blasint *, xdouble *, xdouble *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
void BLASFUNC(cspmv) (char *, blasint *, float *, float *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
@@ -344,17 +344,17 @@ void BLASFUNC(zsyr) (char *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(xsyr) (char *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(ssyr2) (char *, blasint *, float *,
|
||||
void BLASFUNC(ssyr2) (char *, blasint *, float *,
|
||||
float *, blasint *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(dsyr2) (char *, blasint *, double *,
|
||||
void BLASFUNC(dsyr2) (char *, blasint *, double *,
|
||||
double *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(qsyr2) (char *, blasint *, xdouble *,
|
||||
void BLASFUNC(qsyr2) (char *, blasint *, xdouble *,
|
||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
void BLASFUNC(csyr2) (char *, blasint *, float *,
|
||||
void BLASFUNC(csyr2) (char *, blasint *, float *,
|
||||
float *, blasint *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(zsyr2) (char *, blasint *, double *,
|
||||
void BLASFUNC(zsyr2) (char *, blasint *, double *,
|
||||
double *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(xsyr2) (char *, blasint *, xdouble *,
|
||||
void BLASFUNC(xsyr2) (char *, blasint *, xdouble *,
|
||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(sspr) (char *, blasint *, float *, float *, blasint *,
|
||||
@@ -370,17 +370,17 @@ void BLASFUNC(zspr) (char *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(xspr) (char *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *);
|
||||
|
||||
void BLASFUNC(sspr2) (char *, blasint *, float *,
|
||||
void BLASFUNC(sspr2) (char *, blasint *, float *,
|
||||
float *, blasint *, float *, blasint *, float *);
|
||||
void BLASFUNC(dspr2) (char *, blasint *, double *,
|
||||
void BLASFUNC(dspr2) (char *, blasint *, double *,
|
||||
double *, blasint *, double *, blasint *, double *);
|
||||
void BLASFUNC(qspr2) (char *, blasint *, xdouble *,
|
||||
void BLASFUNC(qspr2) (char *, blasint *, xdouble *,
|
||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *);
|
||||
void BLASFUNC(cspr2) (char *, blasint *, float *,
|
||||
void BLASFUNC(cspr2) (char *, blasint *, float *,
|
||||
float *, blasint *, float *, blasint *, float *);
|
||||
void BLASFUNC(zspr2) (char *, blasint *, double *,
|
||||
void BLASFUNC(zspr2) (char *, blasint *, double *,
|
||||
double *, blasint *, double *, blasint *, double *);
|
||||
void BLASFUNC(xspr2) (char *, blasint *, xdouble *,
|
||||
void BLASFUNC(xspr2) (char *, blasint *, xdouble *,
|
||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *);
|
||||
|
||||
void BLASFUNC(cher) (char *, blasint *, float *, float *, blasint *,
|
||||
@@ -394,25 +394,25 @@ void BLASFUNC(chpr) (char *, blasint *, float *, float *, blasint *, float *
|
||||
void BLASFUNC(zhpr) (char *, blasint *, double *, double *, blasint *, double *);
|
||||
void BLASFUNC(xhpr) (char *, blasint *, xdouble *, xdouble *, blasint *, xdouble *);
|
||||
|
||||
void BLASFUNC(cher2) (char *, blasint *, float *,
|
||||
void BLASFUNC(cher2) (char *, blasint *, float *,
|
||||
float *, blasint *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(zher2) (char *, blasint *, double *,
|
||||
void BLASFUNC(zher2) (char *, blasint *, double *,
|
||||
double *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(xher2) (char *, blasint *, xdouble *,
|
||||
void BLASFUNC(xher2) (char *, blasint *, xdouble *,
|
||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(chpr2) (char *, blasint *, float *,
|
||||
void BLASFUNC(chpr2) (char *, blasint *, float *,
|
||||
float *, blasint *, float *, blasint *, float *);
|
||||
void BLASFUNC(zhpr2) (char *, blasint *, double *,
|
||||
void BLASFUNC(zhpr2) (char *, blasint *, double *,
|
||||
double *, blasint *, double *, blasint *, double *);
|
||||
void BLASFUNC(xhpr2) (char *, blasint *, xdouble *,
|
||||
void BLASFUNC(xhpr2) (char *, blasint *, xdouble *,
|
||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *);
|
||||
|
||||
void BLASFUNC(chemv) (char *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(chemv) (char *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(zhemv) (char *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(zhemv) (char *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(xhemv) (char *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(xhemv) (char *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(chpmv) (char *, blasint *, float *, float *,
|
||||
@@ -427,37 +427,37 @@ int BLASFUNC(dnorm)(char *, blasint *, blasint *, double *, blasint *);
|
||||
int BLASFUNC(cnorm)(char *, blasint *, blasint *, float *, blasint *);
|
||||
int BLASFUNC(znorm)(char *, blasint *, blasint *, double *, blasint *);
|
||||
|
||||
void BLASFUNC(sgbmv)(char *, blasint *, blasint *, blasint *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(sgbmv)(char *, blasint *, blasint *, blasint *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(dgbmv)(char *, blasint *, blasint *, blasint *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(dgbmv)(char *, blasint *, blasint *, blasint *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(qgbmv)(char *, blasint *, blasint *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(qgbmv)(char *, blasint *, blasint *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
void BLASFUNC(cgbmv)(char *, blasint *, blasint *, blasint *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(cgbmv)(char *, blasint *, blasint *, blasint *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(zgbmv)(char *, blasint *, blasint *, blasint *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(zgbmv)(char *, blasint *, blasint *, blasint *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(xgbmv)(char *, blasint *, blasint *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(xgbmv)(char *, blasint *, blasint *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(ssbmv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(ssbmv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(dsbmv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(dsbmv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(qsbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(qsbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
void BLASFUNC(csbmv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(csbmv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(zsbmv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(zsbmv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(xsbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(xsbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(chbmv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(chbmv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(zhbmv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(zhbmv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(xhbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(xhbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
|
||||
/* Level 3 routines */
|
||||
@@ -606,18 +606,18 @@ int BLASFUNC(sgemt)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *);
|
||||
int BLASFUNC(dgemt)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *);
|
||||
int BLASFUNC(cgemt)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
int BLASFUNC(cgemt)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *);
|
||||
int BLASFUNC(zgemt)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *);
|
||||
|
||||
int BLASFUNC(sgema)(char *, char *, blasint *, blasint *, float *,
|
||||
int BLASFUNC(sgema)(char *, char *, blasint *, blasint *, float *,
|
||||
float *, blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
int BLASFUNC(dgema)(char *, char *, blasint *, blasint *, double *,
|
||||
double *, blasint *, double*, double *, blasint *, double*, blasint *);
|
||||
int BLASFUNC(cgema)(char *, char *, blasint *, blasint *, float *,
|
||||
float *, blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
int BLASFUNC(zgema)(char *, char *, blasint *, blasint *, double *,
|
||||
int BLASFUNC(zgema)(char *, char *, blasint *, blasint *, double *,
|
||||
double *, blasint *, double*, double *, blasint *, double*, blasint *);
|
||||
|
||||
int BLASFUNC(sgems)(char *, char *, blasint *, blasint *, float *,
|
||||
@@ -679,13 +679,6 @@ int BLASFUNC(cgesv)(blasint *, blasint *, float *, blasint *, blasint *, float
|
||||
int BLASFUNC(zgesv)(blasint *, blasint *, double *, blasint *, blasint *, double*, blasint *, blasint *);
|
||||
int BLASFUNC(xgesv)(blasint *, blasint *, xdouble *, blasint *, blasint *, xdouble*, blasint *, blasint *);
|
||||
|
||||
int BLASFUNC(sgesvd)(char *, char *, blasint *, blasint *, float *, blasint *, float *, float *, blasint *, float *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(dgesvd)(char *, char *, blasint *, blasint *, double *, blasint *, double *, double *, blasint *, double *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(qgesvd)(char *, char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *, blasint *);
|
||||
int BLASFUNC(cgesvd)(char *, char *, blasint *, blasint *, float *, blasint *, float *, float *, blasint *, float *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(zgesvd)(char *, char *, blasint *, blasint *, double *, blasint *, double *, double *, blasint *, double *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(xgesvd)(char *, char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *, blasint *);
|
||||
|
||||
int BLASFUNC(spotf2)(char *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(dpotf2)(char *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(qpotf2)(char *, blasint *, xdouble *, blasint *, blasint *);
|
||||
@@ -735,19 +728,6 @@ int BLASFUNC(ctrtri)(char *, char *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(ztrtri)(char *, char *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(xtrtri)(char *, char *, blasint *, xdouble *, blasint *, blasint *);
|
||||
|
||||
int BLASFUNC(spotri)(char *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(dpotri)(char *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(qpotri)(char *, blasint *, xdouble *, blasint *, blasint *);
|
||||
int BLASFUNC(cpotri)(char *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(zpotri)(char *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(xpotri)(char *, blasint *, xdouble *, blasint *, blasint *);
|
||||
|
||||
int BLASFUNC(slarf)(char *, blasint *, blasint *, float *, blasint *, float *, float *, blasint *, float *);
|
||||
int BLASFUNC(dlarf)(char *, blasint *, blasint *, double *, blasint *, double *, double *, blasint *, double *);
|
||||
int BLASFUNC(qlarf)(char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *, xdouble *);
|
||||
int BLASFUNC(clarf)(char *, blasint *, blasint *, float *, blasint *, float *, float *, blasint *, float *);
|
||||
int BLASFUNC(zlarf)(char *, blasint *, blasint *, double *, blasint *, double *, double *, blasint *, double *);
|
||||
int BLASFUNC(xlarf)(char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *, xdouble *);
|
||||
|
||||
FLOATRET BLASFUNC(slamch)(char *);
|
||||
double BLASFUNC(dlamch)(char *);
|
||||
@@ -757,9 +737,32 @@ FLOATRET BLASFUNC(slamc3)(float *, float *);
|
||||
double BLASFUNC(dlamc3)(double *, double *);
|
||||
xdouble BLASFUNC(qlamc3)(xdouble *, xdouble *);
|
||||
|
||||
/* BLAS extensions */
|
||||
|
||||
void BLASFUNC(saxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(daxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(caxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(zaxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *);
|
||||
|
||||
void BLASFUNC(somatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(domatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(comatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(zomatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *);
|
||||
|
||||
void BLASFUNC(simatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *);
|
||||
void BLASFUNC(dimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *);
|
||||
void BLASFUNC(cimatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *);
|
||||
void BLASFUNC(zimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *);
|
||||
|
||||
void BLASFUNC(sgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*);
|
||||
void BLASFUNC(dgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*);
|
||||
void BLASFUNC(cgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*);
|
||||
void BLASFUNC(zgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
||||
|
||||
@@ -54,11 +54,11 @@ double _Complex zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
xdouble _Complex xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
xdouble _Complex xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
int saxpy_k (BLASLONG, BLASLONG, BLASLONG, float,
|
||||
int saxpy_k (BLASLONG, BLASLONG, BLASLONG, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int daxpy_k (BLASLONG, BLASLONG, BLASLONG, double,
|
||||
int daxpy_k (BLASLONG, BLASLONG, BLASLONG, double,
|
||||
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
int qaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
|
||||
int qaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
|
||||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
int caxpy_k (BLASLONG, BLASLONG, BLASLONG, float, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
@@ -80,11 +80,11 @@ int ccopy_k(BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int zcopy_k(BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
int xcopy_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
int sswap_k (BLASLONG, BLASLONG, BLASLONG, float,
|
||||
int sswap_k (BLASLONG, BLASLONG, BLASLONG, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int dswap_k (BLASLONG, BLASLONG, BLASLONG, double,
|
||||
int dswap_k (BLASLONG, BLASLONG, BLASLONG, double,
|
||||
double *, BLASLONG, double *, BLASLONG, double*, BLASLONG);
|
||||
int qswap_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
|
||||
int qswap_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
|
||||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble*, BLASLONG);
|
||||
int cswap_k (BLASLONG, BLASLONG, BLASLONG, float, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
@@ -156,11 +156,11 @@ BLASLONG icmin_k(BLASLONG, float *, BLASLONG);
|
||||
BLASLONG izmin_k(BLASLONG, double *, BLASLONG);
|
||||
BLASLONG ixmin_k(BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
int sscal_k(BLASLONG, BLASLONG, BLASLONG, float,
|
||||
int sscal_k(BLASLONG, BLASLONG, BLASLONG, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int dscal_k(BLASLONG, BLASLONG, BLASLONG, double,
|
||||
int dscal_k(BLASLONG, BLASLONG, BLASLONG, double,
|
||||
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
int qscal_k(BLASLONG, BLASLONG, BLASLONG, xdouble,
|
||||
int qscal_k(BLASLONG, BLASLONG, BLASLONG, xdouble,
|
||||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
int cscal_k(BLASLONG, BLASLONG, BLASLONG, float, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
@@ -204,6 +204,13 @@ int srotm_k (BLASLONG, float, BLASLONG, float, BLASLONG, float);
|
||||
int drotm_k (BLASLONG, double, BLASLONG, double, BLASLONG, double);
|
||||
int qrotm_k (BLASLONG, xdouble, BLASLONG, xdouble, BLASLONG, xdouble);
|
||||
|
||||
|
||||
int saxpby_k (BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
|
||||
int daxpby_k (BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
|
||||
int caxpby_k (BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
|
||||
int zaxpby_k (BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);
|
||||
|
||||
|
||||
#ifdef __CUDACC__
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -986,24 +986,24 @@ int cnorm_t(BLASLONG, BLASLONG, float *a, BLASLONG);
|
||||
int znorm_n(BLASLONG, BLASLONG, double *a, BLASLONG);
|
||||
int znorm_t(BLASLONG, BLASLONG, double *a, BLASLONG);
|
||||
|
||||
void sgbmv_n(BLASLONG, BLASLONG, BLASLONG, BLASLONG, float,
|
||||
void sgbmv_n(BLASLONG, BLASLONG, BLASLONG, BLASLONG, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, void *buffer);
|
||||
void sgbmv_t(BLASLONG, BLASLONG, BLASLONG, BLASLONG, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, void *buffer);
|
||||
|
||||
void dgbmv_n(BLASLONG, BLASLONG, BLASLONG, BLASLONG, double,
|
||||
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, void *buffer);
|
||||
void dgbmv_t(BLASLONG, BLASLONG, BLASLONG, BLASLONG, double,
|
||||
void dgbmv_t(BLASLONG, BLASLONG, BLASLONG, BLASLONG, double,
|
||||
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, void *buffer);
|
||||
|
||||
void qgbmv_n(BLASLONG, BLASLONG, BLASLONG, BLASLONG, xdouble,
|
||||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, void *buffer);
|
||||
void qgbmv_t(BLASLONG, BLASLONG, BLASLONG, BLASLONG, xdouble,
|
||||
void qgbmv_t(BLASLONG, BLASLONG, BLASLONG, BLASLONG, xdouble,
|
||||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, void *buffer);
|
||||
|
||||
void cgbmv_n(BLASLONG, BLASLONG, BLASLONG, BLASLONG, float, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, void *buffer);
|
||||
void cgbmv_t(BLASLONG, BLASLONG, BLASLONG, BLASLONG, float, float,
|
||||
void cgbmv_t(BLASLONG, BLASLONG, BLASLONG, BLASLONG, float, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, void *buffer);
|
||||
void cgbmv_r(BLASLONG, BLASLONG, BLASLONG, BLASLONG, float, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, void *buffer);
|
||||
@@ -1052,24 +1052,24 @@ void xgbmv_s(BLASLONG, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
|
||||
void xgbmv_d(BLASLONG, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
|
||||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, void *buffer);
|
||||
|
||||
int sgbmv_thread_n(BLASLONG, BLASLONG, BLASLONG, BLASLONG, float,
|
||||
int sgbmv_thread_n(BLASLONG, BLASLONG, BLASLONG, BLASLONG, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *buffer, int);
|
||||
int sgbmv_thread_t(BLASLONG, BLASLONG, BLASLONG, BLASLONG, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *buffer, int);
|
||||
|
||||
int dgbmv_thread_n(BLASLONG, BLASLONG, BLASLONG, BLASLONG, double,
|
||||
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *buffer, int);
|
||||
int dgbmv_thread_t(BLASLONG, BLASLONG, BLASLONG, BLASLONG, double,
|
||||
int dgbmv_thread_t(BLASLONG, BLASLONG, BLASLONG, BLASLONG, double,
|
||||
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *buffer, int);
|
||||
|
||||
int qgbmv_thread_n(BLASLONG, BLASLONG, BLASLONG, BLASLONG, xdouble,
|
||||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *buffer, int);
|
||||
int qgbmv_thread_t(BLASLONG, BLASLONG, BLASLONG, BLASLONG, xdouble,
|
||||
int qgbmv_thread_t(BLASLONG, BLASLONG, BLASLONG, BLASLONG, xdouble,
|
||||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *buffer, int);
|
||||
|
||||
int cgbmv_thread_n(BLASLONG, BLASLONG, BLASLONG, BLASLONG, float *,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *buffer, int);
|
||||
int cgbmv_thread_t(BLASLONG, BLASLONG, BLASLONG, BLASLONG, float *,
|
||||
int cgbmv_thread_t(BLASLONG, BLASLONG, BLASLONG, BLASLONG, float *,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *buffer, int);
|
||||
int cgbmv_thread_r(BLASLONG, BLASLONG, BLASLONG, BLASLONG, float *,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *buffer, int);
|
||||
|
||||
@@ -47,9 +47,9 @@ __global__ void cuda_dgemm_kernel(int, int, int, double *, double *, double *);
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int sgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
|
||||
int sgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int dgemm_beta(BLASLONG, BLASLONG, BLASLONG, double,
|
||||
int dgemm_beta(BLASLONG, BLASLONG, BLASLONG, double,
|
||||
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
int cgemm_beta(BLASLONG, BLASLONG, BLASLONG, float, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
@@ -57,12 +57,12 @@ int zgemm_beta(BLASLONG, BLASLONG, BLASLONG, double, double,
|
||||
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
|
||||
#ifdef EXPRECISION
|
||||
int qgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble,
|
||||
int qgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble,
|
||||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
int xgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
|
||||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
#else
|
||||
int qgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble *,
|
||||
int qgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble *,
|
||||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
int xgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble *,
|
||||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
@@ -1732,6 +1732,42 @@ int zgemc_otcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b, BLA
|
||||
int xgemc_oncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b, BLASLONG ldb, xdouble *c);
|
||||
int xgemc_otcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b, BLASLONG ldb, xdouble *c);
|
||||
|
||||
int somatcopy_k_cn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int somatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int somatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int somatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
|
||||
|
||||
int domatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int domatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int domatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int domatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
|
||||
|
||||
int comatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
|
||||
int comatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
|
||||
int zomatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
|
||||
int zomatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
|
||||
int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG);
|
||||
int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG);
|
||||
int cgeadd_k(BLASLONG, BLASLONG, float, float, float*, BLASLONG, float, float, float *, BLASLONG);
|
||||
int zgeadd_k(BLASLONG, BLASLONG, double,double, double*, BLASLONG, double, double, double *, BLASLONG);
|
||||
|
||||
|
||||
#ifdef __CUDACC__
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -75,7 +75,7 @@ static inline int my_mbind(void *addr, unsigned long len, int mode,
|
||||
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
|
||||
return 0;
|
||||
#else
|
||||
#if defined (LOONGSON3B)
|
||||
#if defined (LOONGSON3B)
|
||||
#if defined (__64BIT__)
|
||||
return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags);
|
||||
#else
|
||||
@@ -99,9 +99,9 @@ static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline int my_gettid(void) {
|
||||
static inline int my_gettid(void) {
|
||||
#ifdef SYS_gettid
|
||||
return syscall(SYS_gettid);
|
||||
return syscall(SYS_gettid);
|
||||
#else
|
||||
return getpid();
|
||||
#endif
|
||||
|
||||
@@ -628,6 +628,13 @@
|
||||
#define HERK_THREAD_LR DSYRK_THREAD_LN
|
||||
#define HERK_THREAD_LC DSYRK_THREAD_LT
|
||||
|
||||
#define AXPBY_K DAXPBY_K
|
||||
|
||||
#define OMATCOPY_K_CN DOMATCOPY_K_CN
|
||||
#define OMATCOPY_K_RN DOMATCOPY_K_RN
|
||||
#define OMATCOPY_K_CT DOMATCOPY_K_CT
|
||||
#define OMATCOPY_K_RT DOMATCOPY_K_RT
|
||||
#define GEADD_K DGEADD_K
|
||||
#else
|
||||
|
||||
#define AMAX_K SAMAX_K
|
||||
@@ -918,6 +925,14 @@
|
||||
#define HERK_THREAD_LR SSYRK_THREAD_LN
|
||||
#define HERK_THREAD_LC SSYRK_THREAD_LT
|
||||
|
||||
#define AXPBY_K SAXPBY_K
|
||||
|
||||
#define OMATCOPY_K_CN SOMATCOPY_K_CN
|
||||
#define OMATCOPY_K_RN SOMATCOPY_K_RN
|
||||
#define OMATCOPY_K_CT SOMATCOPY_K_CT
|
||||
#define OMATCOPY_K_RT SOMATCOPY_K_RT
|
||||
|
||||
#define GEADD_K SGEADD_K
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
@@ -1722,6 +1737,18 @@
|
||||
#define SYMM_OUTCOPY ZSYMM_OUTCOPY
|
||||
#define SYMM_OLTCOPY ZSYMM_OLTCOPY
|
||||
|
||||
#define AXPBY_K ZAXPBY_K
|
||||
|
||||
#define OMATCOPY_K_CN ZOMATCOPY_K_CN
|
||||
#define OMATCOPY_K_RN ZOMATCOPY_K_RN
|
||||
#define OMATCOPY_K_CT ZOMATCOPY_K_CT
|
||||
#define OMATCOPY_K_RT ZOMATCOPY_K_RT
|
||||
#define OMATCOPY_K_CNC ZOMATCOPY_K_CNC
|
||||
#define OMATCOPY_K_RNC ZOMATCOPY_K_RNC
|
||||
#define OMATCOPY_K_CTC ZOMATCOPY_K_CTC
|
||||
#define OMATCOPY_K_RTC ZOMATCOPY_K_RTC
|
||||
#define GEADD_K ZGEADD_K
|
||||
|
||||
#else
|
||||
|
||||
#define AMAX_K CAMAX_K
|
||||
@@ -2123,6 +2150,19 @@
|
||||
#define SYMM_OUTCOPY CSYMM_OUTCOPY
|
||||
#define SYMM_OLTCOPY CSYMM_OLTCOPY
|
||||
|
||||
#define AXPBY_K CAXPBY_K
|
||||
|
||||
#define OMATCOPY_K_CN COMATCOPY_K_CN
|
||||
#define OMATCOPY_K_RN COMATCOPY_K_RN
|
||||
#define OMATCOPY_K_CT COMATCOPY_K_CT
|
||||
#define OMATCOPY_K_RT COMATCOPY_K_RT
|
||||
#define OMATCOPY_K_CNC COMATCOPY_K_CNC
|
||||
#define OMATCOPY_K_RNC COMATCOPY_K_RNC
|
||||
#define OMATCOPY_K_CTC COMATCOPY_K_CTC
|
||||
#define OMATCOPY_K_RTC COMATCOPY_K_RTC
|
||||
|
||||
#define GEADD_K CGEADD_K
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -13,21 +13,21 @@ met:
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
@@ -111,9 +111,9 @@ static inline unsigned int rpcc(void){
|
||||
".set pop": "=r"(ret):: "memory");
|
||||
|
||||
#else
|
||||
__asm__ __volatile__(".set push \n"
|
||||
".set mips32r2\n"
|
||||
"rdhwr %0, $30 \n"
|
||||
__asm__ __volatile__(".set push \n"
|
||||
".set mips32r2\n"
|
||||
"rdhwr %0, $30 \n"
|
||||
".set pop" : "=r"(ret) : : "memory");
|
||||
#endif
|
||||
return ret;
|
||||
@@ -191,13 +191,13 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||
#define CMPEQ c.eq.s
|
||||
#define CMPLE c.le.s
|
||||
#define CMPLT c.lt.s
|
||||
#define PLU plu.ps
|
||||
#define PLL pll.ps
|
||||
#define PUU puu.ps
|
||||
#define PUL pul.ps
|
||||
#define MADPS madd.ps
|
||||
#define CVTU cvt.s.pu
|
||||
#define CVTL cvt.s.pl
|
||||
#define PLU plu.ps
|
||||
#define PLL pll.ps
|
||||
#define PUU puu.ps
|
||||
#define PUL pul.ps
|
||||
#define MADPS madd.ps
|
||||
#define CVTU cvt.s.pu
|
||||
#define CVTL cvt.s.pl
|
||||
#define NEG neg.s
|
||||
#endif
|
||||
|
||||
@@ -279,9 +279,9 @@ REALNAME: ;\
|
||||
|
||||
#if defined(LOONGSON3A) || defined(LOONGSON3B)
|
||||
#define PREFETCHD_(x) ld $0, x
|
||||
#define PREFETCHD(x) PREFETCHD_(x)
|
||||
#define PREFETCHD(x) PREFETCHD_(x)
|
||||
#else
|
||||
#define PREFETCHD(x)
|
||||
#define PREFETCHD(x)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
242
common_param.h
242
common_param.h
@@ -87,12 +87,12 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
|
||||
int (*sgemm_itcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*sgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*sgemm_otcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
|
||||
|
||||
int (*strsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*strsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*strsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*strsm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
|
||||
|
||||
int (*strsm_iunucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
int (*strsm_iunncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
int (*strsm_iutucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
@@ -114,7 +114,7 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
|
||||
int (*strmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*strmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*strmm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
|
||||
|
||||
int (*strmm_iunucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*strmm_iunncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*strmm_iutucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
@@ -131,7 +131,7 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
|
||||
int (*strmm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*strmm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*strmm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
|
||||
|
||||
int (*ssymm_iutcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*ssymm_iltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*ssymm_outcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
@@ -176,12 +176,12 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
|
||||
int (*dgemm_itcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*dgemm_oncopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*dgemm_otcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
|
||||
|
||||
int (*dtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
int (*dtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
int (*dtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
int (*dtrsm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
|
||||
|
||||
int (*dtrsm_iunucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
|
||||
int (*dtrsm_iunncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
|
||||
int (*dtrsm_iutucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
|
||||
@@ -203,7 +203,7 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
|
||||
int (*dtrmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
int (*dtrmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
int (*dtrmm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
|
||||
|
||||
int (*dtrmm_iunucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*dtrmm_iunncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*dtrmm_iutucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
@@ -220,7 +220,7 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
|
||||
int (*dtrmm_olnncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*dtrmm_oltucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*dtrmm_oltncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
|
||||
|
||||
int (*dsymm_iutcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*dsymm_iltcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*dsymm_outcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
@@ -267,12 +267,12 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG);
|
||||
int (*qgemm_itcopy )(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
|
||||
int (*qgemm_oncopy )(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
|
||||
int (*qgemm_otcopy )(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
|
||||
|
||||
|
||||
int (*qtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
|
||||
int (*qtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
|
||||
int (*qtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
|
||||
int (*qtrsm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
|
||||
|
||||
|
||||
int (*qtrsm_iunucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*qtrsm_iunncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*qtrsm_iutucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
|
||||
@@ -294,7 +294,7 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG);
|
||||
int (*qtrmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
|
||||
int (*qtrmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
|
||||
int (*qtrmm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
|
||||
|
||||
|
||||
int (*qtrmm_iunucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*qtrmm_iunncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*qtrmm_iutucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
@@ -311,7 +311,7 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG);
|
||||
int (*qtrmm_olnncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*qtrmm_oltucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*qtrmm_oltncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
|
||||
|
||||
int (*qsymm_iutcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*qsymm_iltcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*qsymm_outcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
@@ -372,7 +372,7 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
||||
int (*cgemm_itcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*cgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*cgemm_otcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
|
||||
|
||||
int (*ctrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*ctrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*ctrsm_kernel_LR)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
@@ -381,7 +381,7 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
||||
int (*ctrsm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*ctrsm_kernel_RR)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*ctrsm_kernel_RC)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
|
||||
|
||||
int (*ctrsm_iunucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
int (*ctrsm_iunncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
int (*ctrsm_iutucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
@@ -407,7 +407,7 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
||||
int (*ctrmm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*ctrmm_kernel_LR)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*ctrmm_kernel_LC)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
|
||||
|
||||
int (*ctrmm_iunucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*ctrmm_iunncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*ctrmm_iutucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
@@ -424,7 +424,7 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
||||
int (*ctrmm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*ctrmm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*ctrmm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
|
||||
|
||||
int (*csymm_iutcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*csymm_iltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*csymm_outcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
@@ -435,6 +435,9 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
||||
int (*chemm_outcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*chemm_oltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
|
||||
int cgemm3m_p, cgemm3m_q, cgemm3m_r;
|
||||
int cgemm3m_unroll_m, cgemm3m_unroll_n, cgemm3m_unroll_mn;
|
||||
|
||||
int (*cgemm3m_kernel)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
|
||||
|
||||
int (*cgemm3m_incopyb)(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
@@ -443,7 +446,7 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
||||
int (*cgemm3m_itcopyb)(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*cgemm3m_itcopyr)(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*cgemm3m_itcopyi)(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
|
||||
|
||||
int (*cgemm3m_oncopyb)(BLASLONG, BLASLONG, float *, BLASLONG, float, float, float *);
|
||||
int (*cgemm3m_oncopyr)(BLASLONG, BLASLONG, float *, BLASLONG, float, float, float *);
|
||||
int (*cgemm3m_oncopyi)(BLASLONG, BLASLONG, float *, BLASLONG, float, float, float *);
|
||||
@@ -457,21 +460,21 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
||||
int (*csymm3m_ilcopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*csymm3m_iucopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*csymm3m_ilcopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
|
||||
|
||||
int (*csymm3m_oucopyb)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
|
||||
int (*csymm3m_olcopyb)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
|
||||
int (*csymm3m_oucopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
|
||||
int (*csymm3m_olcopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
|
||||
int (*csymm3m_oucopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
|
||||
int (*csymm3m_olcopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
|
||||
|
||||
|
||||
int (*chemm3m_iucopyb)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*chemm3m_ilcopyb)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*chemm3m_iucopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*chemm3m_ilcopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*chemm3m_iucopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*chemm3m_ilcopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
|
||||
|
||||
int (*chemm3m_oucopyb)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
|
||||
int (*chemm3m_olcopyb)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
|
||||
int (*chemm3m_oucopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
|
||||
@@ -532,7 +535,7 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG);
|
||||
int (*zgemm_itcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*zgemm_oncopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*zgemm_otcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
|
||||
|
||||
int (*ztrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
int (*ztrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
int (*ztrsm_kernel_LR)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
@@ -541,7 +544,7 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG);
|
||||
int (*ztrsm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
int (*ztrsm_kernel_RR)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
int (*ztrsm_kernel_RC)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
|
||||
|
||||
int (*ztrsm_iunucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
|
||||
int (*ztrsm_iunncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
|
||||
int (*ztrsm_iutucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
|
||||
@@ -567,7 +570,7 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG);
|
||||
int (*ztrmm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
int (*ztrmm_kernel_LR)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
int (*ztrmm_kernel_LC)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
|
||||
|
||||
int (*ztrmm_iunucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*ztrmm_iunncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*ztrmm_iutucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
@@ -584,7 +587,7 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG);
|
||||
int (*ztrmm_olnncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*ztrmm_oltucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*ztrmm_oltncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
|
||||
|
||||
int (*zsymm_iutcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*zsymm_iltcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*zsymm_outcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
@@ -595,6 +598,9 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG);
|
||||
int (*zhemm_outcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*zhemm_oltcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
|
||||
int zgemm3m_p, zgemm3m_q, zgemm3m_r;
|
||||
int zgemm3m_unroll_m, zgemm3m_unroll_n, zgemm3m_unroll_mn;
|
||||
|
||||
int (*zgemm3m_kernel)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
|
||||
|
||||
int (*zgemm3m_incopyb)(BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
@@ -603,7 +609,7 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG);
|
||||
int (*zgemm3m_itcopyb)(BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*zgemm3m_itcopyr)(BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*zgemm3m_itcopyi)(BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
|
||||
|
||||
int (*zgemm3m_oncopyb)(BLASLONG, BLASLONG, double *, BLASLONG, double, double, double *);
|
||||
int (*zgemm3m_oncopyr)(BLASLONG, BLASLONG, double *, BLASLONG, double, double, double *);
|
||||
int (*zgemm3m_oncopyi)(BLASLONG, BLASLONG, double *, BLASLONG, double, double, double *);
|
||||
@@ -617,28 +623,28 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG);
|
||||
int (*zsymm3m_ilcopyr)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*zsymm3m_iucopyi)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*zsymm3m_ilcopyi)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
|
||||
|
||||
int (*zsymm3m_oucopyb)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
|
||||
int (*zsymm3m_olcopyb)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
|
||||
int (*zsymm3m_oucopyr)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
|
||||
int (*zsymm3m_olcopyr)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
|
||||
int (*zsymm3m_oucopyi)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
|
||||
int (*zsymm3m_olcopyi)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
|
||||
|
||||
|
||||
int (*zhemm3m_iucopyb)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*zhemm3m_ilcopyb)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*zhemm3m_iucopyr)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*zhemm3m_ilcopyr)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*zhemm3m_iucopyi)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
int (*zhemm3m_ilcopyi)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
|
||||
|
||||
|
||||
int (*zhemm3m_oucopyb)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
|
||||
int (*zhemm3m_olcopyb)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
|
||||
int (*zhemm3m_oucopyr)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
|
||||
int (*zhemm3m_olcopyr)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
|
||||
int (*zhemm3m_oucopyi)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
|
||||
int (*zhemm3m_olcopyi)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
|
||||
|
||||
|
||||
int (*zneg_tcopy) (BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*zlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, double *, BLASLONG, blasint *, double *);
|
||||
|
||||
@@ -694,7 +700,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
||||
int (*xgemm_itcopy )(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
|
||||
int (*xgemm_oncopy )(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
|
||||
int (*xgemm_otcopy )(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
|
||||
|
||||
|
||||
int (*xtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
|
||||
int (*xtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
|
||||
int (*xtrsm_kernel_LR)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
|
||||
@@ -703,7 +709,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
||||
int (*xtrsm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
|
||||
int (*xtrsm_kernel_RR)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
|
||||
int (*xtrsm_kernel_RC)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
|
||||
|
||||
|
||||
int (*xtrsm_iunucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*xtrsm_iunncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*xtrsm_iutucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
|
||||
@@ -729,7 +735,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
||||
int (*xtrmm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
|
||||
int (*xtrmm_kernel_LR)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
|
||||
int (*xtrmm_kernel_LC)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
|
||||
|
||||
|
||||
int (*xtrmm_iunucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*xtrmm_iunncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*xtrmm_iutucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
@@ -746,7 +752,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
||||
int (*xtrmm_olnncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*xtrmm_oltucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*xtrmm_oltncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
|
||||
|
||||
int (*xsymm_iutcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*xsymm_iltcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*xsymm_outcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
@@ -757,6 +763,9 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
||||
int (*xhemm_outcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*xhemm_oltcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
|
||||
int xgemm3m_p, xgemm3m_q, xgemm3m_r;
|
||||
int xgemm3m_unroll_m, xgemm3m_unroll_n, xgemm3m_unroll_mn;
|
||||
|
||||
int (*xgemm3m_kernel)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
|
||||
|
||||
int (*xgemm3m_incopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
|
||||
@@ -765,7 +774,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
||||
int (*xgemm3m_itcopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
|
||||
int (*xgemm3m_itcopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
|
||||
int (*xgemm3m_itcopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
|
||||
|
||||
|
||||
int (*xgemm3m_oncopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble, xdouble *);
|
||||
int (*xgemm3m_oncopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble, xdouble *);
|
||||
int (*xgemm3m_oncopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble, xdouble *);
|
||||
@@ -779,21 +788,21 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
||||
int (*xsymm3m_ilcopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*xsymm3m_iucopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*xsymm3m_ilcopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
|
||||
|
||||
int (*xsymm3m_oucopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
|
||||
int (*xsymm3m_olcopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
|
||||
int (*xsymm3m_oucopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
|
||||
int (*xsymm3m_olcopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
|
||||
int (*xsymm3m_oucopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
|
||||
int (*xsymm3m_olcopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
|
||||
|
||||
|
||||
int (*xhemm3m_iucopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*xhemm3m_ilcopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*xhemm3m_iucopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*xhemm3m_ilcopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*xhemm3m_iucopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
int (*xhemm3m_ilcopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
|
||||
|
||||
|
||||
int (*xhemm3m_oucopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
|
||||
int (*xhemm3m_olcopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
|
||||
int (*xhemm3m_oucopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
|
||||
@@ -806,10 +815,51 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
void (*init)(void);
|
||||
|
||||
int snum_opt, dnum_opt, qnum_opt;
|
||||
|
||||
int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG);
|
||||
int (*daxpby_k) (BLASLONG, double, double*, BLASLONG,double, double*, BLASLONG);
|
||||
int (*caxpby_k) (BLASLONG, float, float, float*, BLASLONG,float,float, float*, BLASLONG);
|
||||
int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG);
|
||||
|
||||
int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
|
||||
int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
|
||||
int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
|
||||
int (*comatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
|
||||
int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
|
||||
int (*zomatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
|
||||
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
|
||||
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
|
||||
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
|
||||
int (*zgeadd_k) (BLASLONG, BLASLONG, float, double, double *, BLASLONG, double, double, double *, BLASLONG);
|
||||
|
||||
} gotoblas_t;
|
||||
|
||||
extern gotoblas_t *gotoblas;
|
||||
@@ -863,9 +913,30 @@ extern gotoblas_t *gotoblas;
|
||||
#define XGEMM_UNROLL_N gotoblas -> xgemm_unroll_n
|
||||
#define XGEMM_UNROLL_MN gotoblas -> xgemm_unroll_mn
|
||||
|
||||
#define CGEMM3M_P gotoblas -> cgemm3m_p
|
||||
#define CGEMM3M_Q gotoblas -> cgemm3m_q
|
||||
#define CGEMM3M_R gotoblas -> cgemm3m_r
|
||||
#define CGEMM3M_UNROLL_M gotoblas -> cgemm3m_unroll_m
|
||||
#define CGEMM3M_UNROLL_N gotoblas -> cgemm3m_unroll_n
|
||||
#define CGEMM3M_UNROLL_MN gotoblas -> cgemm3m_unroll_mn
|
||||
|
||||
#define ZGEMM3M_P gotoblas -> zgemm3m_p
|
||||
#define ZGEMM3M_Q gotoblas -> zgemm3m_q
|
||||
#define ZGEMM3M_R gotoblas -> zgemm3m_r
|
||||
#define ZGEMM3M_UNROLL_M gotoblas -> zgemm3m_unroll_m
|
||||
#define ZGEMM3M_UNROLL_N gotoblas -> zgemm3m_unroll_n
|
||||
#define ZGEMM3M_UNROLL_MN gotoblas -> zgemm3m_unroll_mn
|
||||
|
||||
#define XGEMM3M_P gotoblas -> xgemm3m_p
|
||||
#define XGEMM3M_Q gotoblas -> xgemm3m_q
|
||||
#define XGEMM3M_R gotoblas -> xgemm3m_r
|
||||
#define XGEMM3M_UNROLL_M gotoblas -> xgemm3m_unroll_m
|
||||
#define XGEMM3M_UNROLL_N gotoblas -> xgemm3m_unroll_n
|
||||
#define XGEMM3M_UNROLL_MN gotoblas -> xgemm3m_unroll_mn
|
||||
|
||||
#else
|
||||
|
||||
#define DTB_ENTRIES DTB_DEFAULT_ENTRIES
|
||||
#define DTB_ENTRIES DTB_DEFAULT_ENTRIES
|
||||
|
||||
#define GEMM_OFFSET_A GEMM_DEFAULT_OFFSET_A
|
||||
#define GEMM_OFFSET_B GEMM_DEFAULT_OFFSET_B
|
||||
@@ -882,14 +953,22 @@ extern gotoblas_t *gotoblas;
|
||||
#define SGEMM_R SGEMM_DEFAULT_R
|
||||
#define SGEMM_UNROLL_M SGEMM_DEFAULT_UNROLL_M
|
||||
#define SGEMM_UNROLL_N SGEMM_DEFAULT_UNROLL_N
|
||||
#ifdef SGEMM_DEFAULT_UNROLL_MN
|
||||
#define SGEMM_UNROLL_MN SGEMM_DEFAULT_UNROLL_MN
|
||||
#else
|
||||
#define SGEMM_UNROLL_MN MAX((SGEMM_UNROLL_M), (SGEMM_UNROLL_N))
|
||||
#endif
|
||||
|
||||
#define DGEMM_P DGEMM_DEFAULT_P
|
||||
#define DGEMM_Q DGEMM_DEFAULT_Q
|
||||
#define DGEMM_R DGEMM_DEFAULT_R
|
||||
#define DGEMM_UNROLL_M DGEMM_DEFAULT_UNROLL_M
|
||||
#define DGEMM_UNROLL_N DGEMM_DEFAULT_UNROLL_N
|
||||
#ifdef DGEMM_DEFAULT_UNROLL_MN
|
||||
#define DGEMM_UNROLL_MN DGEMM_DEFAULT_UNROLL_MN
|
||||
#else
|
||||
#define DGEMM_UNROLL_MN MAX((DGEMM_UNROLL_M), (DGEMM_UNROLL_N))
|
||||
#endif
|
||||
|
||||
#define QGEMM_P QGEMM_DEFAULT_P
|
||||
#define QGEMM_Q QGEMM_DEFAULT_Q
|
||||
@@ -903,14 +982,22 @@ extern gotoblas_t *gotoblas;
|
||||
#define CGEMM_R CGEMM_DEFAULT_R
|
||||
#define CGEMM_UNROLL_M CGEMM_DEFAULT_UNROLL_M
|
||||
#define CGEMM_UNROLL_N CGEMM_DEFAULT_UNROLL_N
|
||||
#ifdef CGEMM_DEFAULT_UNROLL_MN
|
||||
#define CGEMM_UNROLL_MN CGEMM_DEFAULT_UNROLL_MN
|
||||
#else
|
||||
#define CGEMM_UNROLL_MN MAX((CGEMM_UNROLL_M), (CGEMM_UNROLL_N))
|
||||
#endif
|
||||
|
||||
#define ZGEMM_P ZGEMM_DEFAULT_P
|
||||
#define ZGEMM_Q ZGEMM_DEFAULT_Q
|
||||
#define ZGEMM_R ZGEMM_DEFAULT_R
|
||||
#define ZGEMM_UNROLL_M ZGEMM_DEFAULT_UNROLL_M
|
||||
#define ZGEMM_UNROLL_N ZGEMM_DEFAULT_UNROLL_N
|
||||
#ifdef ZGEMM_DEFAULT_UNROLL_MN
|
||||
#define ZGEMM_UNROLL_MN ZGEMM_DEFAULT_UNROLL_MN
|
||||
#else
|
||||
#define ZGEMM_UNROLL_MN MAX((ZGEMM_UNROLL_M), (ZGEMM_UNROLL_N))
|
||||
#endif
|
||||
|
||||
#define XGEMM_P XGEMM_DEFAULT_P
|
||||
#define XGEMM_Q XGEMM_DEFAULT_Q
|
||||
@@ -919,6 +1006,55 @@ extern gotoblas_t *gotoblas;
|
||||
#define XGEMM_UNROLL_N XGEMM_DEFAULT_UNROLL_N
|
||||
#define XGEMM_UNROLL_MN MAX((XGEMM_UNROLL_M), (XGEMM_UNROLL_N))
|
||||
|
||||
#ifdef CGEMM3M_DEFAULT_UNROLL_N
|
||||
|
||||
#define CGEMM3M_P CGEMM3M_DEFAULT_P
|
||||
#define CGEMM3M_Q CGEMM3M_DEFAULT_Q
|
||||
#define CGEMM3M_R CGEMM3M_DEFAULT_R
|
||||
#define CGEMM3M_UNROLL_M CGEMM3M_DEFAULT_UNROLL_M
|
||||
#define CGEMM3M_UNROLL_N CGEMM3M_DEFAULT_UNROLL_N
|
||||
#define CGEMM3M_UNROLL_MN MAX((CGEMM3M_UNROLL_M), (CGEMM3M_UNROLL_N))
|
||||
|
||||
#else
|
||||
|
||||
#define CGEMM3M_P SGEMM_DEFAULT_P
|
||||
#define CGEMM3M_Q SGEMM_DEFAULT_Q
|
||||
#define CGEMM3M_R SGEMM_DEFAULT_R
|
||||
#define CGEMM3M_UNROLL_M SGEMM_DEFAULT_UNROLL_M
|
||||
#define CGEMM3M_UNROLL_N SGEMM_DEFAULT_UNROLL_N
|
||||
#define CGEMM3M_UNROLL_MN MAX((CGEMM_UNROLL_M), (CGEMM_UNROLL_N))
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef ZGEMM3M_DEFAULT_UNROLL_N
|
||||
|
||||
#define ZGEMM3M_P ZGEMM3M_DEFAULT_P
|
||||
#define ZGEMM3M_Q ZGEMM3M_DEFAULT_Q
|
||||
#define ZGEMM3M_R ZGEMM3M_DEFAULT_R
|
||||
#define ZGEMM3M_UNROLL_M ZGEMM3M_DEFAULT_UNROLL_M
|
||||
#define ZGEMM3M_UNROLL_N ZGEMM3M_DEFAULT_UNROLL_N
|
||||
#define ZGEMM3M_UNROLL_MN MAX((ZGEMM_UNROLL_M), (ZGEMM_UNROLL_N))
|
||||
|
||||
#else
|
||||
|
||||
#define ZGEMM3M_P DGEMM_DEFAULT_P
|
||||
#define ZGEMM3M_Q DGEMM_DEFAULT_Q
|
||||
#define ZGEMM3M_R DGEMM_DEFAULT_R
|
||||
#define ZGEMM3M_UNROLL_M DGEMM_DEFAULT_UNROLL_M
|
||||
#define ZGEMM3M_UNROLL_N DGEMM_DEFAULT_UNROLL_N
|
||||
#define ZGEMM3M_UNROLL_MN MAX((ZGEMM_UNROLL_M), (ZGEMM_UNROLL_N))
|
||||
|
||||
#endif
|
||||
|
||||
#define XGEMM3M_P QGEMM_DEFAULT_P
|
||||
#define XGEMM3M_Q QGEMM_DEFAULT_Q
|
||||
#define XGEMM3M_R QGEMM_DEFAULT_R
|
||||
#define XGEMM3M_UNROLL_M QGEMM_DEFAULT_UNROLL_M
|
||||
#define XGEMM3M_UNROLL_N QGEMM_DEFAULT_UNROLL_N
|
||||
#define XGEMM3M_UNROLL_MN MAX((QGEMM_UNROLL_M), (QGEMM_UNROLL_N))
|
||||
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -1001,14 +1137,14 @@ extern gotoblas_t *gotoblas;
|
||||
#endif
|
||||
|
||||
#ifdef XDOUBLE
|
||||
#define GEMM3M_UNROLL_M QGEMM_UNROLL_M
|
||||
#define GEMM3M_UNROLL_N QGEMM_UNROLL_N
|
||||
#define GEMM3M_UNROLL_M XGEMM3M_UNROLL_M
|
||||
#define GEMM3M_UNROLL_N XGEMM3M_UNROLL_N
|
||||
#elif defined(DOUBLE)
|
||||
#define GEMM3M_UNROLL_M DGEMM_UNROLL_M
|
||||
#define GEMM3M_UNROLL_N DGEMM_UNROLL_N
|
||||
#define GEMM3M_UNROLL_M ZGEMM3M_UNROLL_M
|
||||
#define GEMM3M_UNROLL_N ZGEMM3M_UNROLL_N
|
||||
#else
|
||||
#define GEMM3M_UNROLL_M SGEMM_UNROLL_M
|
||||
#define GEMM3M_UNROLL_N SGEMM_UNROLL_N
|
||||
#define GEMM3M_UNROLL_M CGEMM3M_UNROLL_M
|
||||
#define GEMM3M_UNROLL_N CGEMM3M_UNROLL_N
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1070,31 +1206,31 @@ extern gotoblas_t *gotoblas;
|
||||
|
||||
#ifndef GEMM3M_P
|
||||
#ifdef XDOUBLE
|
||||
#define GEMM3M_P QGEMM_P
|
||||
#define GEMM3M_P XGEMM3M_P
|
||||
#elif defined(DOUBLE)
|
||||
#define GEMM3M_P DGEMM_P
|
||||
#define GEMM3M_P ZGEMM3M_P
|
||||
#else
|
||||
#define GEMM3M_P SGEMM_P
|
||||
#define GEMM3M_P CGEMM3M_P
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef GEMM3M_Q
|
||||
#ifdef XDOUBLE
|
||||
#define GEMM3M_Q QGEMM_Q
|
||||
#define GEMM3M_Q XGEMM3M_Q
|
||||
#elif defined(DOUBLE)
|
||||
#define GEMM3M_Q DGEMM_Q
|
||||
#define GEMM3M_Q ZGEMM3M_Q
|
||||
#else
|
||||
#define GEMM3M_Q SGEMM_Q
|
||||
#define GEMM3M_Q CGEMM3M_Q
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef GEMM3M_R
|
||||
#ifdef XDOUBLE
|
||||
#define GEMM3M_R QGEMM_R
|
||||
#define GEMM3M_R XGEMM3M_R
|
||||
#elif defined(DOUBLE)
|
||||
#define GEMM3M_R DGEMM_R
|
||||
#define GEMM3M_R ZGEMM3M_R
|
||||
#else
|
||||
#define GEMM3M_R SGEMM_R
|
||||
#define GEMM3M_R CGEMM3M_R
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
@@ -114,7 +114,7 @@ static inline unsigned long getstackaddr(void){
|
||||
__asm__ __volatile__ ("mr %0, 1"
|
||||
: "=r"(addr) : : "memory");
|
||||
|
||||
return addr;
|
||||
return addr;
|
||||
};
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_AIX)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -13,19 +13,20 @@ met:
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
@@ -65,5 +66,5 @@ void BLASFUNC_REF(drotmg)(double *, double *, double *, double *, double *);
|
||||
double BLASFUNC_REF(dsdot)(blasint *, float *, blasint *, float *, blasint*);
|
||||
|
||||
FLOATRET BLASFUNC_REF(samax) (blasint *, float *, blasint *);
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
22
common_s.h
22
common_s.h
@@ -17,7 +17,7 @@
|
||||
#define SCOPY_K scopy_k
|
||||
#define SDOTU_K sdot_k
|
||||
#define SDOTC_K sdot_k
|
||||
#define SDSDOT_K sdot_k
|
||||
#define SDSDOT_K dsdot_k
|
||||
#define DSDOT_K dsdot_k
|
||||
#define SNRM2_K snrm2_k
|
||||
#define SSCAL_K sscal_k
|
||||
@@ -146,6 +146,15 @@
|
||||
#define SNEG_TCOPY sneg_tcopy
|
||||
#define SLASWP_NCOPY slaswp_ncopy
|
||||
|
||||
#define SAXPBY_K saxpby_k
|
||||
|
||||
#define SOMATCOPY_K_CN somatcopy_k_cn
|
||||
#define SOMATCOPY_K_RN somatcopy_k_rn
|
||||
#define SOMATCOPY_K_CT somatcopy_k_ct
|
||||
#define SOMATCOPY_K_RT somatcopy_k_rt
|
||||
|
||||
#define SGEADD_K sgeadd_k
|
||||
|
||||
#else
|
||||
|
||||
#define SAMAX_K gotoblas -> samax_k
|
||||
@@ -162,7 +171,7 @@
|
||||
#define SCOPY_K gotoblas -> scopy_k
|
||||
#define SDOTU_K gotoblas -> sdot_k
|
||||
#define SDOTC_K gotoblas -> sdot_k
|
||||
#define SDSDOT_K gotoblas -> sdot_k
|
||||
#define SDSDOT_K gotoblas -> dsdot_k
|
||||
#define DSDOT_K gotoblas -> dsdot_k
|
||||
#define SNRM2_K gotoblas -> snrm2_k
|
||||
#define SSCAL_K gotoblas -> sscal_k
|
||||
@@ -259,6 +268,15 @@
|
||||
#define SNEG_TCOPY gotoblas -> sneg_tcopy
|
||||
#define SLASWP_NCOPY gotoblas -> slaswp_ncopy
|
||||
|
||||
#define SAXPBY_K gotoblas -> saxpby_k
|
||||
|
||||
#define SOMATCOPY_K_CN gotoblas -> somatcopy_k_cn
|
||||
#define SOMATCOPY_K_RN gotoblas -> somatcopy_k_rn
|
||||
#define SOMATCOPY_K_CT gotoblas -> somatcopy_k_ct
|
||||
#define SOMATCOPY_K_RT gotoblas -> somatcopy_k_rt
|
||||
|
||||
#define SGEADD_K gotoblas -> sgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
#define SGEMM_NN sgemm_nn
|
||||
|
||||
@@ -130,7 +130,7 @@ static __inline int blas_quickdivide(blasint x, blasint y){
|
||||
#define FSQRT fsqrts
|
||||
#define FDIV fdivs
|
||||
#endif
|
||||
|
||||
|
||||
#define HALT prefetch [%g0], 5
|
||||
|
||||
#define FMADDS(rs1, rs2, rs3, rd) \
|
||||
@@ -170,19 +170,19 @@ static __inline int blas_quickdivide(blasint x, blasint y){
|
||||
.word ((2 << 30) | ((rd) << 25) | ( 0x36 << 19) | ( 0x7e << 5))
|
||||
|
||||
#ifndef DOUBLE
|
||||
#define FCLR(a) FCLRS(a)
|
||||
#define FONE(a) FONES(a)
|
||||
#define FMADD(a, b, c, d) FMADDS(a, b, c, d)
|
||||
#define FMSUB(a, b, c, d) FMSUBS(a, b, c, d)
|
||||
#define FNMADD(a, b, c, d) FNMADDS(a, b, c, d)
|
||||
#define FNMSUB(a, b, c, d) FNMSUBS(a, b, c, d)
|
||||
#define FCLR(a) FCLRS(a)
|
||||
#define FONE(a) FONES(a)
|
||||
#define FMADD(a, b, c, d) FMADDS(a, b, c, d)
|
||||
#define FMSUB(a, b, c, d) FMSUBS(a, b, c, d)
|
||||
#define FNMADD(a, b, c, d) FNMADDS(a, b, c, d)
|
||||
#define FNMSUB(a, b, c, d) FNMSUBS(a, b, c, d)
|
||||
#else
|
||||
#define FCLR(a) FCLRD(a)
|
||||
#define FONE(a) FONED(a)
|
||||
#define FMADD(a, b, c, d) FMADDD(a, b, c, d)
|
||||
#define FMSUB(a, b, c, d) FMSUBD(a, b, c, d)
|
||||
#define FNMADD(a, b, c, d) FNMADDD(a, b, c, d)
|
||||
#define FNMSUB(a, b, c, d) FNMSUBD(a, b, c, d)
|
||||
#define FCLR(a) FCLRD(a)
|
||||
#define FONE(a) FONED(a)
|
||||
#define FMADD(a, b, c, d) FMADDD(a, b, c, d)
|
||||
#define FMSUB(a, b, c, d) FMSUBD(a, b, c, d)
|
||||
#define FNMADD(a, b, c, d) FNMADDD(a, b, c, d)
|
||||
#define FNMSUB(a, b, c, d) FNMSUBD(a, b, c, d)
|
||||
#endif
|
||||
|
||||
#ifndef F_INTERFACE
|
||||
|
||||
@@ -176,7 +176,7 @@ int exec_blas(BLASLONG num_cpu, blas_param_t *param, void *buffer);
|
||||
|
||||
int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
|
||||
void *a, BLASLONG lda,
|
||||
void *b, BLASLONG ldb,
|
||||
void *b, BLASLONG ldb,
|
||||
void *c, BLASLONG ldc, int (*function)(), int threads);
|
||||
|
||||
int gemm_thread_m (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
|
||||
@@ -187,14 +187,14 @@ int gemm_thread_mn(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*functio
|
||||
|
||||
int gemm_thread_variable(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG, BLASLONG);
|
||||
|
||||
int trsm_thread(int mode, BLASLONG m, BLASLONG n,
|
||||
int trsm_thread(int mode, BLASLONG m, BLASLONG n,
|
||||
double alpha_r, double alpha_i,
|
||||
void *a, BLASLONG lda,
|
||||
void *c, BLASLONG ldc, int (*function)(), void *buffer);
|
||||
|
||||
int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
|
||||
|
||||
int beta_thread(int mode, BLASLONG m, BLASLONG n,
|
||||
int beta_thread(int mode, BLASLONG m, BLASLONG n,
|
||||
double alpha_r, double alpha_i,
|
||||
void *c, BLASLONG ldc, int (*fuction)());
|
||||
|
||||
|
||||
14
common_x86.h
14
common_x86.h
@@ -55,7 +55,7 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
|
||||
__asm__ __volatile__(
|
||||
"xchgl %0, %1\n"
|
||||
: "=r"(ret), "=m"(*address)
|
||||
@@ -70,8 +70,8 @@ static __inline unsigned long long rpcc(void){
|
||||
unsigned int a, d;
|
||||
|
||||
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
|
||||
|
||||
return ((unsigned long long)a + ((unsigned long long)d << 32));
|
||||
|
||||
return ((unsigned long long)a + ((unsigned long long)d << 32));
|
||||
};
|
||||
|
||||
static __inline unsigned long getstackaddr(void){
|
||||
@@ -80,7 +80,7 @@ static __inline unsigned long getstackaddr(void){
|
||||
__asm__ __volatile__ ("mov %%esp, %0"
|
||||
: "=r"(addr) : : "memory");
|
||||
|
||||
return addr;
|
||||
return addr;
|
||||
};
|
||||
|
||||
|
||||
@@ -171,7 +171,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
#define MMXSTORE movd
|
||||
#endif
|
||||
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER)
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
|
||||
//Enable some optimazation for barcelona.
|
||||
#define BARCELONA_OPTIMIZATION
|
||||
#endif
|
||||
@@ -365,9 +365,9 @@ REALNAME:
|
||||
#ifndef ALIGN_6
|
||||
#define ALIGN_6 .align 64
|
||||
#endif
|
||||
// ffreep %st(0).
|
||||
// ffreep %st(0).
|
||||
// Because Clang didn't support ffreep, we directly use the opcode.
|
||||
// Please check out http://www.sandpile.org/x86/opc_fpu.htm
|
||||
// Please check out http://www.sandpile.org/x86/opc_fpu.htm
|
||||
#ifndef ffreep
|
||||
#define ffreep .byte 0xdf, 0xc0 #
|
||||
#endif
|
||||
|
||||
@@ -46,6 +46,7 @@
|
||||
#define __volatile__
|
||||
#endif
|
||||
|
||||
/*
|
||||
#ifdef HAVE_SSE2
|
||||
#define MB __asm__ __volatile__ ("mfence");
|
||||
#define WMB __asm__ __volatile__ ("sfence");
|
||||
@@ -53,6 +54,10 @@
|
||||
#define MB
|
||||
#define WMB
|
||||
#endif
|
||||
*/
|
||||
|
||||
#define MB
|
||||
#define WMB
|
||||
|
||||
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
@@ -60,7 +65,7 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
|
||||
__asm__ __volatile__(
|
||||
"xchgl %0, %1\n"
|
||||
: "=r"(ret), "=m"(*address)
|
||||
@@ -74,8 +79,8 @@ static __inline BLASULONG rpcc(void){
|
||||
BLASULONG a, d;
|
||||
|
||||
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
|
||||
|
||||
return ((BLASULONG)a + ((BLASULONG)d << 32));
|
||||
|
||||
return ((BLASULONG)a + ((BLASULONG)d << 32));
|
||||
}
|
||||
|
||||
#define RPCC64BIT
|
||||
@@ -86,7 +91,7 @@ static __inline BLASULONG getstackaddr(void){
|
||||
__asm__ __volatile__ ("movq %%rsp, %0"
|
||||
: "=r"(addr) : : "memory");
|
||||
|
||||
return addr;
|
||||
return addr;
|
||||
}
|
||||
|
||||
static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
|
||||
@@ -99,7 +104,9 @@ static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
|
||||
: "0" (op));
|
||||
}
|
||||
|
||||
/*
|
||||
#define WHEREAMI
|
||||
*/
|
||||
|
||||
static inline int WhereAmI(void){
|
||||
int eax, ebx, ecx, edx;
|
||||
@@ -111,6 +118,7 @@ static inline int WhereAmI(void){
|
||||
return apicid;
|
||||
}
|
||||
|
||||
|
||||
#ifdef CORE_BARCELONA
|
||||
#define IFLUSH gotoblas_iflush()
|
||||
#define IFLUSH_HALF gotoblas_iflush_half()
|
||||
@@ -218,7 +226,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
|
||||
#ifdef ASSEMBLER
|
||||
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER)
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
|
||||
//Enable some optimazation for barcelona.
|
||||
#define BARCELONA_OPTIMIZATION
|
||||
#endif
|
||||
@@ -457,9 +465,9 @@ REALNAME:
|
||||
#define ALIGN_6 .align 64
|
||||
#endif
|
||||
|
||||
// ffreep %st(0).
|
||||
// ffreep %st(0).
|
||||
// Because Clang didn't support ffreep, we directly use the opcode.
|
||||
// Please check out http://www.sandpile.org/x86/opc_fpu.htm
|
||||
// Please check out http://www.sandpile.org/x86/opc_fpu.htm
|
||||
#ifndef ffreep
|
||||
#define ffreep .byte 0xdf, 0xc0 #
|
||||
#endif
|
||||
|
||||
26
common_z.h
26
common_z.h
@@ -209,6 +209,19 @@
|
||||
#define ZNEG_TCOPY zneg_tcopy
|
||||
#define ZLASWP_NCOPY zlaswp_ncopy
|
||||
|
||||
#define ZAXPBY_K zaxpby_k
|
||||
|
||||
#define ZOMATCOPY_K_CN zomatcopy_k_cn
|
||||
#define ZOMATCOPY_K_RN zomatcopy_k_rn
|
||||
#define ZOMATCOPY_K_CT zomatcopy_k_ct
|
||||
#define ZOMATCOPY_K_RT zomatcopy_k_rt
|
||||
#define ZOMATCOPY_K_CNC zomatcopy_k_cnc
|
||||
#define ZOMATCOPY_K_RNC zomatcopy_k_rnc
|
||||
#define ZOMATCOPY_K_CTC zomatcopy_k_ctc
|
||||
#define ZOMATCOPY_K_RTC zomatcopy_k_rtc
|
||||
|
||||
#define ZGEADD_K zgeadd_k
|
||||
|
||||
#else
|
||||
|
||||
#define ZAMAX_K gotoblas -> zamax_k
|
||||
@@ -380,6 +393,19 @@
|
||||
#define ZNEG_TCOPY gotoblas -> zneg_tcopy
|
||||
#define ZLASWP_NCOPY gotoblas -> zlaswp_ncopy
|
||||
|
||||
#define ZAXPBY_K gotoblas -> zaxpby_k
|
||||
|
||||
#define ZOMATCOPY_K_CN gotoblas -> zomatcopy_k_cn
|
||||
#define ZOMATCOPY_K_RN gotoblas -> zomatcopy_k_rn
|
||||
#define ZOMATCOPY_K_CT gotoblas -> zomatcopy_k_ct
|
||||
#define ZOMATCOPY_K_RT gotoblas -> zomatcopy_k_rt
|
||||
#define ZOMATCOPY_K_CNC gotoblas -> zomatcopy_k_cnc
|
||||
#define ZOMATCOPY_K_RNC gotoblas -> zomatcopy_k_rnc
|
||||
#define ZOMATCOPY_K_CTC gotoblas -> zomatcopy_k_ctc
|
||||
#define ZOMATCOPY_K_RTC gotoblas -> zomatcopy_k_rtc
|
||||
|
||||
#define ZGEADD_K gotoblas -> zgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
#define ZGEMM_NN zgemm_nn
|
||||
|
||||
4
cpuid.S
4
cpuid.S
@@ -39,10 +39,10 @@
|
||||
#if defined(__APPLE__) && defined(__i386__)
|
||||
|
||||
/* Quick hack for Darwin/x86 */
|
||||
|
||||
|
||||
.text
|
||||
.globl _cpuid
|
||||
_cpuid:
|
||||
_cpuid:
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
|
||||
|
||||
10
cpuid.h
10
cpuid.h
@@ -104,10 +104,11 @@
|
||||
#define CORE_ATOM 18
|
||||
#define CORE_NANO 19
|
||||
#define CORE_SANDYBRIDGE 20
|
||||
#define CORE_BOBCAT 21
|
||||
#define CORE_BULLDOZER 22
|
||||
#define CORE_BOBCAT 21
|
||||
#define CORE_BULLDOZER 22
|
||||
#define CORE_PILEDRIVER 23
|
||||
#define CORE_HASWELL 24
|
||||
#define CORE_HASWELL 24
|
||||
#define CORE_STEAMROLLER 25
|
||||
|
||||
#define HAVE_SSE (1 << 0)
|
||||
#define HAVE_SSE2 (1 << 1)
|
||||
@@ -200,6 +201,7 @@ typedef struct {
|
||||
#define CPUTYPE_BOBCAT 45
|
||||
#define CPUTYPE_BULLDOZER 46
|
||||
#define CPUTYPE_PILEDRIVER 47
|
||||
#define CPUTYPE_HASWELL 48
|
||||
#define CPUTYPE_HASWELL 48
|
||||
#define CPUTYPE_STEAMROLLER 49
|
||||
|
||||
#endif
|
||||
|
||||
@@ -50,7 +50,7 @@ int implver(void){
|
||||
#endif
|
||||
return arch;
|
||||
}
|
||||
|
||||
|
||||
void get_architecture(void){
|
||||
printf("ALPHA");
|
||||
}
|
||||
@@ -67,7 +67,7 @@ void get_cpuconfig(void){
|
||||
printf("#define EV%d\n", implver() + 4);
|
||||
|
||||
switch (implver()){
|
||||
case 0:
|
||||
case 0:
|
||||
printf("#define L1_DATA_SIZE 16384\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 2097152\n");
|
||||
@@ -76,7 +76,7 @@ void get_cpuconfig(void){
|
||||
printf("#define DTB_SIZE 8192\n");
|
||||
break;
|
||||
|
||||
case 1:
|
||||
case 1:
|
||||
printf("#define L1_DATA_SIZE 16384\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 2097152\n");
|
||||
@@ -85,7 +85,7 @@ void get_cpuconfig(void){
|
||||
printf("#define DTB_SIZE 8192\n");
|
||||
break;
|
||||
|
||||
case 2:
|
||||
case 2:
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 64\n");
|
||||
printf("#define L2_SIZE 4194304\n");
|
||||
|
||||
105
cpuid_arm.c
105
cpuid_arm.c
@@ -30,16 +30,27 @@
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_ARMV6 1
|
||||
#define CPU_ARMV7 2
|
||||
#define CPU_CORTEXA15 3
|
||||
#define CPU_CORTEXA9 3
|
||||
#define CPU_CORTEXA15 4
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKOWN",
|
||||
"ARMV6",
|
||||
"ARMV7",
|
||||
"CORTEXA9",
|
||||
"CORTEXA15"
|
||||
};
|
||||
|
||||
|
||||
static char *cpuname_lower[] = {
|
||||
"unknown",
|
||||
"armv6",
|
||||
"armv7",
|
||||
"cortexa9",
|
||||
"cortexa15"
|
||||
};
|
||||
|
||||
|
||||
int get_feature(char *search)
|
||||
{
|
||||
|
||||
@@ -67,7 +78,7 @@ int get_feature(char *search)
|
||||
|
||||
t = strtok(p," ");
|
||||
while( t = strtok(NULL," "))
|
||||
{
|
||||
{
|
||||
if (!strcmp(t, search)) { return(1); }
|
||||
}
|
||||
|
||||
@@ -85,12 +96,35 @@ int detect(void)
|
||||
char buffer[512], *p;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if (!strncmp("CPU part", buffer, 8))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
if(p != NULL) {
|
||||
if (strstr(p, "0xc09")) {
|
||||
return CPU_CORTEXA9;
|
||||
}
|
||||
if (strstr(p, "0xc0f")) {
|
||||
return CPU_CORTEXA15;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
p = (char *) NULL ;
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if (!strncmp("model name", buffer, 10))
|
||||
if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9)))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
@@ -102,7 +136,7 @@ int detect(void)
|
||||
if(p != NULL)
|
||||
{
|
||||
|
||||
if (strstr(p, "ARMv7"))
|
||||
if (strstr(p, "ARMv7"))
|
||||
{
|
||||
if ( get_feature("vfpv4"))
|
||||
return CPU_ARMV7;
|
||||
@@ -116,7 +150,7 @@ int detect(void)
|
||||
|
||||
}
|
||||
|
||||
if (strstr(p, "ARMv6"))
|
||||
if (strstr(p, "ARMv6"))
|
||||
{
|
||||
if ( get_feature("vfp"))
|
||||
return CPU_ARMV6;
|
||||
@@ -142,21 +176,7 @@ void get_architecture(void)
|
||||
void get_subarchitecture(void)
|
||||
{
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV7:
|
||||
printf("ARMV7");
|
||||
break;
|
||||
|
||||
case CPU_ARMV6:
|
||||
printf("ARMV6");
|
||||
break;
|
||||
|
||||
default:
|
||||
printf("UNKNOWN");
|
||||
break;
|
||||
}
|
||||
printf("%s", cpuname[d]);
|
||||
}
|
||||
|
||||
void get_subdirname(void)
|
||||
@@ -170,6 +190,36 @@ void get_cpuconfig(void)
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
case CPU_CORTEXA9:
|
||||
printf("#define CORTEXA9\n");
|
||||
printf("#define HAVE_VFP\n");
|
||||
printf("#define HAVE_VFPV3\n");
|
||||
if ( get_feature("neon")) printf("#define HAVE_NEON\n");
|
||||
if ( get_feature("vfpv4")) printf("#define HAVE_VFPV4\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 1048576\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 128\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
break;
|
||||
|
||||
case CPU_CORTEXA15:
|
||||
printf("#define CORTEXA15\n");
|
||||
printf("#define HAVE_VFP\n");
|
||||
printf("#define HAVE_VFPV3\n");
|
||||
if ( get_feature("neon")) printf("#define HAVE_NEON\n");
|
||||
if ( get_feature("vfpv4")) printf("#define HAVE_VFPV4\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 1048576\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 128\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
break;
|
||||
|
||||
|
||||
case CPU_ARMV7:
|
||||
printf("#define ARMV7\n");
|
||||
@@ -206,18 +256,7 @@ void get_libname(void)
|
||||
{
|
||||
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV7:
|
||||
printf("armv7\n");
|
||||
break;
|
||||
|
||||
case CPU_ARMV6:
|
||||
printf("armv6\n");
|
||||
break;
|
||||
|
||||
}
|
||||
printf("%s", cpuname_lower[d]);
|
||||
}
|
||||
|
||||
|
||||
@@ -248,7 +287,7 @@ void get_features(void)
|
||||
|
||||
t = strtok(p," ");
|
||||
while( t = strtok(NULL," "))
|
||||
{
|
||||
{
|
||||
if (!strcmp(t, "vfp")) { printf("HAVE_VFP=1\n"); continue; }
|
||||
if (!strcmp(t, "vfpv3")) { printf("HAVE_VFPV3=1\n"); continue; }
|
||||
if (!strcmp(t, "vfpv4")) { printf("HAVE_VFPV4=1\n"); continue; }
|
||||
|
||||
217
cpuid_arm64.c
Normal file
217
cpuid_arm64.c
Normal file
@@ -0,0 +1,217 @@
|
||||
/**************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_ARMV8 1
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKOWN",
|
||||
"ARMV8"
|
||||
};
|
||||
|
||||
|
||||
int get_feature(char *search)
|
||||
{
|
||||
|
||||
#ifdef linux
|
||||
FILE *infile;
|
||||
char buffer[2048], *p,*t;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if (!strncmp("Features", buffer, 8))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
|
||||
|
||||
if( p == NULL ) return;
|
||||
|
||||
t = strtok(p," ");
|
||||
while( t = strtok(NULL," "))
|
||||
{
|
||||
if (!strcmp(t, search)) { return(1); }
|
||||
}
|
||||
|
||||
#endif
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
||||
int detect(void)
|
||||
{
|
||||
|
||||
#ifdef linux
|
||||
|
||||
FILE *infile;
|
||||
char buffer[512], *p;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9)))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
|
||||
if(p != NULL)
|
||||
{
|
||||
|
||||
if (strstr(p, "AArch64"))
|
||||
{
|
||||
return CPU_ARMV8;
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
return CPU_UNKNOWN;
|
||||
}
|
||||
|
||||
char *get_corename(void)
|
||||
{
|
||||
return cpuname[detect()];
|
||||
}
|
||||
|
||||
void get_architecture(void)
|
||||
{
|
||||
printf("ARM");
|
||||
}
|
||||
|
||||
void get_subarchitecture(void)
|
||||
{
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV8:
|
||||
printf("ARMV8");
|
||||
break;
|
||||
|
||||
default:
|
||||
printf("UNKNOWN");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void get_subdirname(void)
|
||||
{
|
||||
printf("arm64");
|
||||
}
|
||||
|
||||
void get_cpuconfig(void)
|
||||
{
|
||||
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV8:
|
||||
printf("#define ARMV8\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 64\n");
|
||||
printf("#define L2_SIZE 262144\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
break;
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void get_libname(void)
|
||||
{
|
||||
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV8:
|
||||
printf("armv8\n");
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void get_features(void)
|
||||
{
|
||||
|
||||
#ifdef linux
|
||||
FILE *infile;
|
||||
char buffer[2048], *p,*t;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if (!strncmp("Features", buffer, 8))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
|
||||
|
||||
if( p == NULL ) return;
|
||||
|
||||
t = strtok(p," ");
|
||||
while( t = strtok(NULL," "))
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -45,7 +45,7 @@
|
||||
#include <ia64intrin.h>
|
||||
#endif
|
||||
|
||||
static inline unsigned long cpuid(unsigned long regnum){
|
||||
static inline unsigned long cpuid(unsigned long regnum){
|
||||
unsigned long value;
|
||||
|
||||
#ifdef __ECC
|
||||
@@ -65,7 +65,7 @@ int get_vendor(void){
|
||||
|
||||
cpuid0 = cpuid(0);
|
||||
cpuid1 = cpuid(1);
|
||||
|
||||
|
||||
*(unsigned long *)(&vendor[0]) = cpuid0;
|
||||
*(unsigned long *)(&vendor[8]) = cpuid1;
|
||||
vendor[17] = (char)0;
|
||||
@@ -79,7 +79,7 @@ int get_cputype(int gettype){
|
||||
unsigned long cpuid3;
|
||||
|
||||
cpuid3 = cpuid(3);
|
||||
|
||||
|
||||
switch (gettype) {
|
||||
case GET_ARCHREV :
|
||||
return BITMASK(cpuid3, 32, 0xff);
|
||||
|
||||
27
cpuid_mips.c
27
cpuid_mips.c
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -13,19 +13,20 @@ met:
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
@@ -134,7 +134,7 @@ int detect(void){
|
||||
|
||||
if (hostInfo.cpu_subtype == CPU_SUBTYPE_POWERPC_7450) return CPUTYPE_PPCG4;
|
||||
if (hostInfo.cpu_subtype == CPU_SUBTYPE_POWERPC_970) return CPUTYPE_PPC970;
|
||||
|
||||
|
||||
return CPUTYPE_PPC970;
|
||||
#endif
|
||||
}
|
||||
|
||||
167
cpuid_x86.c
167
cpuid_x86.c
@@ -40,6 +40,7 @@
|
||||
#include <string.h>
|
||||
#include "cpuid.h"
|
||||
|
||||
/*
|
||||
#ifdef NO_AVX
|
||||
#define CPUTYPE_HASWELL CPUTYPE_NEHALEM
|
||||
#define CORE_HASWELL CORE_NEHALEM
|
||||
@@ -50,6 +51,7 @@
|
||||
#define CPUTYPE_PILEDRIVER CPUTYPE_BARCELONA
|
||||
#define CORE_PILEDRIVER CORE_BARCELONA
|
||||
#endif
|
||||
*/
|
||||
|
||||
#ifndef CPUIDEMU
|
||||
|
||||
@@ -57,9 +59,16 @@
|
||||
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
|
||||
#else
|
||||
static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
|
||||
#if defined(__i386__) && defined(__PIC__)
|
||||
__asm__ __volatile__
|
||||
("mov %%ebx, %%edi;"
|
||||
"cpuid;"
|
||||
"xchgl %%ebx, %%edi;"
|
||||
: "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
|
||||
#else
|
||||
__asm__ __volatile__
|
||||
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
|
||||
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -91,7 +100,7 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *
|
||||
if ((current < start) || (current > stop)) current = start;
|
||||
|
||||
while ((count > 0) && (idlist[current].id != op)) {
|
||||
|
||||
|
||||
current ++;
|
||||
if (current > stop) current = start;
|
||||
count --;
|
||||
@@ -132,7 +141,7 @@ int support_avx(){
|
||||
#ifndef NO_AVX
|
||||
int eax, ebx, ecx, edx;
|
||||
int ret=0;
|
||||
|
||||
|
||||
cpuid(1, &eax, &ebx, &ecx, &edx);
|
||||
if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){
|
||||
xgetbv(0, &eax, &edx);
|
||||
@@ -152,7 +161,7 @@ int get_vendor(void){
|
||||
char vendor[13];
|
||||
|
||||
cpuid(0, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
|
||||
*(int *)(&vendor[0]) = ebx;
|
||||
*(int *)(&vendor[4]) = edx;
|
||||
*(int *)(&vendor[8]) = ecx;
|
||||
@@ -173,7 +182,7 @@ int get_vendor(void){
|
||||
|
||||
return VENDOR_UNKNOWN;
|
||||
}
|
||||
|
||||
|
||||
int get_cputype(int gettype){
|
||||
int eax, ebx, ecx, edx;
|
||||
int extend_family, family;
|
||||
@@ -182,7 +191,7 @@ int get_cputype(int gettype){
|
||||
int feature = 0;
|
||||
|
||||
cpuid(1, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
|
||||
switch (gettype) {
|
||||
case GET_EXFAMILY :
|
||||
return BITMASK(eax, 20, 0xff);
|
||||
@@ -252,12 +261,12 @@ int get_cputype(int gettype){
|
||||
}
|
||||
return feature;
|
||||
}
|
||||
|
||||
|
||||
int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
int eax, ebx, ecx, edx, cpuid_level;
|
||||
int info[15];
|
||||
int i;
|
||||
cache_info_t LC1, LD1, L2, L3,
|
||||
cache_info_t LC1, LD1, L2, L3,
|
||||
ITB, DTB, LITB, LDTB,
|
||||
L2ITB, L2DTB, L2LITB, L2LDTB;
|
||||
|
||||
@@ -283,22 +292,22 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
info[ 0] = BITMASK(eax, 8, 0xff);
|
||||
info[ 1] = BITMASK(eax, 16, 0xff);
|
||||
info[ 2] = BITMASK(eax, 24, 0xff);
|
||||
|
||||
|
||||
info[ 3] = BITMASK(ebx, 0, 0xff);
|
||||
info[ 4] = BITMASK(ebx, 8, 0xff);
|
||||
info[ 5] = BITMASK(ebx, 16, 0xff);
|
||||
info[ 6] = BITMASK(ebx, 24, 0xff);
|
||||
|
||||
|
||||
info[ 7] = BITMASK(ecx, 0, 0xff);
|
||||
info[ 8] = BITMASK(ecx, 8, 0xff);
|
||||
info[ 9] = BITMASK(ecx, 16, 0xff);
|
||||
info[10] = BITMASK(ecx, 24, 0xff);
|
||||
|
||||
|
||||
info[11] = BITMASK(edx, 0, 0xff);
|
||||
info[12] = BITMASK(edx, 8, 0xff);
|
||||
info[13] = BITMASK(edx, 16, 0xff);
|
||||
info[14] = BITMASK(edx, 24, 0xff);
|
||||
|
||||
|
||||
for (i = 0; i < 15; i++){
|
||||
|
||||
switch (info[i]){
|
||||
@@ -844,11 +853,24 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
if (get_vendor() == VENDOR_INTEL) {
|
||||
cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx);
|
||||
if (cpuid_level >= 0x80000006) {
|
||||
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
|
||||
if(L2.size<=0){
|
||||
//If we didn't detect L2 correctly before,
|
||||
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
L2.size = BITMASK(ecx, 16, 0xffff);
|
||||
L2.associative = BITMASK(ecx, 12, 0x0f);
|
||||
L2.linesize = BITMASK(ecx, 0, 0xff);
|
||||
L2.size = BITMASK(ecx, 16, 0xffff);
|
||||
L2.associative = BITMASK(ecx, 12, 0x0f);
|
||||
|
||||
switch (L2.associative){
|
||||
case 0x06:
|
||||
L2.associative = 8;
|
||||
break;
|
||||
case 0x08:
|
||||
L2.associative = 16;
|
||||
break;
|
||||
}
|
||||
|
||||
L2.linesize = BITMASK(ecx, 0, 0xff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -864,7 +886,7 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
LITB.associative = BITMASK(eax, 8, 0xff);
|
||||
if (LITB.associative == 0xff) LITB.associative = 0;
|
||||
LITB.linesize = BITMASK(eax, 0, 0xff);
|
||||
|
||||
|
||||
DTB.size = 4;
|
||||
DTB.associative = BITMASK(ebx, 24, 0xff);
|
||||
if (DTB.associative == 0xff) DTB.associative = 0;
|
||||
@@ -896,7 +918,7 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
L2LITB.associative = BITMASK(eax, 8, 0xff);
|
||||
if (L2LITB.associative == 0xff) L2LITB.associative = 0;
|
||||
L2LITB.linesize = BITMASK(eax, 0, 0xff);
|
||||
|
||||
|
||||
L2DTB.size = 4;
|
||||
L2DTB.associative = BITMASK(ebx, 24, 0xff);
|
||||
if (L2DTB.associative == 0xff) L2DTB.associative = 0;
|
||||
@@ -907,10 +929,22 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
if (L2ITB.associative == 0xff) L2ITB.associative = 0;
|
||||
L2ITB.linesize = BITMASK(ebx, 0, 0xff);
|
||||
|
||||
L2.size = BITMASK(ecx, 16, 0xffff);
|
||||
L2.associative = BITMASK(ecx, 12, 0xf);
|
||||
if (L2.associative == 0xff) L2.associative = 0;
|
||||
L2.linesize = BITMASK(ecx, 0, 0xff);
|
||||
if(L2.size <= 0){
|
||||
//If we didn't detect L2 correctly before,
|
||||
L2.size = BITMASK(ecx, 16, 0xffff);
|
||||
L2.associative = BITMASK(ecx, 12, 0xf);
|
||||
switch (L2.associative){
|
||||
case 0x06:
|
||||
L2.associative = 8;
|
||||
break;
|
||||
case 0x08:
|
||||
L2.associative = 16;
|
||||
break;
|
||||
}
|
||||
|
||||
if (L2.associative == 0xff) L2.associative = 0;
|
||||
L2.linesize = BITMASK(ecx, 0, 0xff);
|
||||
}
|
||||
|
||||
L3.size = BITMASK(edx, 18, 0x3fff) * 512;
|
||||
L3.associative = BITMASK(edx, 12, 0xf);
|
||||
@@ -920,7 +954,7 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
|
||||
|
||||
case CACHE_INFO_L1_I :
|
||||
*cacheinfo = LC1;
|
||||
break;
|
||||
@@ -982,7 +1016,7 @@ int get_cpuname(void){
|
||||
return CPUTYPE_PENTIUM;
|
||||
case 0x6:
|
||||
switch (exmodel) {
|
||||
case 0:
|
||||
case 0:
|
||||
switch (model) {
|
||||
case 1:
|
||||
case 3:
|
||||
@@ -1022,8 +1056,8 @@ int get_cpuname(void){
|
||||
case 2:
|
||||
switch (model) {
|
||||
case 5:
|
||||
//Intel Core (Clarkdale) / Core (Arrandale)
|
||||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
||||
//Intel Core (Clarkdale) / Core (Arrandale)
|
||||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
||||
// Xeon (Clarkdale), 32nm
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 10:
|
||||
@@ -1051,13 +1085,20 @@ int get_cpuname(void){
|
||||
case 3:
|
||||
switch (model) {
|
||||
case 10:
|
||||
case 14:
|
||||
// Ivy Bridge
|
||||
if(support_avx())
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 12:
|
||||
case 15:
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
@@ -1065,12 +1106,17 @@ int get_cpuname(void){
|
||||
case 4:
|
||||
switch (model) {
|
||||
case 5:
|
||||
case 6:
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x7:
|
||||
@@ -1115,7 +1161,13 @@ int get_cpuname(void){
|
||||
if(support_avx())
|
||||
return CPUTYPE_PILEDRIVER;
|
||||
else
|
||||
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||
case 0:
|
||||
if(support_avx())
|
||||
return CPUTYPE_STEAMROLLER;
|
||||
else
|
||||
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
@@ -1244,6 +1296,7 @@ static char *cpuname[] = {
|
||||
"BULLDOZER",
|
||||
"PILEDRIVER",
|
||||
"HASWELL",
|
||||
"STEAMROLLER",
|
||||
};
|
||||
|
||||
static char *lowercpuname[] = {
|
||||
@@ -1295,11 +1348,12 @@ static char *lowercpuname[] = {
|
||||
"bulldozer",
|
||||
"piledriver",
|
||||
"haswell",
|
||||
"steamroller",
|
||||
};
|
||||
|
||||
static char *corename[] = {
|
||||
"UNKOWN",
|
||||
"80486",
|
||||
"80486",
|
||||
"P5",
|
||||
"P6",
|
||||
"KATMAI",
|
||||
@@ -1323,11 +1377,12 @@ static char *corename[] = {
|
||||
"BULLDOZER",
|
||||
"PILEDRIVER",
|
||||
"HASWELL",
|
||||
"STEAMROLLER",
|
||||
};
|
||||
|
||||
static char *corename_lower[] = {
|
||||
"unknown",
|
||||
"80486",
|
||||
"80486",
|
||||
"p5",
|
||||
"p6",
|
||||
"katmai",
|
||||
@@ -1351,6 +1406,7 @@ static char *corename_lower[] = {
|
||||
"bulldozer",
|
||||
"piledriver",
|
||||
"haswell",
|
||||
"steamroller",
|
||||
};
|
||||
|
||||
|
||||
@@ -1428,8 +1484,8 @@ int get_coretype(void){
|
||||
case 2:
|
||||
switch (model) {
|
||||
case 5:
|
||||
//Intel Core (Clarkdale) / Core (Arrandale)
|
||||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
||||
//Intel Core (Clarkdale) / Core (Arrandale)
|
||||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
||||
// Xeon (Clarkdale), 32nm
|
||||
return CORE_NEHALEM;
|
||||
case 10:
|
||||
@@ -1457,13 +1513,19 @@ int get_coretype(void){
|
||||
case 3:
|
||||
switch (model) {
|
||||
case 10:
|
||||
case 14:
|
||||
if(support_avx())
|
||||
return CORE_SANDYBRIDGE;
|
||||
else
|
||||
return CORE_NEHALEM; //OS doesn't support AVX
|
||||
case 12:
|
||||
case 15:
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
@@ -1471,12 +1533,17 @@ int get_coretype(void){
|
||||
case 4:
|
||||
switch (model) {
|
||||
case 5:
|
||||
case 6:
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -1490,8 +1557,8 @@ int get_coretype(void){
|
||||
if (family <= 0x5) return CORE_80486;
|
||||
if (family <= 0xe) return CORE_ATHLON;
|
||||
if (family == 0xf){
|
||||
if ((exfamily == 0) || (exfamily == 2)) return CORE_OPTERON;
|
||||
else if (exfamily == 5) return CORE_BOBCAT;
|
||||
if ((exfamily == 0) || (exfamily == 2)) return CORE_OPTERON;
|
||||
else if (exfamily == 5) return CORE_BOBCAT;
|
||||
else if (exfamily == 6) {
|
||||
switch (model) {
|
||||
case 1:
|
||||
@@ -1504,8 +1571,16 @@ int get_coretype(void){
|
||||
if(support_avx())
|
||||
return CORE_PILEDRIVER;
|
||||
else
|
||||
return CORE_BARCELONA; //OS don't support AVX.
|
||||
return CORE_BARCELONA; //OS don't support AVX.
|
||||
|
||||
case 0:
|
||||
if(support_avx())
|
||||
return CORE_STEAMROLLER;
|
||||
else
|
||||
return CORE_BARCELONA; //OS don't support AVX.
|
||||
}
|
||||
|
||||
|
||||
}else return CORE_BARCELONA;
|
||||
}
|
||||
}
|
||||
@@ -1538,35 +1613,41 @@ void get_cpuconfig(void){
|
||||
printf("#define L1_CODE_ASSOCIATIVE %d\n", info.associative);
|
||||
printf("#define L1_CODE_LINESIZE %d\n", info.linesize);
|
||||
}
|
||||
|
||||
|
||||
get_cacheinfo(CACHE_INFO_L1_D, &info);
|
||||
if (info.size > 0) {
|
||||
printf("#define L1_DATA_SIZE %d\n", info.size * 1024);
|
||||
printf("#define L1_DATA_ASSOCIATIVE %d\n", info.associative);
|
||||
printf("#define L1_DATA_LINESIZE %d\n", info.linesize);
|
||||
}
|
||||
|
||||
|
||||
get_cacheinfo(CACHE_INFO_L2, &info);
|
||||
if (info.size > 0) {
|
||||
printf("#define L2_SIZE %d\n", info.size * 1024);
|
||||
printf("#define L2_ASSOCIATIVE %d\n", info.associative);
|
||||
printf("#define L2_LINESIZE %d\n", info.linesize);
|
||||
} else {
|
||||
//fall back for some virtual machines.
|
||||
printf("#define L2_SIZE 1048576\n");
|
||||
printf("#define L2_ASSOCIATIVE 6\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
}
|
||||
|
||||
|
||||
|
||||
get_cacheinfo(CACHE_INFO_L3, &info);
|
||||
if (info.size > 0) {
|
||||
printf("#define L3_SIZE %d\n", info.size * 1024);
|
||||
printf("#define L3_ASSOCIATIVE %d\n", info.associative);
|
||||
printf("#define L3_LINESIZE %d\n", info.linesize);
|
||||
}
|
||||
|
||||
|
||||
get_cacheinfo(CACHE_INFO_L1_ITB, &info);
|
||||
if (info.size > 0) {
|
||||
printf("#define ITB_SIZE %d\n", info.size * 1024);
|
||||
printf("#define ITB_ASSOCIATIVE %d\n", info.associative);
|
||||
printf("#define ITB_ENTRIES %d\n", info.linesize);
|
||||
}
|
||||
|
||||
|
||||
get_cacheinfo(CACHE_INFO_L1_DTB, &info);
|
||||
if (info.size > 0) {
|
||||
printf("#define DTB_SIZE %d\n", info.size * 1024);
|
||||
@@ -1576,7 +1657,7 @@ void get_cpuconfig(void){
|
||||
//fall back for some virtual machines.
|
||||
printf("#define DTB_DEFAULT_ENTRIES 32\n");
|
||||
}
|
||||
|
||||
|
||||
features = get_cputype(GET_FEATURE);
|
||||
|
||||
if (features & HAVE_CMOV ) printf("#define HAVE_CMOV\n");
|
||||
@@ -1599,7 +1680,7 @@ void get_cpuconfig(void){
|
||||
if (features & HAVE_MISALIGNSSE) printf("#define HAVE_MISALIGNSSE\n");
|
||||
if (features & HAVE_128BITFPU) printf("#define HAVE_128BITFPU\n");
|
||||
if (features & HAVE_FASTMOVU) printf("#define HAVE_FASTMOVU\n");
|
||||
|
||||
|
||||
printf("#define NUM_SHAREDCACHE %d\n", get_cputype(GET_NUMSHARE) + 1);
|
||||
printf("#define NUM_CORES %d\n", get_cputype(GET_NUMCORES) + 1);
|
||||
|
||||
|
||||
2
ctest.c
2
ctest.c
@@ -125,7 +125,7 @@ ARCH_IA64
|
||||
BINARY_64
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_ARCH) || defined(__ARM_ARCH_7A__)
|
||||
#if defined(__ARM_ARCH) || defined(__ARM_ARCH_7A__) || defined(__arm__)
|
||||
ARCH_ARM
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# The Makefile compiles c wrappers and testers for CBLAS.
|
||||
# The Makefile compiles c wrappers and testers for CBLAS.
|
||||
#
|
||||
|
||||
TOPDIR = ..
|
||||
@@ -27,13 +27,13 @@ ctestl2o = c_cblas2.o c_c2chke.o auxiliary.o c_xerbla.o constant.o
|
||||
|
||||
ctestl3o = c_cblas3.o c_c3chke.o auxiliary.o c_xerbla.o constant.o
|
||||
|
||||
ztestl1o = c_zblas1.o
|
||||
ztestl1o = c_zblas1.o
|
||||
|
||||
ztestl2o = c_zblas2.o c_z2chke.o auxiliary.o c_xerbla.o constant.o
|
||||
|
||||
ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o constant.o
|
||||
|
||||
all :: all1 all2 all3
|
||||
all :: all1 all2 all3
|
||||
|
||||
all1: xscblat1 xdcblat1 xccblat1 xzcblat1
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
@@ -74,11 +74,23 @@ else
|
||||
OPENBLAS_NUM_THREADS=2 ./xzcblat3 < zin3
|
||||
endif
|
||||
|
||||
all3_3m: xzcblat3_3m xccblat3_3m
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
OMP_NUM_THREADS=2 ./xccblat3_3m < cin3_3m
|
||||
OMP_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m
|
||||
else
|
||||
OPENBLAS_NUM_THREADS=2 ./xccblat3_3m < cin3_3m
|
||||
OPENBLAS_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m
|
||||
endif
|
||||
|
||||
|
||||
|
||||
|
||||
clean ::
|
||||
rm -f x*
|
||||
rm -f x*
|
||||
|
||||
FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS)
|
||||
CEXTRALIB =
|
||||
CEXTRALIB =
|
||||
|
||||
# Single real
|
||||
xscblat1: $(stestl1o) c_sblat1.o $(TOPDIR)/$(LIBNAME)
|
||||
@@ -94,7 +106,7 @@ xdcblat2: $(dtestl2o) c_dblat2.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xdcblat2 c_dblat2.o $(dtestl2o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
xdcblat3: $(dtestl3o) c_dblat3.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xdcblat3 c_dblat3.o $(dtestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
|
||||
# Single complex
|
||||
xccblat1: $(ctestl1o) c_cblat1.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xccblat1 c_cblat1.o $(ctestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
@@ -103,12 +115,20 @@ xccblat2: $(ctestl2o) c_cblat2.o $(TOPDIR)/$(LIBNAME)
|
||||
xccblat3: $(ctestl3o) c_cblat3.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xccblat3 c_cblat3.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
# Double complex
|
||||
xccblat3_3m: $(ctestl3o) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
# Double complex
|
||||
xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xzcblat1 c_zblat1.o $(ztestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
xzcblat2: $(ztestl2o) c_zblat2.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xzcblat2 c_zblat2.o $(ztestl2o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
xzcblat3: $(ztestl3o) c_zblat3.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xzcblat3 c_zblat3.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
|
||||
|
||||
xzcblat3_3m: $(ztestl3o) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
|
||||
include $(TOPDIR)/Makefile.tail
|
||||
|
||||
292
ctest/c_c2chke.c
292
ctest/c_c2chke.c
@@ -26,11 +26,11 @@ void chkxer(void) {
|
||||
|
||||
void F77_c2chke(char *rout) {
|
||||
char *sf = ( rout ) ;
|
||||
float A[2] = {0.0,0.0},
|
||||
X[2] = {0.0,0.0},
|
||||
Y[2] = {0.0,0.0},
|
||||
float A[2] = {0.0,0.0},
|
||||
X[2] = {0.0,0.0},
|
||||
Y[2] = {0.0,0.0},
|
||||
ALPHA[2] = {0.0,0.0},
|
||||
BETA[2] = {0.0,0.0},
|
||||
BETA[2] = {0.0,0.0},
|
||||
RALPHA = 0.0;
|
||||
extern int cblas_info, cblas_lerr, cblas_ok;
|
||||
extern int RowMajorStrg;
|
||||
@@ -48,588 +48,588 @@ void F77_c2chke(char *rout) {
|
||||
if (strncmp( sf,"cblas_cgemv",11)==0) {
|
||||
cblas_rout = "cblas_cgemv";
|
||||
cblas_info = 1;
|
||||
cblas_cgemv(INVALID, CblasNoTrans, 0, 0,
|
||||
cblas_cgemv(INVALID, CblasNoTrans, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_cgemv(CblasColMajor, INVALID, 0, 0,
|
||||
cblas_cgemv(CblasColMajor, INVALID, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_cgemv(CblasColMajor, CblasNoTrans, INVALID, 0,
|
||||
cblas_cgemv(CblasColMajor, CblasNoTrans, INVALID, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_cgemv(CblasColMajor, CblasNoTrans, 0, INVALID,
|
||||
cblas_cgemv(CblasColMajor, CblasNoTrans, 0, INVALID,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = FALSE;
|
||||
cblas_cgemv(CblasColMajor, CblasNoTrans, 2, 0,
|
||||
cblas_cgemv(CblasColMajor, CblasNoTrans, 2, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_cgemv(CblasColMajor, CblasNoTrans, 0, 0,
|
||||
cblas_cgemv(CblasColMajor, CblasNoTrans, 0, 0,
|
||||
ALPHA, A, 1, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 12; RowMajorStrg = FALSE;
|
||||
cblas_cgemv(CblasColMajor, CblasNoTrans, 0, 0,
|
||||
cblas_cgemv(CblasColMajor, CblasNoTrans, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
|
||||
cblas_info = 2; RowMajorStrg = TRUE; RowMajorStrg = TRUE;
|
||||
cblas_cgemv(CblasRowMajor, INVALID, 0, 0,
|
||||
cblas_cgemv(CblasRowMajor, INVALID, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_cgemv(CblasRowMajor, CblasNoTrans, INVALID, 0,
|
||||
cblas_cgemv(CblasRowMajor, CblasNoTrans, INVALID, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_cgemv(CblasRowMajor, CblasNoTrans, 0, INVALID,
|
||||
cblas_cgemv(CblasRowMajor, CblasNoTrans, 0, INVALID,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = TRUE;
|
||||
cblas_cgemv(CblasRowMajor, CblasNoTrans, 0, 2,
|
||||
cblas_cgemv(CblasRowMajor, CblasNoTrans, 0, 2,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_cgemv(CblasRowMajor, CblasNoTrans, 0, 0,
|
||||
cblas_cgemv(CblasRowMajor, CblasNoTrans, 0, 0,
|
||||
ALPHA, A, 1, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 12; RowMajorStrg = TRUE;
|
||||
cblas_cgemv(CblasRowMajor, CblasNoTrans, 0, 0,
|
||||
cblas_cgemv(CblasRowMajor, CblasNoTrans, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_cgbmv",11)==0) {
|
||||
cblas_rout = "cblas_cgbmv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_cgbmv(INVALID, CblasNoTrans, 0, 0, 0, 0,
|
||||
cblas_cgbmv(INVALID, CblasNoTrans, 0, 0, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_cgbmv(CblasColMajor, INVALID, 0, 0, 0, 0,
|
||||
cblas_cgbmv(CblasColMajor, INVALID, 0, 0, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_cgbmv(CblasColMajor, CblasNoTrans, INVALID, 0, 0, 0,
|
||||
cblas_cgbmv(CblasColMajor, CblasNoTrans, INVALID, 0, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_cgbmv(CblasColMajor, CblasNoTrans, 0, INVALID, 0, 0,
|
||||
cblas_cgbmv(CblasColMajor, CblasNoTrans, 0, INVALID, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_cgbmv(CblasColMajor, CblasNoTrans, 0, 0, INVALID, 0,
|
||||
cblas_cgbmv(CblasColMajor, CblasNoTrans, 0, 0, INVALID, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_cgbmv(CblasColMajor, CblasNoTrans, 2, 0, 0, INVALID,
|
||||
cblas_cgbmv(CblasColMajor, CblasNoTrans, 2, 0, 0, INVALID,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_cgbmv(CblasColMajor, CblasNoTrans, 0, 0, 1, 0,
|
||||
cblas_cgbmv(CblasColMajor, CblasNoTrans, 0, 0, 1, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_cgbmv(CblasColMajor, CblasNoTrans, 0, 0, 0, 0,
|
||||
cblas_cgbmv(CblasColMajor, CblasNoTrans, 0, 0, 0, 0,
|
||||
ALPHA, A, 1, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = FALSE;
|
||||
cblas_cgbmv(CblasColMajor, CblasNoTrans, 0, 0, 0, 0,
|
||||
cblas_cgbmv(CblasColMajor, CblasNoTrans, 0, 0, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_cgbmv(CblasRowMajor, INVALID, 0, 0, 0, 0,
|
||||
cblas_cgbmv(CblasRowMajor, INVALID, 0, 0, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_cgbmv(CblasRowMajor, CblasNoTrans, INVALID, 0, 0, 0,
|
||||
cblas_cgbmv(CblasRowMajor, CblasNoTrans, INVALID, 0, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_cgbmv(CblasRowMajor, CblasNoTrans, 0, INVALID, 0, 0,
|
||||
cblas_cgbmv(CblasRowMajor, CblasNoTrans, 0, INVALID, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_cgbmv(CblasRowMajor, CblasNoTrans, 0, 0, INVALID, 0,
|
||||
cblas_cgbmv(CblasRowMajor, CblasNoTrans, 0, 0, INVALID, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_cgbmv(CblasRowMajor, CblasNoTrans, 2, 0, 0, INVALID,
|
||||
cblas_cgbmv(CblasRowMajor, CblasNoTrans, 2, 0, 0, INVALID,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_cgbmv(CblasRowMajor, CblasNoTrans, 0, 0, 1, 0,
|
||||
cblas_cgbmv(CblasRowMajor, CblasNoTrans, 0, 0, 1, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_cgbmv(CblasRowMajor, CblasNoTrans, 0, 0, 0, 0,
|
||||
cblas_cgbmv(CblasRowMajor, CblasNoTrans, 0, 0, 0, 0,
|
||||
ALPHA, A, 1, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = TRUE;
|
||||
cblas_cgbmv(CblasRowMajor, CblasNoTrans, 0, 0, 0, 0,
|
||||
cblas_cgbmv(CblasRowMajor, CblasNoTrans, 0, 0, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_chemv",11)==0) {
|
||||
cblas_rout = "cblas_chemv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_chemv(INVALID, CblasUpper, 0,
|
||||
cblas_chemv(INVALID, CblasUpper, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_chemv(CblasColMajor, INVALID, 0,
|
||||
cblas_chemv(CblasColMajor, INVALID, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_chemv(CblasColMajor, CblasUpper, INVALID,
|
||||
cblas_chemv(CblasColMajor, CblasUpper, INVALID,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_chemv(CblasColMajor, CblasUpper, 2,
|
||||
cblas_chemv(CblasColMajor, CblasUpper, 2,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = FALSE;
|
||||
cblas_chemv(CblasColMajor, CblasUpper, 0,
|
||||
cblas_chemv(CblasColMajor, CblasUpper, 0,
|
||||
ALPHA, A, 1, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_chemv(CblasColMajor, CblasUpper, 0,
|
||||
cblas_chemv(CblasColMajor, CblasUpper, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_chemv(CblasRowMajor, INVALID, 0,
|
||||
cblas_chemv(CblasRowMajor, INVALID, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_chemv(CblasRowMajor, CblasUpper, INVALID,
|
||||
cblas_chemv(CblasRowMajor, CblasUpper, INVALID,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_chemv(CblasRowMajor, CblasUpper, 2,
|
||||
cblas_chemv(CblasRowMajor, CblasUpper, 2,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = TRUE;
|
||||
cblas_chemv(CblasRowMajor, CblasUpper, 0,
|
||||
cblas_chemv(CblasRowMajor, CblasUpper, 0,
|
||||
ALPHA, A, 1, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_chemv(CblasRowMajor, CblasUpper, 0,
|
||||
cblas_chemv(CblasRowMajor, CblasUpper, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_chbmv",11)==0) {
|
||||
cblas_rout = "cblas_chbmv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_chbmv(INVALID, CblasUpper, 0, 0,
|
||||
cblas_chbmv(INVALID, CblasUpper, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_chbmv(CblasColMajor, INVALID, 0, 0,
|
||||
cblas_chbmv(CblasColMajor, INVALID, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_chbmv(CblasColMajor, CblasUpper, INVALID, 0,
|
||||
cblas_chbmv(CblasColMajor, CblasUpper, INVALID, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_chbmv(CblasColMajor, CblasUpper, 0, INVALID,
|
||||
cblas_chbmv(CblasColMajor, CblasUpper, 0, INVALID,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = FALSE;
|
||||
cblas_chbmv(CblasColMajor, CblasUpper, 0, 1,
|
||||
cblas_chbmv(CblasColMajor, CblasUpper, 0, 1,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_chbmv(CblasColMajor, CblasUpper, 0, 0,
|
||||
cblas_chbmv(CblasColMajor, CblasUpper, 0, 0,
|
||||
ALPHA, A, 1, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 12; RowMajorStrg = FALSE;
|
||||
cblas_chbmv(CblasColMajor, CblasUpper, 0, 0,
|
||||
cblas_chbmv(CblasColMajor, CblasUpper, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_chbmv(CblasRowMajor, INVALID, 0, 0,
|
||||
cblas_chbmv(CblasRowMajor, INVALID, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_chbmv(CblasRowMajor, CblasUpper, INVALID, 0,
|
||||
cblas_chbmv(CblasRowMajor, CblasUpper, INVALID, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_chbmv(CblasRowMajor, CblasUpper, 0, INVALID,
|
||||
cblas_chbmv(CblasRowMajor, CblasUpper, 0, INVALID,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = TRUE;
|
||||
cblas_chbmv(CblasRowMajor, CblasUpper, 0, 1,
|
||||
cblas_chbmv(CblasRowMajor, CblasUpper, 0, 1,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_chbmv(CblasRowMajor, CblasUpper, 0, 0,
|
||||
cblas_chbmv(CblasRowMajor, CblasUpper, 0, 0,
|
||||
ALPHA, A, 1, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 12; RowMajorStrg = TRUE;
|
||||
cblas_chbmv(CblasRowMajor, CblasUpper, 0, 0,
|
||||
cblas_chbmv(CblasRowMajor, CblasUpper, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_chpmv",11)==0) {
|
||||
cblas_rout = "cblas_chpmv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_chpmv(INVALID, CblasUpper, 0,
|
||||
cblas_chpmv(INVALID, CblasUpper, 0,
|
||||
ALPHA, A, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_chpmv(CblasColMajor, INVALID, 0,
|
||||
cblas_chpmv(CblasColMajor, INVALID, 0,
|
||||
ALPHA, A, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_chpmv(CblasColMajor, CblasUpper, INVALID,
|
||||
cblas_chpmv(CblasColMajor, CblasUpper, INVALID,
|
||||
ALPHA, A, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = FALSE;
|
||||
cblas_chpmv(CblasColMajor, CblasUpper, 0,
|
||||
cblas_chpmv(CblasColMajor, CblasUpper, 0,
|
||||
ALPHA, A, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 10; RowMajorStrg = FALSE;
|
||||
cblas_chpmv(CblasColMajor, CblasUpper, 0,
|
||||
cblas_chpmv(CblasColMajor, CblasUpper, 0,
|
||||
ALPHA, A, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_chpmv(CblasRowMajor, INVALID, 0,
|
||||
cblas_chpmv(CblasRowMajor, INVALID, 0,
|
||||
ALPHA, A, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_chpmv(CblasRowMajor, CblasUpper, INVALID,
|
||||
cblas_chpmv(CblasRowMajor, CblasUpper, INVALID,
|
||||
ALPHA, A, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = TRUE;
|
||||
cblas_chpmv(CblasRowMajor, CblasUpper, 0,
|
||||
cblas_chpmv(CblasRowMajor, CblasUpper, 0,
|
||||
ALPHA, A, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 10; RowMajorStrg = TRUE;
|
||||
cblas_chpmv(CblasRowMajor, CblasUpper, 0,
|
||||
cblas_chpmv(CblasRowMajor, CblasUpper, 0,
|
||||
ALPHA, A, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_ctrmv",11)==0) {
|
||||
cblas_rout = "cblas_ctrmv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_ctrmv(INVALID, CblasUpper, CblasNoTrans,
|
||||
cblas_ctrmv(INVALID, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_ctrmv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
cblas_ctrmv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_ctrmv(CblasColMajor, CblasUpper, INVALID,
|
||||
cblas_ctrmv(CblasColMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_ctrmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctrmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_ctrmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctrmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = FALSE;
|
||||
cblas_ctrmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctrmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 2, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_ctrmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctrmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_ctrmv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
cblas_ctrmv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_ctrmv(CblasRowMajor, CblasUpper, INVALID,
|
||||
cblas_ctrmv(CblasRowMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_ctrmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctrmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_ctrmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctrmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = TRUE;
|
||||
cblas_ctrmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctrmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 2, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_ctrmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctrmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_ctbmv",11)==0) {
|
||||
cblas_rout = "cblas_ctbmv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_ctbmv(INVALID, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbmv(INVALID, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_ctbmv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
cblas_ctbmv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_ctbmv(CblasColMajor, CblasUpper, INVALID,
|
||||
cblas_ctbmv(CblasColMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_ctbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_ctbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_ctbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, INVALID, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = FALSE;
|
||||
cblas_ctbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 1, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 10; RowMajorStrg = FALSE;
|
||||
cblas_ctbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_ctbmv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
cblas_ctbmv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_ctbmv(CblasRowMajor, CblasUpper, INVALID,
|
||||
cblas_ctbmv(CblasRowMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_ctbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_ctbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_ctbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, INVALID, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = TRUE;
|
||||
cblas_ctbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 1, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 10; RowMajorStrg = TRUE;
|
||||
cblas_ctbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_ctpmv",11)==0) {
|
||||
cblas_rout = "cblas_ctpmv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_ctpmv(INVALID, CblasUpper, CblasNoTrans,
|
||||
cblas_ctpmv(INVALID, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_ctpmv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
cblas_ctpmv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_ctpmv(CblasColMajor, CblasUpper, INVALID,
|
||||
cblas_ctpmv(CblasColMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_ctpmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctpmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_ctpmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctpmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = FALSE;
|
||||
cblas_ctpmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctpmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_ctpmv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
cblas_ctpmv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_ctpmv(CblasRowMajor, CblasUpper, INVALID,
|
||||
cblas_ctpmv(CblasRowMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_ctpmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctpmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_ctpmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctpmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = TRUE;
|
||||
cblas_ctpmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctpmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_ctrsv",11)==0) {
|
||||
cblas_rout = "cblas_ctrsv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_ctrsv(INVALID, CblasUpper, CblasNoTrans,
|
||||
cblas_ctrsv(INVALID, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_ctrsv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
cblas_ctrsv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_ctrsv(CblasColMajor, CblasUpper, INVALID,
|
||||
cblas_ctrsv(CblasColMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_ctrsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctrsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_ctrsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctrsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = FALSE;
|
||||
cblas_ctrsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctrsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 2, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_ctrsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctrsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_ctrsv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
cblas_ctrsv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_ctrsv(CblasRowMajor, CblasUpper, INVALID,
|
||||
cblas_ctrsv(CblasRowMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_ctrsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctrsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_ctrsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctrsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = TRUE;
|
||||
cblas_ctrsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctrsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 2, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_ctrsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctrsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_ctbsv",11)==0) {
|
||||
cblas_rout = "cblas_ctbsv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_ctbsv(INVALID, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbsv(INVALID, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_ctbsv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
cblas_ctbsv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_ctbsv(CblasColMajor, CblasUpper, INVALID,
|
||||
cblas_ctbsv(CblasColMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_ctbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_ctbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_ctbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, INVALID, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = FALSE;
|
||||
cblas_ctbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 1, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 10; RowMajorStrg = FALSE;
|
||||
cblas_ctbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_ctbsv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
cblas_ctbsv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_ctbsv(CblasRowMajor, CblasUpper, INVALID,
|
||||
cblas_ctbsv(CblasRowMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_ctbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_ctbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_ctbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, INVALID, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = TRUE;
|
||||
cblas_ctbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 1, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 10; RowMajorStrg = TRUE;
|
||||
cblas_ctbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_ctpsv",11)==0) {
|
||||
cblas_rout = "cblas_ctpsv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_ctpsv(INVALID, CblasUpper, CblasNoTrans,
|
||||
cblas_ctpsv(INVALID, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_ctpsv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
cblas_ctpsv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_ctpsv(CblasColMajor, CblasUpper, INVALID,
|
||||
cblas_ctpsv(CblasColMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_ctpsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctpsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_ctpsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctpsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = FALSE;
|
||||
cblas_ctpsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctpsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_ctpsv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
cblas_ctpsv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_ctpsv(CblasRowMajor, CblasUpper, INVALID,
|
||||
cblas_ctpsv(CblasRowMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_ctpsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctpsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_ctpsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctpsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = TRUE;
|
||||
cblas_ctpsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_ctpsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_cgeru",10)==0) {
|
||||
@@ -818,7 +818,7 @@ void F77_c2chke(char *rout) {
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_chpr(CblasColMajor, CblasUpper, 0, RALPHA, X, 0, A );
|
||||
chkxer();
|
||||
}
|
||||
}
|
||||
if (cblas_ok == TRUE)
|
||||
printf(" %-12s PASSED THE TESTS OF ERROR-EXITS\n", cblas_rout);
|
||||
else
|
||||
|
||||
246
ctest/c_c3chke.c
246
ctest/c_c3chke.c
@@ -30,7 +30,7 @@ void F77_c3chke(char * rout) {
|
||||
B[4] = {0.0,0.0,0.0,0.0},
|
||||
C[4] = {0.0,0.0,0.0,0.0},
|
||||
ALPHA[2] = {0.0,0.0},
|
||||
BETA[2] = {0.0,0.0},
|
||||
BETA[2] = {0.0,0.0},
|
||||
RALPHA = 0.0, RBETA = 0.0;
|
||||
extern int cblas_info, cblas_lerr, cblas_ok;
|
||||
extern int RowMajorStrg;
|
||||
@@ -45,19 +45,249 @@ void F77_c3chke(char * rout) {
|
||||
F77_xerbla(cblas_rout,&cblas_info);
|
||||
}
|
||||
|
||||
if (strncmp( sf,"cblas_cgemm" ,11)==0) {
|
||||
cblas_rout = "cblas_cgemm" ;
|
||||
|
||||
if (strncmp( sf,"cblas_cgemm3m" ,13)==0) {
|
||||
cblas_rout = "cblas_cgemm3" ;
|
||||
|
||||
cblas_info = 1;
|
||||
cblas_cgemm( INVALID, CblasNoTrans, CblasNoTrans, 0, 0, 0,
|
||||
cblas_cgemm3m( INVALID, CblasNoTrans, CblasNoTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 1;
|
||||
cblas_cgemm( INVALID, CblasNoTrans, CblasTrans, 0, 0, 0,
|
||||
cblas_cgemm3m( INVALID, CblasNoTrans, CblasTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 1;
|
||||
cblas_cgemm( INVALID, CblasTrans, CblasNoTrans, 0, 0, 0,
|
||||
cblas_cgemm3m( INVALID, CblasTrans, CblasNoTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 1;
|
||||
cblas_cgemm3m( INVALID, CblasTrans, CblasTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, INVALID, CblasNoTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, INVALID, CblasTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, INVALID, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, INVALID, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 2 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 2 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 2 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 2 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
|
||||
} else if (strncmp( sf,"cblas_cgemm" ,11)==0) {
|
||||
cblas_rout = "cblas_cgemm" ;
|
||||
|
||||
|
||||
cblas_info = 1;
|
||||
cblas_cgemm( INVALID, CblasNoTrans, CblasNoTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 1;
|
||||
cblas_cgemm( INVALID, CblasNoTrans, CblasTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 1;
|
||||
cblas_cgemm( INVALID, CblasTrans, CblasNoTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 1;
|
||||
@@ -272,7 +502,7 @@ void F77_c3chke(char * rout) {
|
||||
cblas_cgemm( CblasRowMajor, CblasTrans, CblasTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
|
||||
|
||||
} else if (strncmp( sf,"cblas_chemm" ,11)==0) {
|
||||
cblas_rout = "cblas_chemm" ;
|
||||
|
||||
@@ -1696,7 +1926,7 @@ void F77_c3chke(char * rout) {
|
||||
cblas_csyr2k(CblasColMajor, CblasLower, CblasTrans, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
|
||||
|
||||
}
|
||||
|
||||
if (cblas_ok == 1 )
|
||||
|
||||
@@ -16,21 +16,21 @@ void F77_caxpy(const int *N, const void *alpha, void *X,
|
||||
return;
|
||||
}
|
||||
|
||||
void F77_ccopy(const int *N, void *X, const int *incX,
|
||||
void F77_ccopy(const int *N, void *X, const int *incX,
|
||||
void *Y, const int *incY)
|
||||
{
|
||||
cblas_ccopy(*N, X, *incX, Y, *incY);
|
||||
return;
|
||||
}
|
||||
|
||||
void F77_cdotc(const int *N, void *X, const int *incX,
|
||||
void F77_cdotc(const int *N, void *X, const int *incX,
|
||||
void *Y, const int *incY, void *dotc)
|
||||
{
|
||||
cblas_cdotc_sub(*N, X, *incX, Y, *incY, dotc);
|
||||
return;
|
||||
}
|
||||
|
||||
void F77_cdotu(const int *N, void *X, const int *incX,
|
||||
void F77_cdotu(const int *N, void *X, const int *incX,
|
||||
void *Y, const int *incY,void *dotu)
|
||||
{
|
||||
cblas_cdotu_sub(*N, X, *incX, Y, *incY, dotu);
|
||||
|
||||
@@ -8,9 +8,9 @@
|
||||
#include "common.h"
|
||||
#include "cblas_test.h"
|
||||
|
||||
void F77_cgemv(int *order, char *transp, int *m, int *n,
|
||||
void F77_cgemv(int *order, char *transp, int *m, int *n,
|
||||
const void *alpha,
|
||||
CBLAS_TEST_COMPLEX *a, int *lda, const void *x, int *incx,
|
||||
CBLAS_TEST_COMPLEX *a, int *lda, const void *x, int *incx,
|
||||
const void *beta, void *y, int *incy) {
|
||||
|
||||
CBLAS_TEST_COMPLEX *A;
|
||||
@@ -38,9 +38,9 @@ void F77_cgemv(int *order, char *transp, int *m, int *n,
|
||||
*m, *n, alpha, a, *lda, x, *incx, beta, y, *incy );
|
||||
}
|
||||
|
||||
void F77_cgbmv(int *order, char *transp, int *m, int *n, int *kl, int *ku,
|
||||
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
CBLAS_TEST_COMPLEX *x, int *incx,
|
||||
void F77_cgbmv(int *order, char *transp, int *m, int *n, int *kl, int *ku,
|
||||
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
CBLAS_TEST_COMPLEX *x, int *incx,
|
||||
CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *y, int *incy) {
|
||||
|
||||
CBLAS_TEST_COMPLEX *A;
|
||||
@@ -85,8 +85,8 @@ void F77_cgbmv(int *order, char *transp, int *m, int *n, int *kl, int *ku,
|
||||
*incx, beta, y, *incy );
|
||||
}
|
||||
|
||||
void F77_cgeru(int *order, int *m, int *n, CBLAS_TEST_COMPLEX *alpha,
|
||||
CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *y, int *incy,
|
||||
void F77_cgeru(int *order, int *m, int *n, CBLAS_TEST_COMPLEX *alpha,
|
||||
CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *y, int *incy,
|
||||
CBLAS_TEST_COMPLEX *a, int *lda){
|
||||
|
||||
CBLAS_TEST_COMPLEX *A;
|
||||
@@ -114,8 +114,8 @@ void F77_cgeru(int *order, int *m, int *n, CBLAS_TEST_COMPLEX *alpha,
|
||||
cblas_cgeru( UNDEFINED, *m, *n, alpha, x, *incx, y, *incy, a, *lda );
|
||||
}
|
||||
|
||||
void F77_cgerc(int *order, int *m, int *n, CBLAS_TEST_COMPLEX *alpha,
|
||||
CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *y, int *incy,
|
||||
void F77_cgerc(int *order, int *m, int *n, CBLAS_TEST_COMPLEX *alpha,
|
||||
CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *y, int *incy,
|
||||
CBLAS_TEST_COMPLEX *a, int *lda) {
|
||||
CBLAS_TEST_COMPLEX *A;
|
||||
int i,j,LDA;
|
||||
@@ -165,7 +165,7 @@ void F77_chemv(int *order, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha,
|
||||
free(A);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_chemv( CblasColMajor, uplo, *n, alpha, a, *lda, x, *incx,
|
||||
cblas_chemv( CblasColMajor, uplo, *n, alpha, a, *lda, x, *incx,
|
||||
beta, y, *incy );
|
||||
else
|
||||
cblas_chemv( UNDEFINED, uplo, *n, alpha, a, *lda, x, *incx,
|
||||
@@ -173,7 +173,7 @@ void F77_chemv(int *order, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha,
|
||||
}
|
||||
|
||||
void F77_chbmv(int *order, char *uplow, int *n, int *k,
|
||||
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *beta,
|
||||
CBLAS_TEST_COMPLEX *y, int *incy){
|
||||
|
||||
@@ -186,7 +186,7 @@ int i,irow,j,jcol,LDA;
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
if (uplo != CblasUpper && uplo != CblasLower )
|
||||
cblas_chbmv(CblasRowMajor, UNDEFINED, *n, *k, alpha, a, *lda, x,
|
||||
cblas_chbmv(CblasRowMajor, UNDEFINED, *n, *k, alpha, a, *lda, x,
|
||||
*incx, beta, y, *incy );
|
||||
else {
|
||||
LDA = *k+2;
|
||||
@@ -237,7 +237,7 @@ int i,irow,j,jcol,LDA;
|
||||
}
|
||||
|
||||
void F77_chpmv(int *order, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha,
|
||||
CBLAS_TEST_COMPLEX *ap, CBLAS_TEST_COMPLEX *x, int *incx,
|
||||
CBLAS_TEST_COMPLEX *ap, CBLAS_TEST_COMPLEX *x, int *incx,
|
||||
CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *y, int *incy){
|
||||
|
||||
CBLAS_TEST_COMPLEX *A, *AP;
|
||||
@@ -247,7 +247,7 @@ void F77_chpmv(int *order, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha,
|
||||
get_uplo_type(uplow,&uplo);
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
if (uplo != CblasUpper && uplo != CblasLower )
|
||||
cblas_chpmv(CblasRowMajor, UNDEFINED, *n, alpha, ap, x, *incx,
|
||||
cblas_chpmv(CblasRowMajor, UNDEFINED, *n, alpha, ap, x, *incx,
|
||||
beta, y, *incy);
|
||||
else {
|
||||
LDA = *n;
|
||||
@@ -344,7 +344,7 @@ void F77_ctbmv(int *order, char *uplow, char *transp, char *diagn,
|
||||
}
|
||||
}
|
||||
}
|
||||
cblas_ctbmv(CblasRowMajor, uplo, trans, diag, *n, *k, A, LDA, x,
|
||||
cblas_ctbmv(CblasRowMajor, uplo, trans, diag, *n, *k, A, LDA, x,
|
||||
*incx);
|
||||
free(A);
|
||||
}
|
||||
@@ -371,7 +371,7 @@ void F77_ctbsv(int *order, char *uplow, char *transp, char *diagn,
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
if (uplo != CblasUpper && uplo != CblasLower )
|
||||
cblas_ctbsv(CblasRowMajor, UNDEFINED, trans, diag, *n, *k, a, *lda, x,
|
||||
cblas_ctbsv(CblasRowMajor, UNDEFINED, trans, diag, *n, *k, a, *lda, x,
|
||||
*incx);
|
||||
else {
|
||||
LDA = *k+2;
|
||||
@@ -408,7 +408,7 @@ void F77_ctbsv(int *order, char *uplow, char *transp, char *diagn,
|
||||
}
|
||||
}
|
||||
}
|
||||
cblas_ctbsv(CblasRowMajor, uplo, trans, diag, *n, *k, A, LDA,
|
||||
cblas_ctbsv(CblasRowMajor, uplo, trans, diag, *n, *k, A, LDA,
|
||||
x, *incx);
|
||||
free(A);
|
||||
}
|
||||
@@ -674,7 +674,7 @@ void F77_chpr2(int *order, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha,
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
if (uplo != CblasUpper && uplo != CblasLower )
|
||||
cblas_chpr2( CblasRowMajor, UNDEFINED, *n, alpha, x, *incx, y,
|
||||
cblas_chpr2( CblasRowMajor, UNDEFINED, *n, alpha, x, *incx, y,
|
||||
*incy, ap );
|
||||
else {
|
||||
LDA = *n;
|
||||
@@ -752,7 +752,7 @@ void F77_cher(int *order, char *uplow, int *n, float *alpha,
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc((*n)*LDA*sizeof( CBLAS_TEST_COMPLEX ));
|
||||
|
||||
for( i=0; i<*n; i++ )
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
|
||||
A[ LDA*i+j ].imag=a[ (*lda)*j+i ].imag;
|
||||
@@ -786,7 +786,7 @@ void F77_cher2(int *order, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha,
|
||||
LDA = *n+1;
|
||||
A= ( CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
|
||||
for( i=0; i<*n; i++ )
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
|
||||
A[ LDA*i+j ].imag=a[ (*lda)*j+i ].imag;
|
||||
|
||||
142
ctest/c_cblas3.c
142
ctest/c_cblas3.c
@@ -12,9 +12,9 @@
|
||||
#define TEST_ROW_MJR 1
|
||||
#define UNDEFINED -1
|
||||
|
||||
void F77_cgemm(int *order, char *transpa, char *transpb, int *m, int *n,
|
||||
void F77_cgemm(int *order, char *transpa, char *transpb, int *m, int *n,
|
||||
int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
|
||||
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
|
||||
CBLAS_TEST_COMPLEX *c, int *ldc ) {
|
||||
|
||||
CBLAS_TEST_COMPLEX *A, *B, *C;
|
||||
@@ -88,6 +88,7 @@ void F77_cgemm(int *order, char *transpa, char *transpb, int *m, int *n,
|
||||
cblas_cgemm( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, beta, c, *ldc );
|
||||
}
|
||||
|
||||
void F77_chemm(int *order, char *rtlf, char *uplow, int *m, int *n,
|
||||
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
|
||||
@@ -134,7 +135,7 @@ void F77_chemm(int *order, char *rtlf, char *uplow, int *m, int *n,
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
|
||||
}
|
||||
cblas_chemm( CblasRowMajor, side, uplo, *m, *n, alpha, A, LDA, B, LDB,
|
||||
cblas_chemm( CblasRowMajor, side, uplo, *m, *n, alpha, A, LDA, B, LDB,
|
||||
beta, C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
@@ -146,10 +147,10 @@ void F77_chemm(int *order, char *rtlf, char *uplow, int *m, int *n,
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_chemm( CblasColMajor, side, uplo, *m, *n, alpha, a, *lda, b, *ldb,
|
||||
cblas_chemm( CblasColMajor, side, uplo, *m, *n, alpha, a, *lda, b, *ldb,
|
||||
beta, c, *ldc );
|
||||
else
|
||||
cblas_chemm( UNDEFINED, side, uplo, *m, *n, alpha, a, *lda, b, *ldb,
|
||||
cblas_chemm( UNDEFINED, side, uplo, *m, *n, alpha, a, *lda, b, *ldb,
|
||||
beta, c, *ldc );
|
||||
}
|
||||
void F77_csymm(int *order, char *rtlf, char *uplow, int *m, int *n,
|
||||
@@ -190,7 +191,7 @@ void F77_csymm(int *order, char *rtlf, char *uplow, int *m, int *n,
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ )
|
||||
C[i*LDC+j]=c[j*(*ldc)+i];
|
||||
cblas_csymm( CblasRowMajor, side, uplo, *m, *n, alpha, A, LDA, B, LDB,
|
||||
cblas_csymm( CblasRowMajor, side, uplo, *m, *n, alpha, A, LDA, B, LDB,
|
||||
beta, C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ )
|
||||
@@ -200,15 +201,15 @@ void F77_csymm(int *order, char *rtlf, char *uplow, int *m, int *n,
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_csymm( CblasColMajor, side, uplo, *m, *n, alpha, a, *lda, b, *ldb,
|
||||
cblas_csymm( CblasColMajor, side, uplo, *m, *n, alpha, a, *lda, b, *ldb,
|
||||
beta, c, *ldc );
|
||||
else
|
||||
cblas_csymm( UNDEFINED, side, uplo, *m, *n, alpha, a, *lda, b, *ldb,
|
||||
cblas_csymm( UNDEFINED, side, uplo, *m, *n, alpha, a, *lda, b, *ldb,
|
||||
beta, c, *ldc );
|
||||
}
|
||||
|
||||
void F77_cherk(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
float *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
float *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
float *beta, CBLAS_TEST_COMPLEX *c, int *ldc ) {
|
||||
|
||||
int i,j,LDA,LDC;
|
||||
@@ -245,7 +246,7 @@ void F77_cherk(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
|
||||
}
|
||||
cblas_cherk(CblasRowMajor, uplo, trans, *n, *k, *alpha, A, LDA, *beta,
|
||||
cblas_cherk(CblasRowMajor, uplo, trans, *n, *k, *alpha, A, LDA, *beta,
|
||||
C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*n; i++ ) {
|
||||
@@ -256,15 +257,15 @@ void F77_cherk(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_cherk(CblasColMajor, uplo, trans, *n, *k, *alpha, a, *lda, *beta,
|
||||
cblas_cherk(CblasColMajor, uplo, trans, *n, *k, *alpha, a, *lda, *beta,
|
||||
c, *ldc );
|
||||
else
|
||||
cblas_cherk(UNDEFINED, uplo, trans, *n, *k, *alpha, a, *lda, *beta,
|
||||
cblas_cherk(UNDEFINED, uplo, trans, *n, *k, *alpha, a, *lda, *beta,
|
||||
c, *ldc );
|
||||
}
|
||||
|
||||
void F77_csyrk(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *c, int *ldc ) {
|
||||
|
||||
int i,j,LDA,LDC;
|
||||
@@ -301,7 +302,7 @@ void F77_csyrk(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
|
||||
}
|
||||
cblas_csyrk(CblasRowMajor, uplo, trans, *n, *k, alpha, A, LDA, beta,
|
||||
cblas_csyrk(CblasRowMajor, uplo, trans, *n, *k, alpha, A, LDA, beta,
|
||||
C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*n; i++ ) {
|
||||
@@ -312,10 +313,10 @@ void F77_csyrk(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_csyrk(CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda, beta,
|
||||
cblas_csyrk(CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda, beta,
|
||||
c, *ldc );
|
||||
else
|
||||
cblas_csyrk(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda, beta,
|
||||
cblas_csyrk(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda, beta,
|
||||
c, *ldc );
|
||||
}
|
||||
void F77_cher2k(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
@@ -364,7 +365,7 @@ void F77_cher2k(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
|
||||
}
|
||||
cblas_cher2k(CblasRowMajor, uplo, trans, *n, *k, alpha, A, LDA,
|
||||
cblas_cher2k(CblasRowMajor, uplo, trans, *n, *k, alpha, A, LDA,
|
||||
B, LDB, *beta, C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*n; i++ ) {
|
||||
@@ -376,10 +377,10 @@ void F77_cher2k(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_cher2k(CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda,
|
||||
cblas_cher2k(CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, *beta, c, *ldc );
|
||||
else
|
||||
cblas_cher2k(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda,
|
||||
cblas_cher2k(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, *beta, c, *ldc );
|
||||
}
|
||||
void F77_csyr2k(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
@@ -428,7 +429,7 @@ void F77_csyr2k(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
|
||||
}
|
||||
cblas_csyr2k(CblasRowMajor, uplo, trans, *n, *k, alpha, A, LDA,
|
||||
cblas_csyr2k(CblasRowMajor, uplo, trans, *n, *k, alpha, A, LDA,
|
||||
B, LDB, beta, C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*n; i++ ) {
|
||||
@@ -440,14 +441,14 @@ void F77_csyr2k(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_csyr2k(CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda,
|
||||
cblas_csyr2k(CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, beta, c, *ldc );
|
||||
else
|
||||
cblas_csyr2k(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda,
|
||||
cblas_csyr2k(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, beta, c, *ldc );
|
||||
}
|
||||
void F77_ctrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
||||
int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a,
|
||||
int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a,
|
||||
int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) {
|
||||
int i,j,LDA,LDB;
|
||||
CBLAS_TEST_COMPLEX *A, *B;
|
||||
@@ -487,7 +488,7 @@ void F77_ctrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
cblas_ctrmm(CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha,
|
||||
cblas_ctrmm(CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha,
|
||||
A, LDA, B, LDB );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
@@ -498,15 +499,15 @@ void F77_ctrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
||||
free(B);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_ctrmm(CblasColMajor, side, uplo, trans, diag, *m, *n, alpha,
|
||||
cblas_ctrmm(CblasColMajor, side, uplo, trans, diag, *m, *n, alpha,
|
||||
a, *lda, b, *ldb);
|
||||
else
|
||||
cblas_ctrmm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha,
|
||||
cblas_ctrmm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha,
|
||||
a, *lda, b, *ldb);
|
||||
}
|
||||
|
||||
void F77_ctrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
||||
int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a,
|
||||
int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a,
|
||||
int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) {
|
||||
int i,j,LDA,LDB;
|
||||
CBLAS_TEST_COMPLEX *A, *B;
|
||||
@@ -546,7 +547,7 @@ void F77_ctrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
cblas_ctrsm(CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha,
|
||||
cblas_ctrsm(CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha,
|
||||
A, LDA, B, LDB );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
@@ -557,9 +558,90 @@ void F77_ctrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
||||
free(B);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_ctrsm(CblasColMajor, side, uplo, trans, diag, *m, *n, alpha,
|
||||
cblas_ctrsm(CblasColMajor, side, uplo, trans, diag, *m, *n, alpha,
|
||||
a, *lda, b, *ldb);
|
||||
else
|
||||
cblas_ctrsm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha,
|
||||
cblas_ctrsm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha,
|
||||
a, *lda, b, *ldb);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void F77_cgemm3m(int *order, char *transpa, char *transpb, int *m, int *n,
|
||||
int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
|
||||
CBLAS_TEST_COMPLEX *c, int *ldc ) {
|
||||
|
||||
CBLAS_TEST_COMPLEX *A, *B, *C;
|
||||
int i,j,LDA, LDB, LDC;
|
||||
enum CBLAS_TRANSPOSE transa, transb;
|
||||
|
||||
get_transpose_type(transpa, &transa);
|
||||
get_transpose_type(transpb, &transb);
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
if (transa == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
else {
|
||||
LDA = *m+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*m; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
|
||||
if (transb == CblasNoTrans) {
|
||||
LDB = *n+1;
|
||||
B=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDB*sizeof(CBLAS_TEST_COMPLEX) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
}
|
||||
else {
|
||||
LDB = *k+1;
|
||||
B=(CBLAS_TEST_COMPLEX* )malloc(LDB*(*n)*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
}
|
||||
|
||||
LDC = *n+1;
|
||||
C=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
|
||||
}
|
||||
cblas_cgemm3m( CblasRowMajor, transa, transb, *m, *n, *k, alpha, A, LDA,
|
||||
B, LDB, beta, C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
c[j*(*ldc)+i].real=C[i*LDC+j].real;
|
||||
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
|
||||
}
|
||||
free(A);
|
||||
free(B);
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_cgemm3m( CblasColMajor, transa, transb, *m, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, beta, c, *ldc );
|
||||
else
|
||||
cblas_cgemm3m( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, beta, c, *ldc );
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -348,13 +348,13 @@
|
||||
160 IF (CORDER) THEN
|
||||
CALL CCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC,
|
||||
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
|
||||
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
|
||||
$ 0 )
|
||||
END IF
|
||||
IF (RORDER) THEN
|
||||
CALL CCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC,
|
||||
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
|
||||
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
|
||||
$ 1 )
|
||||
END IF
|
||||
GO TO 200
|
||||
@@ -581,7 +581,7 @@
|
||||
CTRANS = ' CblasNoTrans'
|
||||
ELSE IF (TRANS.EQ.'T')THEN
|
||||
CTRANS = ' CblasTrans'
|
||||
ELSE
|
||||
ELSE
|
||||
CTRANS = 'CblasConjTrans'
|
||||
END IF
|
||||
TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C'
|
||||
@@ -684,7 +684,7 @@
|
||||
*
|
||||
* See what data changed inside subroutines.
|
||||
*
|
||||
* IF(TRANS .NE. 'C' .OR. (INCX .GT. 0 .AND. INCY .GT. 0)) THEN
|
||||
* IF(TRANS .NE. 'C' .OR. (INCX .GT. 0 .AND. INCY .GT. 0)) THEN
|
||||
ISAME( 1 ) = TRANS.EQ.TRANSS
|
||||
ISAME( 2 ) = MS.EQ.M
|
||||
ISAME( 3 ) = NS.EQ.N
|
||||
@@ -925,7 +925,7 @@
|
||||
UPLO = ICH( IC: IC )
|
||||
IF (UPLO.EQ.'U')THEN
|
||||
CUPLO = ' CblasUpper'
|
||||
ELSE
|
||||
ELSE
|
||||
CUPLO = ' CblasLower'
|
||||
END IF
|
||||
*
|
||||
@@ -1284,7 +1284,7 @@
|
||||
UPLO = ICHU( ICU: ICU )
|
||||
IF (UPLO.EQ.'U')THEN
|
||||
CUPLO = ' CblasUpper'
|
||||
ELSE
|
||||
ELSE
|
||||
CUPLO = ' CblasLower'
|
||||
END IF
|
||||
*
|
||||
@@ -1294,7 +1294,7 @@
|
||||
CTRANS = ' CblasNoTrans'
|
||||
ELSE IF (TRANS.EQ.'T')THEN
|
||||
CTRANS = ' CblasTrans'
|
||||
ELSE
|
||||
ELSE
|
||||
CTRANS = 'CblasConjTrans'
|
||||
END IF
|
||||
*
|
||||
|
||||
@@ -424,7 +424,7 @@
|
||||
END
|
||||
SUBROUTINE CCHK1( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI,
|
||||
$ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX,
|
||||
$ A, AA, AS, B, BB, BS, C, CC, CS, CT, G,
|
||||
$ A, AA, AS, B, BB, BS, C, CC, CS, CT, G,
|
||||
$ IORDER )
|
||||
*
|
||||
* Tests CGEMM.
|
||||
@@ -600,7 +600,7 @@
|
||||
IF( REWI )
|
||||
$ REWIND NTRA
|
||||
CALL CCGEMM( IORDER, TRANSA, TRANSB, M, N,
|
||||
$ K, ALPHA, AA, LDA, BB, LDB,
|
||||
$ K, ALPHA, AA, LDA, BB, LDB,
|
||||
$ BETA, CC, LDC )
|
||||
*
|
||||
* Check if error-exit was taken incorrectly.
|
||||
@@ -688,7 +688,7 @@
|
||||
*
|
||||
120 CONTINUE
|
||||
WRITE( NOUT, FMT = 9996 )SNAME
|
||||
CALL CPRCN1(NOUT, NC, SNAME, IORDER, TRANSA, TRANSB,
|
||||
CALL CPRCN1(NOUT, NC, SNAME, IORDER, TRANSA, TRANSB,
|
||||
$ M, N, K, ALPHA, LDA, LDB, BETA, LDC)
|
||||
*
|
||||
130 CONTINUE
|
||||
@@ -724,24 +724,24 @@
|
||||
CHARACTER*1 TRANSA, TRANSB
|
||||
CHARACTER*12 SNAME
|
||||
CHARACTER*14 CRC, CTA,CTB
|
||||
|
||||
|
||||
IF (TRANSA.EQ.'N')THEN
|
||||
CTA = ' CblasNoTrans'
|
||||
ELSE IF (TRANSA.EQ.'T')THEN
|
||||
CTA = ' CblasTrans'
|
||||
ELSE
|
||||
ELSE
|
||||
CTA = 'CblasConjTrans'
|
||||
END IF
|
||||
IF (TRANSB.EQ.'N')THEN
|
||||
CTB = ' CblasNoTrans'
|
||||
ELSE IF (TRANSB.EQ.'T')THEN
|
||||
CTB = ' CblasTrans'
|
||||
ELSE
|
||||
ELSE
|
||||
CTB = 'CblasConjTrans'
|
||||
END IF
|
||||
IF (IORDER.EQ.1)THEN
|
||||
CRC = ' CblasRowMajor'
|
||||
ELSE
|
||||
ELSE
|
||||
CRC = ' CblasColMajor'
|
||||
END IF
|
||||
WRITE(NOUT, FMT = 9995)NC,SNAME,CRC, CTA,CTB
|
||||
@@ -754,7 +754,7 @@
|
||||
*
|
||||
SUBROUTINE CCHK2( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI,
|
||||
$ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX,
|
||||
$ A, AA, AS, B, BB, BS, C, CC, CS, CT, G,
|
||||
$ A, AA, AS, B, BB, BS, C, CC, CS, CT, G,
|
||||
$ IORDER )
|
||||
*
|
||||
* Tests CHEMM and CSYMM.
|
||||
@@ -910,9 +910,9 @@
|
||||
* Call the subroutine.
|
||||
*
|
||||
IF( TRACE )
|
||||
$ CALL CPRCN2(NTRA, NC, SNAME, IORDER,
|
||||
$ SIDE, UPLO, M, N, ALPHA, LDA, LDB,
|
||||
$ BETA, LDC)
|
||||
$ CALL CPRCN2(NTRA, NC, SNAME, IORDER,
|
||||
$ SIDE, UPLO, M, N, ALPHA, LDA, LDB,
|
||||
$ BETA, LDC)
|
||||
IF( REWI )
|
||||
$ REWIND NTRA
|
||||
IF( CONJ )THEN
|
||||
@@ -1015,7 +1015,7 @@
|
||||
110 CONTINUE
|
||||
WRITE( NOUT, FMT = 9996 )SNAME
|
||||
CALL CPRCN2(NOUT, NC, SNAME, IORDER, SIDE, UPLO, M, N, ALPHA, LDA,
|
||||
$ LDB, BETA, LDC)
|
||||
$ LDB, BETA, LDC)
|
||||
*
|
||||
120 CONTINUE
|
||||
RETURN
|
||||
@@ -1050,20 +1050,20 @@
|
||||
CHARACTER*1 SIDE, UPLO
|
||||
CHARACTER*12 SNAME
|
||||
CHARACTER*14 CRC, CS,CU
|
||||
|
||||
|
||||
IF (SIDE.EQ.'L')THEN
|
||||
CS = ' CblasLeft'
|
||||
ELSE
|
||||
ELSE
|
||||
CS = ' CblasRight'
|
||||
END IF
|
||||
IF (UPLO.EQ.'U')THEN
|
||||
CU = ' CblasUpper'
|
||||
ELSE
|
||||
ELSE
|
||||
CU = ' CblasLower'
|
||||
END IF
|
||||
IF (IORDER.EQ.1)THEN
|
||||
CRC = ' CblasRowMajor'
|
||||
ELSE
|
||||
ELSE
|
||||
CRC = ' CblasColMajor'
|
||||
END IF
|
||||
WRITE(NOUT, FMT = 9995)NC,SNAME,CRC, CS,CU
|
||||
@@ -1401,22 +1401,22 @@
|
||||
CHARACTER*1 SIDE, UPLO, TRANSA, DIAG
|
||||
CHARACTER*12 SNAME
|
||||
CHARACTER*14 CRC, CS, CU, CA, CD
|
||||
|
||||
|
||||
IF (SIDE.EQ.'L')THEN
|
||||
CS = ' CblasLeft'
|
||||
ELSE
|
||||
ELSE
|
||||
CS = ' CblasRight'
|
||||
END IF
|
||||
IF (UPLO.EQ.'U')THEN
|
||||
CU = ' CblasUpper'
|
||||
ELSE
|
||||
ELSE
|
||||
CU = ' CblasLower'
|
||||
END IF
|
||||
IF (TRANSA.EQ.'N')THEN
|
||||
CA = ' CblasNoTrans'
|
||||
ELSE IF (TRANSA.EQ.'T')THEN
|
||||
CA = ' CblasTrans'
|
||||
ELSE
|
||||
ELSE
|
||||
CA = 'CblasConjTrans'
|
||||
END IF
|
||||
IF (DIAG.EQ.'N')THEN
|
||||
@@ -1426,7 +1426,7 @@
|
||||
END IF
|
||||
IF (IORDER.EQ.1)THEN
|
||||
CRC = ' CblasRowMajor'
|
||||
ELSE
|
||||
ELSE
|
||||
CRC = ' CblasColMajor'
|
||||
END IF
|
||||
WRITE(NOUT, FMT = 9995)NC,SNAME,CRC, CS,CU
|
||||
@@ -1787,22 +1787,22 @@
|
||||
CHARACTER*1 UPLO, TRANSA
|
||||
CHARACTER*12 SNAME
|
||||
CHARACTER*14 CRC, CU, CA
|
||||
|
||||
|
||||
IF (UPLO.EQ.'U')THEN
|
||||
CU = ' CblasUpper'
|
||||
ELSE
|
||||
ELSE
|
||||
CU = ' CblasLower'
|
||||
END IF
|
||||
IF (TRANSA.EQ.'N')THEN
|
||||
CA = ' CblasNoTrans'
|
||||
ELSE IF (TRANSA.EQ.'T')THEN
|
||||
CA = ' CblasTrans'
|
||||
ELSE
|
||||
ELSE
|
||||
CA = 'CblasConjTrans'
|
||||
END IF
|
||||
IF (IORDER.EQ.1)THEN
|
||||
CRC = ' CblasRowMajor'
|
||||
ELSE
|
||||
ELSE
|
||||
CRC = ' CblasColMajor'
|
||||
END IF
|
||||
WRITE(NOUT, FMT = 9995)NC, SNAME, CRC, CU, CA
|
||||
@@ -1821,29 +1821,29 @@
|
||||
CHARACTER*1 UPLO, TRANSA
|
||||
CHARACTER*12 SNAME
|
||||
CHARACTER*14 CRC, CU, CA
|
||||
|
||||
|
||||
IF (UPLO.EQ.'U')THEN
|
||||
CU = ' CblasUpper'
|
||||
ELSE
|
||||
ELSE
|
||||
CU = ' CblasLower'
|
||||
END IF
|
||||
IF (TRANSA.EQ.'N')THEN
|
||||
CA = ' CblasNoTrans'
|
||||
ELSE IF (TRANSA.EQ.'T')THEN
|
||||
CA = ' CblasTrans'
|
||||
ELSE
|
||||
ELSE
|
||||
CA = 'CblasConjTrans'
|
||||
END IF
|
||||
IF (IORDER.EQ.1)THEN
|
||||
CRC = ' CblasRowMajor'
|
||||
ELSE
|
||||
ELSE
|
||||
CRC = ' CblasColMajor'
|
||||
END IF
|
||||
WRITE(NOUT, FMT = 9995)NC, SNAME, CRC, CU, CA
|
||||
WRITE(NOUT, FMT = 9994)N, K, ALPHA, LDA, BETA, LDC
|
||||
|
||||
9995 FORMAT( 1X, I6, ': ', A12,'(', 3( A14, ',') )
|
||||
9994 FORMAT( 10X, 2( I3, ',' ),
|
||||
9994 FORMAT( 10X, 2( I3, ',' ),
|
||||
$ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ').' )
|
||||
END
|
||||
*
|
||||
@@ -2040,7 +2040,7 @@
|
||||
IF( REWI )
|
||||
$ REWIND NTRA
|
||||
CALL CCSYR2K( IORDER, UPLO, TRANS, N, K,
|
||||
$ ALPHA, AA, LDA, BB, LDB, BETA,
|
||||
$ ALPHA, AA, LDA, BB, LDB, BETA,
|
||||
$ CC, LDC )
|
||||
END IF
|
||||
*
|
||||
@@ -2240,22 +2240,22 @@
|
||||
CHARACTER*1 UPLO, TRANSA
|
||||
CHARACTER*12 SNAME
|
||||
CHARACTER*14 CRC, CU, CA
|
||||
|
||||
|
||||
IF (UPLO.EQ.'U')THEN
|
||||
CU = ' CblasUpper'
|
||||
ELSE
|
||||
ELSE
|
||||
CU = ' CblasLower'
|
||||
END IF
|
||||
IF (TRANSA.EQ.'N')THEN
|
||||
CA = ' CblasNoTrans'
|
||||
ELSE IF (TRANSA.EQ.'T')THEN
|
||||
CA = ' CblasTrans'
|
||||
ELSE
|
||||
ELSE
|
||||
CA = 'CblasConjTrans'
|
||||
END IF
|
||||
IF (IORDER.EQ.1)THEN
|
||||
CRC = ' CblasRowMajor'
|
||||
ELSE
|
||||
ELSE
|
||||
CRC = ' CblasColMajor'
|
||||
END IF
|
||||
WRITE(NOUT, FMT = 9995)NC, SNAME, CRC, CU, CA
|
||||
@@ -2275,22 +2275,22 @@
|
||||
CHARACTER*1 UPLO, TRANSA
|
||||
CHARACTER*12 SNAME
|
||||
CHARACTER*14 CRC, CU, CA
|
||||
|
||||
|
||||
IF (UPLO.EQ.'U')THEN
|
||||
CU = ' CblasUpper'
|
||||
ELSE
|
||||
ELSE
|
||||
CU = ' CblasLower'
|
||||
END IF
|
||||
IF (TRANSA.EQ.'N')THEN
|
||||
CA = ' CblasNoTrans'
|
||||
ELSE IF (TRANSA.EQ.'T')THEN
|
||||
CA = ' CblasTrans'
|
||||
ELSE
|
||||
ELSE
|
||||
CA = 'CblasConjTrans'
|
||||
END IF
|
||||
IF (IORDER.EQ.1)THEN
|
||||
CRC = ' CblasRowMajor'
|
||||
ELSE
|
||||
ELSE
|
||||
CRC = ' CblasColMajor'
|
||||
END IF
|
||||
WRITE(NOUT, FMT = 9995)NC, SNAME, CRC, CU, CA
|
||||
|
||||
2786
ctest/c_cblat3_3m.f
Normal file
2786
ctest/c_cblat3_3m.f
Normal file
File diff suppressed because it is too large
Load Diff
290
ctest/c_d2chke.c
290
ctest/c_d2chke.c
@@ -26,9 +26,9 @@ void chkxer(void) {
|
||||
|
||||
void F77_d2chke(char *rout) {
|
||||
char *sf = ( rout ) ;
|
||||
double A[2] = {0.0,0.0},
|
||||
X[2] = {0.0,0.0},
|
||||
Y[2] = {0.0,0.0},
|
||||
double A[2] = {0.0,0.0},
|
||||
X[2] = {0.0,0.0},
|
||||
Y[2] = {0.0,0.0},
|
||||
ALPHA=0.0, BETA=0.0;
|
||||
extern int cblas_info, cblas_lerr, cblas_ok;
|
||||
extern int RowMajorStrg;
|
||||
@@ -46,588 +46,588 @@ void F77_d2chke(char *rout) {
|
||||
if (strncmp( sf,"cblas_dgemv",11)==0) {
|
||||
cblas_rout = "cblas_dgemv";
|
||||
cblas_info = 1;
|
||||
cblas_dgemv(INVALID, CblasNoTrans, 0, 0,
|
||||
cblas_dgemv(INVALID, CblasNoTrans, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_dgemv(CblasColMajor, INVALID, 0, 0,
|
||||
cblas_dgemv(CblasColMajor, INVALID, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_dgemv(CblasColMajor, CblasNoTrans, INVALID, 0,
|
||||
cblas_dgemv(CblasColMajor, CblasNoTrans, INVALID, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_dgemv(CblasColMajor, CblasNoTrans, 0, INVALID,
|
||||
cblas_dgemv(CblasColMajor, CblasNoTrans, 0, INVALID,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = FALSE;
|
||||
cblas_dgemv(CblasColMajor, CblasNoTrans, 2, 0,
|
||||
cblas_dgemv(CblasColMajor, CblasNoTrans, 2, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_dgemv(CblasColMajor, CblasNoTrans, 0, 0,
|
||||
cblas_dgemv(CblasColMajor, CblasNoTrans, 0, 0,
|
||||
ALPHA, A, 1, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 12; RowMajorStrg = FALSE;
|
||||
cblas_dgemv(CblasColMajor, CblasNoTrans, 0, 0,
|
||||
cblas_dgemv(CblasColMajor, CblasNoTrans, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
|
||||
cblas_info = 2; RowMajorStrg = TRUE; RowMajorStrg = TRUE;
|
||||
cblas_dgemv(CblasRowMajor, INVALID, 0, 0,
|
||||
cblas_dgemv(CblasRowMajor, INVALID, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_dgemv(CblasRowMajor, CblasNoTrans, INVALID, 0,
|
||||
cblas_dgemv(CblasRowMajor, CblasNoTrans, INVALID, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_dgemv(CblasRowMajor, CblasNoTrans, 0, INVALID,
|
||||
cblas_dgemv(CblasRowMajor, CblasNoTrans, 0, INVALID,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = TRUE;
|
||||
cblas_dgemv(CblasRowMajor, CblasNoTrans, 0, 2,
|
||||
cblas_dgemv(CblasRowMajor, CblasNoTrans, 0, 2,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_dgemv(CblasRowMajor, CblasNoTrans, 0, 0,
|
||||
cblas_dgemv(CblasRowMajor, CblasNoTrans, 0, 0,
|
||||
ALPHA, A, 1, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 12; RowMajorStrg = TRUE;
|
||||
cblas_dgemv(CblasRowMajor, CblasNoTrans, 0, 0,
|
||||
cblas_dgemv(CblasRowMajor, CblasNoTrans, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_dgbmv",11)==0) {
|
||||
cblas_rout = "cblas_dgbmv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_dgbmv(INVALID, CblasNoTrans, 0, 0, 0, 0,
|
||||
cblas_dgbmv(INVALID, CblasNoTrans, 0, 0, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_dgbmv(CblasColMajor, INVALID, 0, 0, 0, 0,
|
||||
cblas_dgbmv(CblasColMajor, INVALID, 0, 0, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_dgbmv(CblasColMajor, CblasNoTrans, INVALID, 0, 0, 0,
|
||||
cblas_dgbmv(CblasColMajor, CblasNoTrans, INVALID, 0, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_dgbmv(CblasColMajor, CblasNoTrans, 0, INVALID, 0, 0,
|
||||
cblas_dgbmv(CblasColMajor, CblasNoTrans, 0, INVALID, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_dgbmv(CblasColMajor, CblasNoTrans, 0, 0, INVALID, 0,
|
||||
cblas_dgbmv(CblasColMajor, CblasNoTrans, 0, 0, INVALID, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_dgbmv(CblasColMajor, CblasNoTrans, 2, 0, 0, INVALID,
|
||||
cblas_dgbmv(CblasColMajor, CblasNoTrans, 2, 0, 0, INVALID,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_dgbmv(CblasColMajor, CblasNoTrans, 0, 0, 1, 0,
|
||||
cblas_dgbmv(CblasColMajor, CblasNoTrans, 0, 0, 1, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_dgbmv(CblasColMajor, CblasNoTrans, 0, 0, 0, 0,
|
||||
cblas_dgbmv(CblasColMajor, CblasNoTrans, 0, 0, 0, 0,
|
||||
ALPHA, A, 1, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = FALSE;
|
||||
cblas_dgbmv(CblasColMajor, CblasNoTrans, 0, 0, 0, 0,
|
||||
cblas_dgbmv(CblasColMajor, CblasNoTrans, 0, 0, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_dgbmv(CblasRowMajor, INVALID, 0, 0, 0, 0,
|
||||
cblas_dgbmv(CblasRowMajor, INVALID, 0, 0, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_dgbmv(CblasRowMajor, CblasNoTrans, INVALID, 0, 0, 0,
|
||||
cblas_dgbmv(CblasRowMajor, CblasNoTrans, INVALID, 0, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_dgbmv(CblasRowMajor, CblasNoTrans, 0, INVALID, 0, 0,
|
||||
cblas_dgbmv(CblasRowMajor, CblasNoTrans, 0, INVALID, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_dgbmv(CblasRowMajor, CblasNoTrans, 0, 0, INVALID, 0,
|
||||
cblas_dgbmv(CblasRowMajor, CblasNoTrans, 0, 0, INVALID, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_dgbmv(CblasRowMajor, CblasNoTrans, 2, 0, 0, INVALID,
|
||||
cblas_dgbmv(CblasRowMajor, CblasNoTrans, 2, 0, 0, INVALID,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_dgbmv(CblasRowMajor, CblasNoTrans, 0, 0, 1, 0,
|
||||
cblas_dgbmv(CblasRowMajor, CblasNoTrans, 0, 0, 1, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_dgbmv(CblasRowMajor, CblasNoTrans, 0, 0, 0, 0,
|
||||
cblas_dgbmv(CblasRowMajor, CblasNoTrans, 0, 0, 0, 0,
|
||||
ALPHA, A, 1, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = TRUE;
|
||||
cblas_dgbmv(CblasRowMajor, CblasNoTrans, 0, 0, 0, 0,
|
||||
cblas_dgbmv(CblasRowMajor, CblasNoTrans, 0, 0, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_dsymv",11)==0) {
|
||||
cblas_rout = "cblas_dsymv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_dsymv(INVALID, CblasUpper, 0,
|
||||
cblas_dsymv(INVALID, CblasUpper, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_dsymv(CblasColMajor, INVALID, 0,
|
||||
cblas_dsymv(CblasColMajor, INVALID, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_dsymv(CblasColMajor, CblasUpper, INVALID,
|
||||
cblas_dsymv(CblasColMajor, CblasUpper, INVALID,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_dsymv(CblasColMajor, CblasUpper, 2,
|
||||
cblas_dsymv(CblasColMajor, CblasUpper, 2,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = FALSE;
|
||||
cblas_dsymv(CblasColMajor, CblasUpper, 0,
|
||||
cblas_dsymv(CblasColMajor, CblasUpper, 0,
|
||||
ALPHA, A, 1, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_dsymv(CblasColMajor, CblasUpper, 0,
|
||||
cblas_dsymv(CblasColMajor, CblasUpper, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_dsymv(CblasRowMajor, INVALID, 0,
|
||||
cblas_dsymv(CblasRowMajor, INVALID, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_dsymv(CblasRowMajor, CblasUpper, INVALID,
|
||||
cblas_dsymv(CblasRowMajor, CblasUpper, INVALID,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_dsymv(CblasRowMajor, CblasUpper, 2,
|
||||
cblas_dsymv(CblasRowMajor, CblasUpper, 2,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = TRUE;
|
||||
cblas_dsymv(CblasRowMajor, CblasUpper, 0,
|
||||
cblas_dsymv(CblasRowMajor, CblasUpper, 0,
|
||||
ALPHA, A, 1, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_dsymv(CblasRowMajor, CblasUpper, 0,
|
||||
cblas_dsymv(CblasRowMajor, CblasUpper, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_dsbmv",11)==0) {
|
||||
cblas_rout = "cblas_dsbmv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_dsbmv(INVALID, CblasUpper, 0, 0,
|
||||
cblas_dsbmv(INVALID, CblasUpper, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_dsbmv(CblasColMajor, INVALID, 0, 0,
|
||||
cblas_dsbmv(CblasColMajor, INVALID, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_dsbmv(CblasColMajor, CblasUpper, INVALID, 0,
|
||||
cblas_dsbmv(CblasColMajor, CblasUpper, INVALID, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_dsbmv(CblasColMajor, CblasUpper, 0, INVALID,
|
||||
cblas_dsbmv(CblasColMajor, CblasUpper, 0, INVALID,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = FALSE;
|
||||
cblas_dsbmv(CblasColMajor, CblasUpper, 0, 1,
|
||||
cblas_dsbmv(CblasColMajor, CblasUpper, 0, 1,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_dsbmv(CblasColMajor, CblasUpper, 0, 0,
|
||||
cblas_dsbmv(CblasColMajor, CblasUpper, 0, 0,
|
||||
ALPHA, A, 1, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 12; RowMajorStrg = FALSE;
|
||||
cblas_dsbmv(CblasColMajor, CblasUpper, 0, 0,
|
||||
cblas_dsbmv(CblasColMajor, CblasUpper, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_dsbmv(CblasRowMajor, INVALID, 0, 0,
|
||||
cblas_dsbmv(CblasRowMajor, INVALID, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_dsbmv(CblasRowMajor, CblasUpper, INVALID, 0,
|
||||
cblas_dsbmv(CblasRowMajor, CblasUpper, INVALID, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_dsbmv(CblasRowMajor, CblasUpper, 0, INVALID,
|
||||
cblas_dsbmv(CblasRowMajor, CblasUpper, 0, INVALID,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = TRUE;
|
||||
cblas_dsbmv(CblasRowMajor, CblasUpper, 0, 1,
|
||||
cblas_dsbmv(CblasRowMajor, CblasUpper, 0, 1,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_dsbmv(CblasRowMajor, CblasUpper, 0, 0,
|
||||
cblas_dsbmv(CblasRowMajor, CblasUpper, 0, 0,
|
||||
ALPHA, A, 1, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 12; RowMajorStrg = TRUE;
|
||||
cblas_dsbmv(CblasRowMajor, CblasUpper, 0, 0,
|
||||
cblas_dsbmv(CblasRowMajor, CblasUpper, 0, 0,
|
||||
ALPHA, A, 1, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_dspmv",11)==0) {
|
||||
cblas_rout = "cblas_dspmv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_dspmv(INVALID, CblasUpper, 0,
|
||||
cblas_dspmv(INVALID, CblasUpper, 0,
|
||||
ALPHA, A, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_dspmv(CblasColMajor, INVALID, 0,
|
||||
cblas_dspmv(CblasColMajor, INVALID, 0,
|
||||
ALPHA, A, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_dspmv(CblasColMajor, CblasUpper, INVALID,
|
||||
cblas_dspmv(CblasColMajor, CblasUpper, INVALID,
|
||||
ALPHA, A, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = FALSE;
|
||||
cblas_dspmv(CblasColMajor, CblasUpper, 0,
|
||||
cblas_dspmv(CblasColMajor, CblasUpper, 0,
|
||||
ALPHA, A, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 10; RowMajorStrg = FALSE;
|
||||
cblas_dspmv(CblasColMajor, CblasUpper, 0,
|
||||
cblas_dspmv(CblasColMajor, CblasUpper, 0,
|
||||
ALPHA, A, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_dspmv(CblasRowMajor, INVALID, 0,
|
||||
cblas_dspmv(CblasRowMajor, INVALID, 0,
|
||||
ALPHA, A, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_dspmv(CblasRowMajor, CblasUpper, INVALID,
|
||||
cblas_dspmv(CblasRowMajor, CblasUpper, INVALID,
|
||||
ALPHA, A, X, 1, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = TRUE;
|
||||
cblas_dspmv(CblasRowMajor, CblasUpper, 0,
|
||||
cblas_dspmv(CblasRowMajor, CblasUpper, 0,
|
||||
ALPHA, A, X, 0, BETA, Y, 1 );
|
||||
chkxer();
|
||||
cblas_info = 10; RowMajorStrg = TRUE;
|
||||
cblas_dspmv(CblasRowMajor, CblasUpper, 0,
|
||||
cblas_dspmv(CblasRowMajor, CblasUpper, 0,
|
||||
ALPHA, A, X, 1, BETA, Y, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_dtrmv",11)==0) {
|
||||
cblas_rout = "cblas_dtrmv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_dtrmv(INVALID, CblasUpper, CblasNoTrans,
|
||||
cblas_dtrmv(INVALID, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_dtrmv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
cblas_dtrmv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_dtrmv(CblasColMajor, CblasUpper, INVALID,
|
||||
cblas_dtrmv(CblasColMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_dtrmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtrmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_dtrmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtrmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = FALSE;
|
||||
cblas_dtrmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtrmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 2, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_dtrmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtrmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_dtrmv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
cblas_dtrmv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_dtrmv(CblasRowMajor, CblasUpper, INVALID,
|
||||
cblas_dtrmv(CblasRowMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_dtrmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtrmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_dtrmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtrmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = TRUE;
|
||||
cblas_dtrmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtrmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 2, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_dtrmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtrmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_dtbmv",11)==0) {
|
||||
cblas_rout = "cblas_dtbmv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_dtbmv(INVALID, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbmv(INVALID, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_dtbmv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
cblas_dtbmv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_dtbmv(CblasColMajor, CblasUpper, INVALID,
|
||||
cblas_dtbmv(CblasColMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_dtbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_dtbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_dtbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, INVALID, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = FALSE;
|
||||
cblas_dtbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 1, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 10; RowMajorStrg = FALSE;
|
||||
cblas_dtbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_dtbmv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
cblas_dtbmv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_dtbmv(CblasRowMajor, CblasUpper, INVALID,
|
||||
cblas_dtbmv(CblasRowMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_dtbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_dtbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_dtbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, INVALID, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = TRUE;
|
||||
cblas_dtbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 1, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 10; RowMajorStrg = TRUE;
|
||||
cblas_dtbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_dtpmv",11)==0) {
|
||||
cblas_rout = "cblas_dtpmv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_dtpmv(INVALID, CblasUpper, CblasNoTrans,
|
||||
cblas_dtpmv(INVALID, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_dtpmv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
cblas_dtpmv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_dtpmv(CblasColMajor, CblasUpper, INVALID,
|
||||
cblas_dtpmv(CblasColMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_dtpmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtpmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_dtpmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtpmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = FALSE;
|
||||
cblas_dtpmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtpmv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_dtpmv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
cblas_dtpmv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_dtpmv(CblasRowMajor, CblasUpper, INVALID,
|
||||
cblas_dtpmv(CblasRowMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_dtpmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtpmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_dtpmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtpmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = TRUE;
|
||||
cblas_dtpmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtpmv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_dtrsv",11)==0) {
|
||||
cblas_rout = "cblas_dtrsv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_dtrsv(INVALID, CblasUpper, CblasNoTrans,
|
||||
cblas_dtrsv(INVALID, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_dtrsv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
cblas_dtrsv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_dtrsv(CblasColMajor, CblasUpper, INVALID,
|
||||
cblas_dtrsv(CblasColMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_dtrsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtrsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_dtrsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtrsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = FALSE;
|
||||
cblas_dtrsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtrsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 2, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_dtrsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtrsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_dtrsv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
cblas_dtrsv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_dtrsv(CblasRowMajor, CblasUpper, INVALID,
|
||||
cblas_dtrsv(CblasRowMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_dtrsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtrsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_dtrsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtrsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 7; RowMajorStrg = TRUE;
|
||||
cblas_dtrsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtrsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 2, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_dtrsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtrsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, 1, X, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_dtbsv",11)==0) {
|
||||
cblas_rout = "cblas_dtbsv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_dtbsv(INVALID, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbsv(INVALID, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_dtbsv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
cblas_dtbsv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_dtbsv(CblasColMajor, CblasUpper, INVALID,
|
||||
cblas_dtbsv(CblasColMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_dtbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_dtbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_dtbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, INVALID, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = FALSE;
|
||||
cblas_dtbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 1, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 10; RowMajorStrg = FALSE;
|
||||
cblas_dtbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_dtbsv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
cblas_dtbsv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_dtbsv(CblasRowMajor, CblasUpper, INVALID,
|
||||
cblas_dtbsv(CblasRowMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_dtbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_dtbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, 0, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_dtbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, INVALID, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = TRUE;
|
||||
cblas_dtbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 1, A, 1, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 10; RowMajorStrg = TRUE;
|
||||
cblas_dtbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtbsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, 0, A, 1, X, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_dtpsv",11)==0) {
|
||||
cblas_rout = "cblas_dtpsv";
|
||||
cblas_info = 1; RowMajorStrg = FALSE;
|
||||
cblas_dtpsv(INVALID, CblasUpper, CblasNoTrans,
|
||||
cblas_dtpsv(INVALID, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_dtpsv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
cblas_dtpsv(CblasColMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_dtpsv(CblasColMajor, CblasUpper, INVALID,
|
||||
cblas_dtpsv(CblasColMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_dtpsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtpsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_dtpsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtpsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = FALSE;
|
||||
cblas_dtpsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtpsv(CblasColMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 0 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = TRUE;
|
||||
cblas_dtpsv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
cblas_dtpsv(CblasRowMajor, INVALID, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = TRUE;
|
||||
cblas_dtpsv(CblasRowMajor, CblasUpper, INVALID,
|
||||
cblas_dtpsv(CblasRowMajor, CblasUpper, INVALID,
|
||||
CblasNonUnit, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_dtpsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtpsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
INVALID, 0, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_dtpsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtpsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, INVALID, A, X, 1 );
|
||||
chkxer();
|
||||
cblas_info = 8; RowMajorStrg = TRUE;
|
||||
cblas_dtpsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
cblas_dtpsv(CblasRowMajor, CblasUpper, CblasNoTrans,
|
||||
CblasNonUnit, 0, A, X, 0 );
|
||||
chkxer();
|
||||
} else if (strncmp( sf,"cblas_dger",10)==0) {
|
||||
@@ -781,7 +781,7 @@ void F77_d2chke(char *rout) {
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_dspr(CblasColMajor, CblasUpper, 0, ALPHA, X, 0, A );
|
||||
chkxer();
|
||||
}
|
||||
}
|
||||
if (cblas_ok == TRUE)
|
||||
printf(" %-12s PASSED THE TESTS OF ERROR-EXITS\n", cblas_rout);
|
||||
else
|
||||
|
||||
@@ -26,9 +26,9 @@ void chkxer(void) {
|
||||
|
||||
void F77_d3chke(char *rout) {
|
||||
char *sf = ( rout ) ;
|
||||
double A[2] = {0.0,0.0},
|
||||
B[2] = {0.0,0.0},
|
||||
C[2] = {0.0,0.0},
|
||||
double A[2] = {0.0,0.0},
|
||||
B[2] = {0.0,0.0},
|
||||
C[2] = {0.0,0.0},
|
||||
ALPHA=0.0, BETA=0.0;
|
||||
extern int cblas_info, cblas_lerr, cblas_ok;
|
||||
extern int RowMajorStrg;
|
||||
@@ -39,7 +39,7 @@ void F77_d3chke(char *rout) {
|
||||
cblas_xerbla(cblas_info,cblas_rout,"");
|
||||
F77_xerbla(cblas_rout,&cblas_info);
|
||||
}
|
||||
|
||||
|
||||
cblas_ok = TRUE ;
|
||||
cblas_lerr = PASSED ;
|
||||
|
||||
@@ -47,15 +47,15 @@ void F77_d3chke(char *rout) {
|
||||
cblas_rout = "cblas_dgemm" ;
|
||||
|
||||
cblas_info = 1;
|
||||
cblas_dgemm( INVALID, CblasNoTrans, CblasNoTrans, 0, 0, 0,
|
||||
cblas_dgemm( INVALID, CblasNoTrans, CblasNoTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 1;
|
||||
cblas_dgemm( INVALID, CblasNoTrans, CblasTrans, 0, 0, 0,
|
||||
cblas_dgemm( INVALID, CblasNoTrans, CblasTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 1;
|
||||
cblas_dgemm( INVALID, CblasTrans, CblasNoTrans, 0, 0, 0,
|
||||
cblas_dgemm( INVALID, CblasTrans, CblasNoTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 1;
|
||||
|
||||
@@ -21,7 +21,7 @@ void F77_daxpy(const int *N, const double *alpha, const double *X,
|
||||
return;
|
||||
}
|
||||
|
||||
void F77_dcopy(const int *N, double *X, const int *incX,
|
||||
void F77_dcopy(const int *N, double *X, const int *incX,
|
||||
double *Y, const int *incY)
|
||||
{
|
||||
cblas_dcopy(*N, X, *incX, Y, *incY);
|
||||
|
||||
@@ -8,8 +8,8 @@
|
||||
#include "common.h"
|
||||
#include "cblas_test.h"
|
||||
|
||||
void F77_dgemv(int *order, char *transp, int *m, int *n, double *alpha,
|
||||
double *a, int *lda, double *x, int *incx, double *beta,
|
||||
void F77_dgemv(int *order, char *transp, int *m, int *n, double *alpha,
|
||||
double *a, int *lda, double *x, int *incx, double *beta,
|
||||
double *y, int *incy ) {
|
||||
|
||||
double *A;
|
||||
@@ -23,7 +23,7 @@ void F77_dgemv(int *order, char *transp, int *m, int *n, double *alpha,
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[ LDA*i+j ]=a[ (*lda)*j+i ];
|
||||
cblas_dgemv( CblasRowMajor, trans,
|
||||
cblas_dgemv( CblasRowMajor, trans,
|
||||
*m, *n, *alpha, A, LDA, x, *incx, *beta, y, *incy );
|
||||
free(A);
|
||||
}
|
||||
@@ -68,9 +68,9 @@ void F77_dtrmv(int *order, char *uplow, char *transp, char *diagn,
|
||||
enum CBLAS_UPLO uplo;
|
||||
enum CBLAS_DIAG diag;
|
||||
|
||||
get_transpose_type(transp,&trans);
|
||||
get_uplo_type(uplow,&uplo);
|
||||
get_diag_type(diagn,&diag);
|
||||
get_transpose_type(transp,&trans);
|
||||
get_uplo_type(uplow,&uplo);
|
||||
get_diag_type(diagn,&diag);
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
@@ -88,7 +88,7 @@ void F77_dtrmv(int *order, char *uplow, char *transp, char *diagn,
|
||||
}
|
||||
}
|
||||
|
||||
void F77_dtrsv(int *order, char *uplow, char *transp, char *diagn,
|
||||
void F77_dtrsv(int *order, char *uplow, char *transp, char *diagn,
|
||||
int *n, double *a, int *lda, double *x, int *incx ) {
|
||||
double *A;
|
||||
int i,j,LDA;
|
||||
@@ -112,7 +112,7 @@ void F77_dtrsv(int *order, char *uplow, char *transp, char *diagn,
|
||||
else
|
||||
cblas_dtrsv(CblasColMajor, uplo, trans, diag, *n, a, *lda, x, *incx );
|
||||
}
|
||||
void F77_dsymv(int *order, char *uplow, int *n, double *alpha, double *a,
|
||||
void F77_dsymv(int *order, char *uplow, int *n, double *alpha, double *a,
|
||||
int *lda, double *x, int *incx, double *beta, double *y,
|
||||
int *incy) {
|
||||
double *A;
|
||||
@@ -136,7 +136,7 @@ void F77_dsymv(int *order, char *uplow, int *n, double *alpha, double *a,
|
||||
*beta, y, *incy );
|
||||
}
|
||||
|
||||
void F77_dsyr(int *order, char *uplow, int *n, double *alpha, double *x,
|
||||
void F77_dsyr(int *order, char *uplow, int *n, double *alpha, double *x,
|
||||
int *incx, double *a, int *lda) {
|
||||
double *A;
|
||||
int i,j,LDA;
|
||||
@@ -160,7 +160,7 @@ void F77_dsyr(int *order, char *uplow, int *n, double *alpha, double *x,
|
||||
cblas_dsyr(CblasColMajor, uplo, *n, *alpha, x, *incx, a, *lda);
|
||||
}
|
||||
|
||||
void F77_dsyr2(int *order, char *uplow, int *n, double *alpha, double *x,
|
||||
void F77_dsyr2(int *order, char *uplow, int *n, double *alpha, double *x,
|
||||
int *incx, double *y, int *incy, double *a, int *lda) {
|
||||
double *A;
|
||||
int i,j,LDA;
|
||||
@@ -185,7 +185,7 @@ void F77_dsyr2(int *order, char *uplow, int *n, double *alpha, double *x,
|
||||
}
|
||||
|
||||
void F77_dgbmv(int *order, char *transp, int *m, int *n, int *kl, int *ku,
|
||||
double *alpha, double *a, int *lda, double *x, int *incx,
|
||||
double *alpha, double *a, int *lda, double *x, int *incx,
|
||||
double *beta, double *y, int *incy ) {
|
||||
|
||||
double *A;
|
||||
@@ -213,7 +213,7 @@ void F77_dgbmv(int *order, char *transp, int *m, int *n, int *kl, int *ku,
|
||||
for( j=jcol; j<(*n+*kl); j++ )
|
||||
A[ LDA*j+irow ]=a[ (*lda)*(j-jcol)+i ];
|
||||
}
|
||||
cblas_dgbmv( CblasRowMajor, trans, *m, *n, *kl, *ku, *alpha,
|
||||
cblas_dgbmv( CblasRowMajor, trans, *m, *n, *kl, *ku, *alpha,
|
||||
A, LDA, x, *incx, *beta, y, *incy );
|
||||
free(A);
|
||||
}
|
||||
@@ -230,9 +230,9 @@ void F77_dtbmv(int *order, char *uplow, char *transp, char *diagn,
|
||||
enum CBLAS_UPLO uplo;
|
||||
enum CBLAS_DIAG diag;
|
||||
|
||||
get_transpose_type(transp,&trans);
|
||||
get_uplo_type(uplow,&uplo);
|
||||
get_diag_type(diagn,&diag);
|
||||
get_transpose_type(transp,&trans);
|
||||
get_uplo_type(uplow,&uplo);
|
||||
get_diag_type(diagn,&diag);
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *k+1;
|
||||
@@ -276,9 +276,9 @@ void F77_dtbsv(int *order, char *uplow, char *transp, char *diagn,
|
||||
enum CBLAS_UPLO uplo;
|
||||
enum CBLAS_DIAG diag;
|
||||
|
||||
get_transpose_type(transp,&trans);
|
||||
get_uplo_type(uplow,&uplo);
|
||||
get_diag_type(diagn,&diag);
|
||||
get_transpose_type(transp,&trans);
|
||||
get_uplo_type(uplow,&uplo);
|
||||
get_diag_type(diagn,&diag);
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *k+1;
|
||||
@@ -315,7 +315,7 @@ void F77_dtbsv(int *order, char *uplow, char *transp, char *diagn,
|
||||
}
|
||||
|
||||
void F77_dsbmv(int *order, char *uplow, int *n, int *k, double *alpha,
|
||||
double *a, int *lda, double *x, int *incx, double *beta,
|
||||
double *a, int *lda, double *x, int *incx, double *beta,
|
||||
double *y, int *incy) {
|
||||
double *A;
|
||||
int i,j,irow,jcol,LDA;
|
||||
@@ -387,13 +387,13 @@ void F77_dspmv(int *order, char *uplow, int *n, double *alpha, double *ap,
|
||||
for( j=0; j<i+1; j++, k++ )
|
||||
AP[ k ]=A[ LDA*i+j ];
|
||||
}
|
||||
cblas_dspmv( CblasRowMajor, uplo, *n, *alpha, AP, x, *incx, *beta, y,
|
||||
cblas_dspmv( CblasRowMajor, uplo, *n, *alpha, AP, x, *incx, *beta, y,
|
||||
*incy );
|
||||
free(A);
|
||||
free(AP);
|
||||
}
|
||||
else
|
||||
cblas_dspmv( CblasColMajor, uplo, *n, *alpha, ap, x, *incx, *beta, y,
|
||||
cblas_dspmv( CblasColMajor, uplo, *n, *alpha, ap, x, *incx, *beta, y,
|
||||
*incy );
|
||||
}
|
||||
|
||||
@@ -405,9 +405,9 @@ void F77_dtpmv(int *order, char *uplow, char *transp, char *diagn,
|
||||
enum CBLAS_UPLO uplo;
|
||||
enum CBLAS_DIAG diag;
|
||||
|
||||
get_transpose_type(transp,&trans);
|
||||
get_uplo_type(uplow,&uplo);
|
||||
get_diag_type(diagn,&diag);
|
||||
get_transpose_type(transp,&trans);
|
||||
get_uplo_type(uplow,&uplo);
|
||||
get_diag_type(diagn,&diag);
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n;
|
||||
@@ -445,9 +445,9 @@ void F77_dtpsv(int *order, char *uplow, char *transp, char *diagn,
|
||||
enum CBLAS_UPLO uplo;
|
||||
enum CBLAS_DIAG diag;
|
||||
|
||||
get_transpose_type(transp,&trans);
|
||||
get_uplo_type(uplow,&uplo);
|
||||
get_diag_type(diagn,&diag);
|
||||
get_transpose_type(transp,&trans);
|
||||
get_uplo_type(uplow,&uplo);
|
||||
get_diag_type(diagn,&diag);
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n;
|
||||
@@ -478,7 +478,7 @@ void F77_dtpsv(int *order, char *uplow, char *transp, char *diagn,
|
||||
cblas_dtpsv( CblasColMajor, uplo, trans, diag, *n, ap, x, *incx );
|
||||
}
|
||||
|
||||
void F77_dspr(int *order, char *uplow, int *n, double *alpha, double *x,
|
||||
void F77_dspr(int *order, char *uplow, int *n, double *alpha, double *x,
|
||||
int *incx, double *ap ){
|
||||
double *A, *AP;
|
||||
int i,j,k,LDA;
|
||||
@@ -530,7 +530,7 @@ void F77_dspr(int *order, char *uplow, int *n, double *alpha, double *x,
|
||||
cblas_dspr( CblasColMajor, uplo, *n, *alpha, x, *incx, ap );
|
||||
}
|
||||
|
||||
void F77_dspr2(int *order, char *uplow, int *n, double *alpha, double *x,
|
||||
void F77_dspr2(int *order, char *uplow, int *n, double *alpha, double *x,
|
||||
int *incx, double *y, int *incy, double *ap ){
|
||||
double *A, *AP;
|
||||
int i,j,k,LDA;
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
#define TEST_ROW_MJR 1
|
||||
#define UNDEFINED -1
|
||||
|
||||
void F77_dgemm(int *order, char *transpa, char *transpb, int *m, int *n,
|
||||
void F77_dgemm(int *order, char *transpa, char *transpb, int *m, int *n,
|
||||
int *k, double *alpha, double *a, int *lda, double *b, int *ldb,
|
||||
double *beta, double *c, int *ldc ) {
|
||||
|
||||
@@ -111,7 +111,7 @@ void F77_dsymm(int *order, char *rtlf, char *uplow, int *m, int *n,
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ )
|
||||
C[i*LDC+j]=c[j*(*ldc)+i];
|
||||
cblas_dsymm( CblasRowMajor, side, uplo, *m, *n, *alpha, A, LDA, B, LDB,
|
||||
cblas_dsymm( CblasRowMajor, side, uplo, *m, *n, *alpha, A, LDA, B, LDB,
|
||||
*beta, C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ )
|
||||
@@ -121,15 +121,15 @@ void F77_dsymm(int *order, char *rtlf, char *uplow, int *m, int *n,
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_dsymm( CblasColMajor, side, uplo, *m, *n, *alpha, a, *lda, b, *ldb,
|
||||
cblas_dsymm( CblasColMajor, side, uplo, *m, *n, *alpha, a, *lda, b, *ldb,
|
||||
*beta, c, *ldc );
|
||||
else
|
||||
cblas_dsymm( UNDEFINED, side, uplo, *m, *n, *alpha, a, *lda, b, *ldb,
|
||||
cblas_dsymm( UNDEFINED, side, uplo, *m, *n, *alpha, a, *lda, b, *ldb,
|
||||
*beta, c, *ldc );
|
||||
}
|
||||
|
||||
void F77_dsyrk(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
double *alpha, double *a, int *lda,
|
||||
double *alpha, double *a, int *lda,
|
||||
double *beta, double *c, int *ldc ) {
|
||||
|
||||
int i,j,LDA,LDC;
|
||||
@@ -160,7 +160,7 @@ void F77_dsyrk(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
C[i*LDC+j]=c[j*(*ldc)+i];
|
||||
cblas_dsyrk(CblasRowMajor, uplo, trans, *n, *k, *alpha, A, LDA, *beta,
|
||||
cblas_dsyrk(CblasRowMajor, uplo, trans, *n, *k, *alpha, A, LDA, *beta,
|
||||
C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*n; i++ )
|
||||
@@ -169,10 +169,10 @@ void F77_dsyrk(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_dsyrk(CblasColMajor, uplo, trans, *n, *k, *alpha, a, *lda, *beta,
|
||||
cblas_dsyrk(CblasColMajor, uplo, trans, *n, *k, *alpha, a, *lda, *beta,
|
||||
c, *ldc );
|
||||
else
|
||||
cblas_dsyrk(UNDEFINED, uplo, trans, *n, *k, *alpha, a, *lda, *beta,
|
||||
cblas_dsyrk(UNDEFINED, uplo, trans, *n, *k, *alpha, a, *lda, *beta,
|
||||
c, *ldc );
|
||||
}
|
||||
|
||||
@@ -215,7 +215,7 @@ void F77_dsyr2k(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
C[i*LDC+j]=c[j*(*ldc)+i];
|
||||
cblas_dsyr2k(CblasRowMajor, uplo, trans, *n, *k, *alpha, A, LDA,
|
||||
cblas_dsyr2k(CblasRowMajor, uplo, trans, *n, *k, *alpha, A, LDA,
|
||||
B, LDB, *beta, C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*n; i++ )
|
||||
@@ -225,14 +225,14 @@ void F77_dsyr2k(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_dsyr2k(CblasColMajor, uplo, trans, *n, *k, *alpha, a, *lda,
|
||||
cblas_dsyr2k(CblasColMajor, uplo, trans, *n, *k, *alpha, a, *lda,
|
||||
b, *ldb, *beta, c, *ldc );
|
||||
else
|
||||
cblas_dsyr2k(UNDEFINED, uplo, trans, *n, *k, *alpha, a, *lda,
|
||||
cblas_dsyr2k(UNDEFINED, uplo, trans, *n, *k, *alpha, a, *lda,
|
||||
b, *ldb, *beta, c, *ldc );
|
||||
}
|
||||
void F77_dtrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
||||
int *m, int *n, double *alpha, double *a, int *lda, double *b,
|
||||
int *m, int *n, double *alpha, double *a, int *lda, double *b,
|
||||
int *ldb) {
|
||||
int i,j,LDA,LDB;
|
||||
double *A, *B;
|
||||
@@ -266,7 +266,7 @@ void F77_dtrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
B[i*LDB+j]=b[j*(*ldb)+i];
|
||||
cblas_dtrmm(CblasRowMajor, side, uplo, trans, diag, *m, *n, *alpha,
|
||||
cblas_dtrmm(CblasRowMajor, side, uplo, trans, diag, *m, *n, *alpha,
|
||||
A, LDA, B, LDB );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ )
|
||||
@@ -275,10 +275,10 @@ void F77_dtrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
||||
free(B);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_dtrmm(CblasColMajor, side, uplo, trans, diag, *m, *n, *alpha,
|
||||
cblas_dtrmm(CblasColMajor, side, uplo, trans, diag, *m, *n, *alpha,
|
||||
a, *lda, b, *ldb);
|
||||
else
|
||||
cblas_dtrmm(UNDEFINED, side, uplo, trans, diag, *m, *n, *alpha,
|
||||
cblas_dtrmm(UNDEFINED, side, uplo, trans, diag, *m, *n, *alpha,
|
||||
a, *lda, b, *ldb);
|
||||
}
|
||||
|
||||
@@ -317,7 +317,7 @@ void F77_dtrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
B[i*LDB+j]=b[j*(*ldb)+i];
|
||||
cblas_dtrsm(CblasRowMajor, side, uplo, trans, diag, *m, *n, *alpha,
|
||||
cblas_dtrsm(CblasRowMajor, side, uplo, trans, diag, *m, *n, *alpha,
|
||||
A, LDA, B, LDB );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ )
|
||||
@@ -326,9 +326,9 @@ void F77_dtrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
||||
free(B);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_dtrsm(CblasColMajor, side, uplo, trans, diag, *m, *n, *alpha,
|
||||
cblas_dtrsm(CblasColMajor, side, uplo, trans, diag, *m, *n, *alpha,
|
||||
a, *lda, b, *ldb);
|
||||
else
|
||||
cblas_dtrsm(UNDEFINED, side, uplo, trans, diag, *m, *n, *alpha,
|
||||
cblas_dtrsm(UNDEFINED, side, uplo, trans, diag, *m, *n, *alpha,
|
||||
a, *lda, b, *ldb);
|
||||
}
|
||||
|
||||
@@ -506,7 +506,7 @@
|
||||
80 CONTINUE
|
||||
MWPS(1) = 0.0
|
||||
DO 100 I = 2, 6
|
||||
MWPS(I) = 1.0
|
||||
MWPS(I) = 1.0
|
||||
100 CONTINUE
|
||||
DO 120 I = 7, 11
|
||||
MWPS(I) = -1.0
|
||||
@@ -589,7 +589,7 @@
|
||||
MWPSTX(K) = MWPTX(I,K)
|
||||
MWPSTY(K) = MWPTY(I,K)
|
||||
180 CONTINUE
|
||||
CALL DROTTEST(MWPN(I),COPYX,INCX,COPYY,INCY,MWPC(I),MWPS(I))
|
||||
CALL DROTTEST(MWPN(I),COPYX,INCX,COPYY,INCY,MWPC(I),MWPS(I))
|
||||
CALL STEST(5,COPYX,MWPSTX,MWPSTX,SFAC)
|
||||
CALL STEST(5,COPYY,MWPSTY,MWPSTY,SFAC)
|
||||
200 CONTINUE
|
||||
|
||||
@@ -572,7 +572,7 @@
|
||||
CTRANS = ' CblasNoTrans'
|
||||
ELSE IF (TRANS.EQ.'T')THEN
|
||||
CTRANS = ' CblasTrans'
|
||||
ELSE
|
||||
ELSE
|
||||
CTRANS = 'CblasConjTrans'
|
||||
END IF
|
||||
TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C'
|
||||
@@ -921,7 +921,7 @@
|
||||
UPLO = ICH( IC: IC )
|
||||
IF (UPLO.EQ.'U')THEN
|
||||
CUPLO = ' CblasUpper'
|
||||
ELSE
|
||||
ELSE
|
||||
CUPLO = ' CblasLower'
|
||||
END IF
|
||||
*
|
||||
@@ -1215,7 +1215,7 @@
|
||||
LOGICAL LDE, LDERES
|
||||
EXTERNAL LDE, LDERES
|
||||
* .. External Subroutines ..
|
||||
EXTERNAL DMAKE, DMVCH, CDTBMV, CDTBSV, CDTPMV,
|
||||
EXTERNAL DMAKE, DMVCH, CDTBMV, CDTBSV, CDTPMV,
|
||||
$ CDTPSV, CDTRMV, CDTRSV
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC ABS, MAX
|
||||
@@ -1283,7 +1283,7 @@
|
||||
UPLO = ICHU( ICU: ICU )
|
||||
IF (UPLO.EQ.'U')THEN
|
||||
CUPLO = ' CblasUpper'
|
||||
ELSE
|
||||
ELSE
|
||||
CUPLO = ' CblasLower'
|
||||
END IF
|
||||
*
|
||||
@@ -1293,7 +1293,7 @@
|
||||
CTRANS = ' CblasNoTrans'
|
||||
ELSE IF (TRANS.EQ.'T')THEN
|
||||
CTRANS = ' CblasTrans'
|
||||
ELSE
|
||||
ELSE
|
||||
CTRANS = 'CblasConjTrans'
|
||||
END IF
|
||||
*
|
||||
@@ -1972,7 +1972,7 @@
|
||||
$ ALPHA, INCX, LDA
|
||||
IF( REWI )
|
||||
$ REWIND NTRA
|
||||
CALL CDSYR( IORDER, UPLO, N, ALPHA, XX, INCX,
|
||||
CALL CDSYR( IORDER, UPLO, N, ALPHA, XX, INCX,
|
||||
$ AA, LDA )
|
||||
ELSE IF( PACKED )THEN
|
||||
IF( TRACE )
|
||||
@@ -2737,7 +2737,7 @@
|
||||
WRITE( NOUT, FMT = 9998 )I, YT( I ),
|
||||
$ YY( 1 + ( I - 1 )*ABS( INCY ) )
|
||||
ELSE
|
||||
WRITE( NOUT, FMT = 9998 )I,
|
||||
WRITE( NOUT, FMT = 9998 )I,
|
||||
$ YY( 1 + ( I - 1 )*ABS( INCY ) ), YT(I)
|
||||
END IF
|
||||
60 CONTINUE
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user