Compare commits
1112 Commits
v0.2.0
...
integer_da
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
034ffa93fa | ||
|
|
3f1b57668e | ||
|
|
d8f18d32c3 | ||
|
|
bdb5c842fc | ||
|
|
e7c969e164 | ||
|
|
9bd962f655 | ||
|
|
4f5691e5c0 | ||
|
|
29293160a4 | ||
|
|
3e33afef2e | ||
|
|
8614057ea9 | ||
|
|
7f375f9e8f | ||
|
|
69c5169e7d | ||
|
|
e19948baa1 | ||
|
|
a2eaf234fc | ||
|
|
6a13a94e71 | ||
|
|
eff43d3289 | ||
|
|
9c4817d07b | ||
|
|
319f3a0451 | ||
|
|
02c7766f68 | ||
|
|
f38cb67ca8 | ||
|
|
eea2e30b74 | ||
|
|
19b8fd2aed | ||
|
|
0cc5212741 | ||
|
|
c47c8e8cf5 | ||
|
|
a11555c715 | ||
|
|
897d03518e | ||
|
|
23fbc5728e | ||
|
|
6d40fa587f | ||
|
|
22dcd79959 | ||
|
|
ea4df0aad3 | ||
|
|
e127fb8fd8 | ||
|
|
7fb718a7d8 | ||
|
|
24f58c8bb1 | ||
|
|
95b1faf667 | ||
|
|
2d9e406050 | ||
|
|
59083e3ce1 | ||
|
|
685be40339 | ||
|
|
31c9e399e9 | ||
|
|
7de6bb9889 | ||
|
|
d63034303b | ||
|
|
51ff17d46e | ||
|
|
905534942a | ||
|
|
18e90ee2e3 | ||
|
|
e00cccc41e | ||
|
|
73f09bf64f | ||
|
|
02e772c7e4 | ||
|
|
7aee913991 | ||
|
|
e50a933037 | ||
|
|
5f9011d6ef | ||
|
|
ebb9eba987 | ||
|
|
8e5a1083bb | ||
|
|
6743beb748 | ||
|
|
bcabf72c08 | ||
|
|
cda29f183b | ||
|
|
e52d36450a | ||
|
|
f8f2e261fe | ||
|
|
be3c843700 | ||
|
|
e6f57db846 | ||
|
|
9bfd267d51 | ||
|
|
924bc5372e | ||
|
|
2b83a69650 | ||
|
|
133c11a156 | ||
|
|
30f52d53df | ||
|
|
a124637329 | ||
|
|
642aaba2e0 | ||
|
|
4c616173e4 | ||
|
|
5e83d80725 | ||
|
|
b2e1797dc6 | ||
|
|
e216f686cb | ||
|
|
e42652f772 | ||
|
|
e77db2af31 | ||
|
|
37b00841ac | ||
|
|
fc0e0391f3 | ||
|
|
da0f27b9ac | ||
|
|
c22068c406 | ||
|
|
dee100d0e4 | ||
|
|
0273966abb | ||
|
|
3a67daa954 | ||
|
|
ab567d8443 | ||
|
|
3c09cea4b2 | ||
|
|
b4f2153dcd | ||
|
|
1c4b0eeae3 | ||
|
|
406d9d64e9 | ||
|
|
1bec9abb9a | ||
|
|
3814bf60d3 | ||
|
|
847e19c04e | ||
|
|
46c7b4d5c8 | ||
|
|
8e05d291b5 | ||
|
|
9da555e5f7 | ||
|
|
6d0db0151f | ||
|
|
37b9033c90 | ||
|
|
59e7a518c6 | ||
|
|
13889515b3 | ||
|
|
248c9340c3 | ||
|
|
e9f33b4ca7 | ||
|
|
f5d847122a | ||
|
|
a4c96eca67 | ||
|
|
fb02cb0a41 | ||
|
|
baa0363ea2 | ||
|
|
34ba66606a | ||
|
|
f615dc7603 | ||
|
|
331c417637 | ||
|
|
6c3a0b5d46 | ||
|
|
fd9fd42936 | ||
|
|
9798481979 | ||
|
|
d7a17ad85d | ||
|
|
d35f6c63c2 | ||
|
|
166d76e864 | ||
|
|
f9f127d838 | ||
|
|
62231ab337 | ||
|
|
3119def9a7 | ||
|
|
33b332372a | ||
|
|
fd838c75bc | ||
|
|
b57a60dac8 | ||
|
|
5c51163972 | ||
|
|
9299d8cfd6 | ||
|
|
0a3d3b945d | ||
|
|
4f680a7d61 | ||
|
|
ba926e807c | ||
|
|
60c6dec6e6 | ||
|
|
47898cca35 | ||
|
|
53bb924287 | ||
|
|
1e80b8b0d3 | ||
|
|
a901b065d3 | ||
|
|
3937e2a0a0 | ||
|
|
9707d608d5 | ||
|
|
701b9d7556 | ||
|
|
8977b3f235 | ||
|
|
f6426395ea | ||
|
|
0ac787eefe | ||
|
|
e5b96e55a7 | ||
|
|
a3491e1e88 | ||
|
|
e81a5d61e4 | ||
|
|
c674fa32be | ||
|
|
e34911a73d | ||
|
|
76dcaf2281 | ||
|
|
770fac92eb | ||
|
|
e95d64333a | ||
|
|
75c40bcc48 | ||
|
|
b62f9f4120 | ||
|
|
b6438dedea | ||
|
|
cdefdb21cd | ||
|
|
ea7f9dacf4 | ||
|
|
bf5dbb7e2a | ||
|
|
39cc6b21d3 | ||
|
|
771b18ae9c | ||
|
|
cfa9392ffa | ||
|
|
1ccd57ce80 | ||
|
|
65a847cd36 | ||
|
|
07ff001981 | ||
|
|
b17ccb4c5c | ||
|
|
63c6fcfa0a | ||
|
|
29cb47fc06 | ||
|
|
4e6c4046f7 | ||
|
|
229ce2ccd1 | ||
|
|
ef75be0e51 | ||
|
|
5344f335a8 | ||
|
|
5cb5af9333 | ||
|
|
41aad0407f | ||
|
|
f8f2e84659 | ||
|
|
34633fef01 | ||
|
|
ddf983d643 | ||
|
|
17b9db20f1 | ||
|
|
0dc559ed30 | ||
|
|
9566f5fdb0 | ||
|
|
4319769b79 | ||
|
|
e9d9a8eae3 | ||
|
|
cbb3ab80e7 | ||
|
|
cd9868b1b4 | ||
|
|
eb738148fe | ||
|
|
587e16fba3 | ||
|
|
4de7b9ae47 | ||
|
|
887aed634d | ||
|
|
6261342de3 | ||
|
|
1e566223ed | ||
|
|
113b48ca22 | ||
|
|
3e81c99b6b | ||
|
|
ec85c4a51d | ||
|
|
97de657d38 | ||
|
|
71966eba6c | ||
|
|
a359979e17 | ||
|
|
7a6a141bc4 | ||
|
|
b8ff6892f6 | ||
|
|
8fe7a9ce6f | ||
|
|
bc5fff7085 | ||
|
|
1943ea91a8 | ||
|
|
f5424fc9de | ||
|
|
0cf29ba6d2 | ||
|
|
50e18033e6 | ||
|
|
551b55d1c7 | ||
|
|
271ceb8bae | ||
|
|
5f846be2e4 | ||
|
|
fe7dcf98f3 | ||
|
|
2fb02626da | ||
|
|
a85c2785ae | ||
|
|
4806715c97 | ||
|
|
58c90d5937 | ||
|
|
2987bc7b40 | ||
|
|
695e0fa649 | ||
|
|
cbb23c46c2 | ||
|
|
0b4602b753 | ||
|
|
ac5a7e1c1b | ||
|
|
f1b9a4a1ca | ||
|
|
ae6b7caf32 | ||
|
|
f446d2368a | ||
|
|
dab4edd069 | ||
|
|
9d7057366d | ||
|
|
7f234f8ed1 | ||
|
|
9e829ce98f | ||
|
|
d49fd33885 | ||
|
|
f0f9b25bb6 | ||
|
|
7aae4a62e7 | ||
|
|
7a911569b8 | ||
|
|
466bfb8b86 | ||
|
|
70d1ba09b2 | ||
|
|
d293b78b64 | ||
|
|
9912dbbcf9 | ||
|
|
01bc462e8e | ||
|
|
3300f5ebff | ||
|
|
59e2c20557 | ||
|
|
b7c9566eea | ||
|
|
6df1b0be81 | ||
|
|
2ac1e076c1 | ||
|
|
9908b6031c | ||
|
|
8f100a14f2 | ||
|
|
53b5726b04 | ||
|
|
1a352b24e6 | ||
|
|
5194818d4b | ||
|
|
8a39cdb1c1 | ||
|
|
fd2478c9e2 | ||
|
|
0a1390f2d8 | ||
|
|
a8b0812feb | ||
|
|
a0fb68ab42 | ||
|
|
44c11165d5 | ||
|
|
564be4eb72 | ||
|
|
107c3ea7d5 | ||
|
|
bb8d698335 | ||
|
|
e0192a6914 | ||
|
|
bced4594bb | ||
|
|
cafba99b6b | ||
|
|
ac8f232b2a | ||
|
|
f98e1244c4 | ||
|
|
be95700b30 | ||
|
|
4aa534ae93 | ||
|
|
1cba8e7b11 | ||
|
|
d13e92f07e | ||
|
|
baa46e4fba | ||
|
|
faab7a181d | ||
|
|
8109d8232c | ||
|
|
debc6d1a05 | ||
|
|
e73a0113ec | ||
|
|
44f2bf9bae | ||
|
|
a057e5434d | ||
|
|
cd34e9701b | ||
|
|
7794766d3c | ||
|
|
658939faaa | ||
|
|
f511807fc0 | ||
|
|
c4d9d4e5f8 | ||
|
|
7c0a94ff47 | ||
|
|
cbbc80aad3 | ||
|
|
2be5c7a640 | ||
|
|
80f7786875 | ||
|
|
553e275407 | ||
|
|
7b3932b3f3 | ||
|
|
75207b1148 | ||
|
|
274828fa50 | ||
|
|
5ae1731fe6 | ||
|
|
c8eaf3ae2d | ||
|
|
3a7ab47ee9 | ||
|
|
cf5544b417 | ||
|
|
d143f84dd2 | ||
|
|
7794237475 | ||
|
|
a64fe9bcc9 | ||
|
|
2021d0f9d6 | ||
|
|
6df7a88930 | ||
|
|
53de943690 | ||
|
|
7f910010a0 | ||
|
|
3a5d8dbff9 | ||
|
|
2a60c6d4b0 | ||
|
|
0fc560ba23 | ||
|
|
d1800397f5 | ||
|
|
f4ff889491 | ||
|
|
210bec9111 | ||
|
|
f3b50dcf5b | ||
|
|
93eaba959d | ||
|
|
9570e56965 | ||
|
|
d7f91f8b4f | ||
|
|
53f1277b6b | ||
|
|
bc99faef1b | ||
|
|
848c0f16f7 | ||
|
|
e2fc8c8c2c | ||
|
|
53e6dbf6ca | ||
|
|
868f8a8756 | ||
|
|
db7e6366cd | ||
|
|
2702323f7d | ||
|
|
20cd850125 | ||
|
|
5fa6158731 | ||
|
|
84badf8086 | ||
|
|
c8cc4a0d22 | ||
|
|
3885eebdb8 | ||
|
|
ee74445155 | ||
|
|
9d2ace8bac | ||
|
|
b55f997302 | ||
|
|
29125864b3 | ||
|
|
e45c960c2c | ||
|
|
55e81da379 | ||
|
|
ac76b6267f | ||
|
|
f1b96c4846 | ||
|
|
16d6be852d | ||
|
|
53ec5789e2 | ||
|
|
95a707ced3 | ||
|
|
5d97b0754c | ||
|
|
8a9e868919 | ||
|
|
7e404de3de | ||
|
|
e4472ad850 | ||
|
|
fb0b4552a5 | ||
|
|
6f73ffc114 | ||
|
|
c8b0645266 | ||
|
|
ec05ff3f64 | ||
|
|
f6f9122660 | ||
|
|
8247f38dc1 | ||
|
|
ef6374196d | ||
|
|
f824c2b751 | ||
|
|
4ba4ab623f | ||
|
|
4f39447c05 | ||
|
|
74c9465672 | ||
|
|
a69dd3fbc5 | ||
|
|
101dd08173 | ||
|
|
493d4fe7e5 | ||
|
|
0a22816e70 | ||
|
|
c3cd6e7e32 | ||
|
|
11eab4c019 | ||
|
|
4568d32b6b | ||
|
|
c1a6374c6f | ||
|
|
dc05937313 | ||
|
|
2470129132 | ||
|
|
8c582d362d | ||
|
|
11e34ddd1b | ||
|
|
9528f0d9ee | ||
|
|
b06550519e | ||
|
|
6093ee5363 | ||
|
|
07c66b1960 | ||
|
|
58b075daef | ||
|
|
09fcd3a341 | ||
|
|
726ad085cb | ||
|
|
6fe416976d | ||
|
|
dbc2eff029 | ||
|
|
462b4885ff | ||
|
|
aa54fe064c | ||
|
|
006ef3ea01 | ||
|
|
60f17628cc | ||
|
|
c9bad1403a | ||
|
|
2f8927376f | ||
|
|
d945a2b06d | ||
|
|
ca6c8d06ce | ||
|
|
7aa43c8928 | ||
|
|
891b960854 | ||
|
|
95a8caa2f3 | ||
|
|
5c0d0ecbde | ||
|
|
8c05b8105b | ||
|
|
c80084a98f | ||
|
|
2bab92961f | ||
|
|
9175b8bd5f | ||
|
|
793f2d43b0 | ||
|
|
a4dde45f87 | ||
|
|
7fa7ea3e1e | ||
|
|
3fbc13eb65 | ||
|
|
db6917303f | ||
|
|
c2fdeb6c22 | ||
|
|
f7eb81a846 | ||
|
|
edc329883c | ||
|
|
793175be3a | ||
|
|
83c4ba8d32 | ||
|
|
271af406f3 | ||
|
|
f5f50b3563 | ||
|
|
651dd22d7d | ||
|
|
f329f77bd0 | ||
|
|
7c611a2f95 | ||
|
|
296564e369 | ||
|
|
27af6e35d3 | ||
|
|
a183ad1df4 | ||
|
|
799a0eabbd | ||
|
|
ca63503e61 | ||
|
|
4f83217df6 | ||
|
|
5087096711 | ||
|
|
21f7768b26 | ||
|
|
46bc4fd50c | ||
|
|
1cc02b4337 | ||
|
|
6e223db7fc | ||
|
|
1d33547222 | ||
|
|
3ea4dadd30 | ||
|
|
1b10ff129a | ||
|
|
125610d23b | ||
|
|
e213a42cde | ||
|
|
e4663be46a | ||
|
|
11637b6926 | ||
|
|
80bf3e6a35 | ||
|
|
6acbafe45b | ||
|
|
5392d11b04 | ||
|
|
c0fe95fb72 | ||
|
|
d9d4077c93 | ||
|
|
02eb72ac42 | ||
|
|
c06f9986d4 | ||
|
|
2cce125c79 | ||
|
|
b3938fe371 | ||
|
|
e6668dd83b | ||
|
|
c8a4a56177 | ||
|
|
3c5732615d | ||
|
|
134fa320e6 | ||
|
|
a79df1ff49 | ||
|
|
7ceb25d7b3 | ||
|
|
f2eb480738 | ||
|
|
c94762bb56 | ||
|
|
51413925bd | ||
|
|
b985cea65d | ||
|
|
d286daa2ba | ||
|
|
bcb115b55b | ||
|
|
3dd094f17a | ||
|
|
339ab34c4c | ||
|
|
7424e2b609 | ||
|
|
73594cff73 | ||
|
|
880597b301 | ||
|
|
9c835431d0 | ||
|
|
1d4ffddf69 | ||
|
|
b0e7810a6b | ||
|
|
2b92a8c499 | ||
|
|
274b8dc91a | ||
|
|
74b237ca22 | ||
|
|
c353abd38c | ||
|
|
0acce17979 | ||
|
|
2016a685e6 | ||
|
|
1b9a6aac30 | ||
|
|
e27433ab6a | ||
|
|
7961404a40 | ||
|
|
cedc1f4b14 | ||
|
|
0884b73c69 | ||
|
|
9bd9472ae9 | ||
|
|
2e2473f390 | ||
|
|
c4a423a642 | ||
|
|
47688e24e9 | ||
|
|
61ef0c3419 | ||
|
|
698e77dba4 | ||
|
|
2081f6e8ff | ||
|
|
dc6b809f15 | ||
|
|
0f08684649 | ||
|
|
552119c484 | ||
|
|
94d3cfaa10 | ||
|
|
13348b2137 | ||
|
|
783a7d2202 | ||
|
|
50e99a52ea | ||
|
|
9964ed2f79 | ||
|
|
d5b976f92d | ||
|
|
f7267d9b0e | ||
|
|
e0c080a28c | ||
|
|
e80b144932 | ||
|
|
02a504c0b8 | ||
|
|
be94db096c | ||
|
|
b079df9ef4 | ||
|
|
aee61456a4 | ||
|
|
01a119abfc | ||
|
|
1fad2b759f | ||
|
|
e1e83a1b71 | ||
|
|
1127f5a2d7 | ||
|
|
0ae4cc2803 | ||
|
|
99efbbbad5 | ||
|
|
22e5aee2dd | ||
|
|
249917700d | ||
|
|
7a8949e0ce | ||
|
|
b82108f899 | ||
|
|
8373ad4ec2 | ||
|
|
35d37e124f | ||
|
|
d8ba46efdb | ||
|
|
a15f22a1f6 | ||
|
|
b94ea89f52 | ||
|
|
35f668bb14 | ||
|
|
4ebbf758f5 | ||
|
|
8615d6ec87 | ||
|
|
6c2ead30f0 | ||
|
|
f41f03ab83 | ||
|
|
365e8de346 | ||
|
|
578d1b6219 | ||
|
|
a6ae079b17 | ||
|
|
d10db52edb | ||
|
|
dabab2b5f4 | ||
|
|
aa2709c4e0 | ||
|
|
9d6f2b594e | ||
|
|
a13bcc1716 | ||
|
|
d2c82d7543 | ||
|
|
0517672dd0 | ||
|
|
15d5dfa92c | ||
|
|
d83373db61 | ||
|
|
88b6bf251a | ||
|
|
4a2ab7460b | ||
|
|
86d8c8978b | ||
|
|
316df0e821 | ||
|
|
438002204d | ||
|
|
23203d52c1 | ||
|
|
73545a79cd | ||
|
|
a19d209005 | ||
|
|
8602816536 | ||
|
|
d52863cfd7 | ||
|
|
c6361d63c2 | ||
|
|
53bfa51ee0 | ||
|
|
ff9cfca24c | ||
|
|
a86d349a51 | ||
|
|
7b277f0110 | ||
|
|
faeab93df0 | ||
|
|
21a6b5f79e | ||
|
|
cee257f384 | ||
|
|
7bfb3011e8 | ||
|
|
8c8f596238 | ||
|
|
bff575d0b1 | ||
|
|
faf3ac0aad | ||
|
|
a40116de25 | ||
|
|
b31ec99372 | ||
|
|
0ac073fa94 | ||
|
|
25e899b60b | ||
|
|
219bcb119d | ||
|
|
5664445543 | ||
|
|
89da450800 | ||
|
|
c26bbee489 | ||
|
|
ced13574a0 | ||
|
|
fe858873af | ||
|
|
a8d4d1c4d3 | ||
|
|
c4ccb3fbb2 | ||
|
|
a748d3a75d | ||
|
|
a5ab231ad4 | ||
|
|
dbaeea7b59 | ||
|
|
10a16bd690 | ||
|
|
406f5bd22b | ||
|
|
a0ae53966f | ||
|
|
0d75f3b6a2 | ||
|
|
abad6f66d6 | ||
|
|
2ff66e661d | ||
|
|
5e55034922 | ||
|
|
9a9e810239 | ||
|
|
45be9ac111 | ||
|
|
9f201558c9 | ||
|
|
d4237cb7f3 | ||
|
|
d2a8ff4b04 | ||
|
|
f331cb1a76 | ||
|
|
9ed981c5dc | ||
|
|
aaa9d7fbf8 | ||
|
|
ebc95e6f11 | ||
|
|
61a2c50e8e | ||
|
|
4f98f8c9b3 | ||
|
|
536875d463 | ||
|
|
65f2fba4c3 | ||
|
|
eea6f51df9 | ||
|
|
6fc4646709 | ||
|
|
ac029f81b3 | ||
|
|
c0cf875a82 | ||
|
|
b6d904838e | ||
|
|
5379eff022 | ||
|
|
aaddb05411 | ||
|
|
e52532a9fe | ||
|
|
e826a5a6af | ||
|
|
165d5436b5 | ||
|
|
409b52255c | ||
|
|
5953972a5a | ||
|
|
d751224ea4 | ||
|
|
4a5938b5cc | ||
|
|
d18bc5468f | ||
|
|
8877c6db51 | ||
|
|
c38379c9dd | ||
|
|
a0b07c1440 | ||
|
|
43fbdb7a5a | ||
|
|
777cebc8c7 | ||
|
|
aa5c73e20f | ||
|
|
5e5ef28ca0 | ||
|
|
650ed34336 | ||
|
|
189ca1bcee | ||
|
|
4c1caa7454 | ||
|
|
7bb19cf90e | ||
|
|
2a94aaaf2e | ||
|
|
5e4b4f6712 | ||
|
|
47e8950e77 | ||
|
|
f45f2c8465 | ||
|
|
10780ae650 | ||
|
|
9bae50f700 | ||
|
|
0758c1a374 | ||
|
|
564ff395f6 | ||
|
|
7fb78a5f01 | ||
|
|
8204ab4aa8 | ||
|
|
48d1325784 | ||
|
|
57bbc586ef | ||
|
|
bfef3c5dd1 | ||
|
|
d972f4a60a | ||
|
|
eebce01cf2 | ||
|
|
e2c39a4a8e | ||
|
|
1e8e6faa7e | ||
|
|
c7eb901496 | ||
|
|
2ed03ea0a2 | ||
|
|
de00e2937a | ||
|
|
e187b5e9d0 | ||
|
|
0947fc1c89 | ||
|
|
4d61607c9e | ||
|
|
781bfb6e66 | ||
|
|
79a82ba7f1 | ||
|
|
d63bd7fa5e | ||
|
|
e265c4ec86 | ||
|
|
0732238213 | ||
|
|
5f3b68b4d4 | ||
|
|
2424af62fd | ||
|
|
6b252033ae | ||
|
|
320c805905 | ||
|
|
e673848a9b | ||
|
|
a35a1a9ae7 | ||
|
|
793509a3b5 | ||
|
|
020f36f970 | ||
|
|
9d0cc399ac | ||
|
|
025fc914cc | ||
|
|
43bb633096 | ||
|
|
187237b622 | ||
|
|
66198faab6 | ||
|
|
47b22763f8 | ||
|
|
4d42368214 | ||
|
|
1140c489c9 | ||
|
|
804a306313 | ||
|
|
9db0fb8b02 | ||
|
|
692b14cecd | ||
|
|
322a178430 | ||
|
|
f80f29e256 | ||
|
|
2c556f093a | ||
|
|
3b027d2528 | ||
|
|
57526cae99 | ||
|
|
5de5ef118c | ||
|
|
b161ac29e3 | ||
|
|
b20ee6924a | ||
|
|
49bd98f410 | ||
|
|
a14f98ca7c | ||
|
|
138a841390 | ||
|
|
046e4013cb | ||
|
|
dd2d3e61ab | ||
|
|
3617c22a56 | ||
|
|
f9daebba0a | ||
|
|
9a557e90da | ||
|
|
2d557eb1e0 | ||
|
|
a789b77b75 | ||
|
|
75acf96d94 | ||
|
|
8c7687b419 | ||
|
|
3e0a7b931c | ||
|
|
306d9f2e35 | ||
|
|
7b8604ea29 | ||
|
|
ab69443bd4 | ||
|
|
b263e096af | ||
|
|
05bb391c3a | ||
|
|
0ab080987d | ||
|
|
d51868190e | ||
|
|
9b5be29886 | ||
|
|
768d2042d4 | ||
|
|
91eaea364b | ||
|
|
a136426624 | ||
|
|
2f0d2ce1ea | ||
|
|
0f6e79f918 | ||
|
|
ea74f331f4 | ||
|
|
53eaf41901 | ||
|
|
9423f980f6 | ||
|
|
c6156b2ef2 | ||
|
|
034a5b2083 | ||
|
|
27d4234d4d | ||
|
|
aae75b2461 | ||
|
|
a74ac84981 | ||
|
|
402d6e91db | ||
|
|
6d2c6b5c74 | ||
|
|
1b5a267cdd | ||
|
|
4a1575e748 | ||
|
|
339fef2649 | ||
|
|
b3254eecaf | ||
|
|
d910404f00 | ||
|
|
1cae4114a8 | ||
|
|
91ca041cc2 | ||
|
|
d2385f0d52 | ||
|
|
ec2dadde9b | ||
|
|
d56292e2e4 | ||
|
|
ffe70b1fdc | ||
|
|
ecbc85b954 | ||
|
|
9e38dbb658 | ||
|
|
d844901062 | ||
|
|
eab79631c3 | ||
|
|
d13aa79d26 | ||
|
|
dfaa489224 | ||
|
|
9fbf437177 | ||
|
|
38ce786754 | ||
|
|
2594728eb7 | ||
|
|
cc7876f660 | ||
|
|
51e5983599 | ||
|
|
65ebab0688 | ||
|
|
7594216412 | ||
|
|
fac07b0687 | ||
|
|
3169f524e4 | ||
|
|
d4833f1801 | ||
|
|
fe8b841c82 | ||
|
|
9b1b01a478 | ||
|
|
2a625447ea | ||
|
|
be18cd47f6 | ||
|
|
8191efc420 | ||
|
|
39dc69db4a | ||
|
|
04d51536a4 | ||
|
|
c947ab85dc | ||
|
|
0b6e13b689 | ||
|
|
e09dc279a2 | ||
|
|
1596ced242 | ||
|
|
4be4db590c | ||
|
|
5c648a8984 | ||
|
|
7581e2e9cb | ||
|
|
c44dc4dd3c | ||
|
|
ac7735e01f | ||
|
|
271ceeba15 | ||
|
|
71eecaaf37 | ||
|
|
9d3fae15a8 | ||
|
|
2d3c884294 | ||
|
|
d54a061713 | ||
|
|
86afb47e83 | ||
|
|
42a4dff056 | ||
|
|
5bc322a66c | ||
|
|
dec7ad0dfd | ||
|
|
274304bd03 | ||
|
|
5007a534c4 | ||
|
|
a537d7d8d7 | ||
|
|
b42145834f | ||
|
|
3d5e792c72 | ||
|
|
a9bd12da2c | ||
|
|
697e198e8a | ||
|
|
36b0f7fe1d | ||
|
|
d2b20c5c51 | ||
|
|
fd1d9fdb22 | ||
|
|
fe5f46c330 | ||
|
|
e25de3d182 | ||
|
|
25c6050593 | ||
|
|
12e02a00e0 | ||
|
|
29a3196f56 | ||
|
|
8776a73773 | ||
|
|
7e84acd3e8 | ||
|
|
33d3ab6e09 | ||
|
|
9a0f978929 | ||
|
|
7f210587f0 | ||
|
|
9f0a3a35b3 | ||
|
|
dbae93110b | ||
|
|
19cd5c64a2 | ||
|
|
9adf87495e | ||
|
|
440db4cdda | ||
|
|
cd93cae5a7 | ||
|
|
8565afb3c2 | ||
|
|
5bf7cf8d67 | ||
|
|
29a005c635 | ||
|
|
f1be3a168a | ||
|
|
410afda9b4 | ||
|
|
bf04544902 | ||
|
|
86283c0be1 | ||
|
|
f27cabfd08 | ||
|
|
23dd474cd0 | ||
|
|
f1b452e160 | ||
|
|
3dabd7e6e6 | ||
|
|
6f4a0ebe38 | ||
|
|
5048a80032 | ||
|
|
6e679266f8 | ||
|
|
f1db386211 | ||
|
|
6da558d2ab | ||
|
|
a2942456ef | ||
|
|
f750103336 | ||
|
|
00f33c0134 | ||
|
|
5b36cc0f47 | ||
|
|
c8f1aeb154 | ||
|
|
8fa93be06e | ||
|
|
1e8128f41c | ||
|
|
6d8095bcb9 | ||
|
|
2f5fdd2000 | ||
|
|
80a2e901b1 | ||
|
|
73770e60b8 | ||
|
|
ac50bccbd2 | ||
|
|
82015beaef | ||
|
|
6216ab8a7e | ||
|
|
370e3834a9 | ||
|
|
95aedfa0ff | ||
|
|
cba97daf3c | ||
|
|
5400a9f4e4 | ||
|
|
e31186efd4 | ||
|
|
2b801a00a5 | ||
|
|
b3eab8fcb7 | ||
|
|
6d9d70c55c | ||
|
|
dfd1064d7b | ||
|
|
02bc36ac79 | ||
|
|
5118a7f4d1 | ||
|
|
e172b70ea2 | ||
|
|
1cf4b974b2 | ||
|
|
7bccff1512 | ||
|
|
afe44b0241 | ||
|
|
a77c71eaf5 | ||
|
|
b2219b3478 | ||
|
|
f5a0038bad | ||
|
|
c937090121 | ||
|
|
fe8c5666f9 | ||
|
|
f6b50057e2 | ||
|
|
2840d56aeb | ||
|
|
2d49db2f5b | ||
|
|
04391e6d9c | ||
|
|
85484a42df | ||
|
|
3983011f0b | ||
|
|
2a1515c9dd | ||
|
|
31f51e78bc | ||
|
|
beffee7d91 | ||
|
|
a35f4343fa | ||
|
|
ce5626a384 | ||
|
|
e0b968c3a7 | ||
|
|
93f1074dd4 | ||
|
|
1c63180bb6 | ||
|
|
22a8fcc4b7 | ||
|
|
9965d48005 | ||
|
|
4a474ea7dc | ||
|
|
69ce737cc5 | ||
|
|
d13788d1b4 | ||
|
|
70411af888 | ||
|
|
16eb780e13 | ||
|
|
a746724e84 | ||
|
|
3f7b0cd994 | ||
|
|
cc6db2ecfe | ||
|
|
3175be4b3d | ||
|
|
a29e6592da | ||
|
|
212463dce9 | ||
|
|
037bd82bef | ||
|
|
eae4cfa3f6 | ||
|
|
6c4a7d0828 | ||
|
|
fe98de2f68 | ||
|
|
db389b5915 | ||
|
|
52f587db7f | ||
|
|
067e8417fd | ||
|
|
a82da3d069 | ||
|
|
1569bf14f8 | ||
|
|
df554aebd2 | ||
|
|
eae6920f2d | ||
|
|
c92ae012a6 | ||
|
|
f51a849d91 | ||
|
|
44ef70420c | ||
|
|
d488b1b1aa | ||
|
|
4070d9a123 | ||
|
|
0b90c0ec64 | ||
|
|
2b8ab8f55b | ||
|
|
1cb9579cd0 | ||
|
|
2638370844 | ||
|
|
89637f87c8 | ||
|
|
c0b1e41bec | ||
|
|
49faee1a51 | ||
|
|
c0159d44a3 | ||
|
|
c17a850c1c | ||
|
|
099853fff6 | ||
|
|
44d23881b5 | ||
|
|
2905042c6a | ||
|
|
32fb6b9bb2 | ||
|
|
673e453b3f | ||
|
|
143cca4dd5 | ||
|
|
aaeb8eaecd | ||
|
|
8aeec32ea0 | ||
|
|
87fc9de572 | ||
|
|
564aa60fec | ||
|
|
f645665dd6 | ||
|
|
e45a347cd2 | ||
|
|
99727ac013 | ||
|
|
6e0a2fbc0c | ||
|
|
0a22f99c58 | ||
|
|
79ba52115d | ||
|
|
835293cc1a | ||
|
|
b736aa8110 | ||
|
|
ae521ecc3e | ||
|
|
36adfe8d64 | ||
|
|
a07cc39571 | ||
|
|
cff70a666d | ||
|
|
b5c2ac4fd6 | ||
|
|
749f45ffc8 | ||
|
|
534c5ec919 | ||
|
|
bd2da90e13 | ||
|
|
84bd0aabaa | ||
|
|
5b504d6c23 | ||
|
|
72b1edaf1b | ||
|
|
a2930664f4 | ||
|
|
6e0db36373 | ||
|
|
1e1250b703 | ||
|
|
23186d9f21 | ||
|
|
e6ebbfd314 | ||
|
|
4471c77905 | ||
|
|
9f0fb6e662 | ||
|
|
f26b7a08aa | ||
|
|
63f14189e3 | ||
|
|
e39384432b | ||
|
|
c5437149c0 | ||
|
|
6f5b395009 | ||
|
|
d4f9571818 | ||
|
|
937d838619 | ||
|
|
a8f9b6a665 | ||
|
|
6209c8fc44 | ||
|
|
238ceb4ac0 | ||
|
|
77b572fa0b | ||
|
|
f69f89b846 | ||
|
|
c77032b0cc | ||
|
|
1b3b9e841d | ||
|
|
b67252c2e4 | ||
|
|
c69e73b868 | ||
|
|
b51e2ba1ee | ||
|
|
9c0a834f98 | ||
|
|
2a7503e563 | ||
|
|
fd0c388681 | ||
|
|
61a9582987 | ||
|
|
b681064c6c | ||
|
|
e80e285928 | ||
|
|
2ed0f6ab60 | ||
|
|
5448643557 | ||
|
|
824c3c4df3 | ||
|
|
c19a488af2 | ||
|
|
32d2ca3035 | ||
|
|
6df39ad9e7 | ||
|
|
3a96e4cbcb | ||
|
|
6f008abcef | ||
|
|
3eb5af1955 | ||
|
|
fbb75e58b1 | ||
|
|
f54f5bac9e | ||
|
|
5d3312142a | ||
|
|
886cbaf4e4 | ||
|
|
0c4074e10b | ||
|
|
cc522aa21d | ||
|
|
9c78fad721 | ||
|
|
6028232ad1 | ||
|
|
feb9a3889a | ||
|
|
32dbeb636d | ||
|
|
57944538b6 | ||
|
|
3ce2c62b0b | ||
|
|
50464997a3 | ||
|
|
8e7cad1650 | ||
|
|
590e6aeafc | ||
|
|
88ef307cef | ||
|
|
6e8501c8a1 | ||
|
|
fa916a0fac | ||
|
|
fb298b34ae | ||
|
|
16012767f4 | ||
|
|
bcbac31b47 | ||
|
|
8dc0c72583 | ||
|
|
89405a1a0b | ||
|
|
4f2b12b8a8 | ||
|
|
646e168d26 | ||
|
|
93dbbe1fb8 | ||
|
|
a135f5d9ed | ||
|
|
d0b6299b13 | ||
|
|
9e58dd509e | ||
|
|
7c8227101b | ||
|
|
f67fa62851 | ||
|
|
cd1d473ba0 | ||
|
|
56f160134d | ||
|
|
0ded1fcc1c | ||
|
|
a789b588cd | ||
|
|
8eaa04acbb | ||
|
|
d854b30ae6 | ||
|
|
d65bbec99b | ||
|
|
e4c39c7c26 | ||
|
|
ba800f0883 | ||
|
|
25491e42f9 | ||
|
|
960b0c88a7 | ||
|
|
65ffead0cf | ||
|
|
f2fb8c7035 | ||
|
|
9f59f384d8 | ||
|
|
23965f164c | ||
|
|
6a72840945 | ||
|
|
947457fb7c | ||
|
|
79120bf9a0 | ||
|
|
acb11905d5 | ||
|
|
109500178c | ||
|
|
e50a664865 | ||
|
|
357078b93e | ||
|
|
731220f870 | ||
|
|
69aa6c8fb1 | ||
|
|
60b263f3d2 | ||
|
|
7ac306e0da | ||
|
|
4cb454cdf2 | ||
|
|
19ad2fb128 | ||
|
|
5d96e4f224 | ||
|
|
6821677489 | ||
|
|
dbbda55e67 | ||
|
|
6c34a7f43c | ||
|
|
3326f3152c | ||
|
|
7641f6e253 | ||
|
|
48bdc1ad3b | ||
|
|
3ad29452d1 | ||
|
|
6e3f6f25a5 | ||
|
|
990efcab6e | ||
|
|
75a5dc3975 | ||
|
|
986d542acb | ||
|
|
6958c1a1aa | ||
|
|
a068d54981 | ||
|
|
d692ee07f7 | ||
|
|
1a57717b1a | ||
|
|
6b01d58712 | ||
|
|
35b943f17f | ||
|
|
e029242870 | ||
|
|
7a9b94b519 | ||
|
|
66b919d99f | ||
|
|
f4846afbad | ||
|
|
53588bc786 | ||
|
|
b47f13ee4c | ||
|
|
309f90e563 | ||
|
|
773c01f496 | ||
|
|
d831b2ff8b | ||
|
|
724ae159ce | ||
|
|
2c9a203bd1 | ||
|
|
f300ce3df5 | ||
|
|
e2c7c75715 | ||
|
|
66e64131ed | ||
|
|
5900b1462e | ||
|
|
9405f26f4b | ||
|
|
54e7b37630 | ||
|
|
529f1b5006 | ||
|
|
e5ac3007e0 | ||
|
|
0d0405b434 | ||
|
|
f1ce74ffdd | ||
|
|
d744c9590a | ||
|
|
3cc6ae793e | ||
|
|
4c2123c334 | ||
|
|
5155e3f509 | ||
|
|
5c8bf6ae0e | ||
|
|
6ae2f868fd | ||
|
|
a1ead62f28 | ||
|
|
0133580148 | ||
|
|
274246651d | ||
|
|
299b5a44dc | ||
|
|
a9500d0079 | ||
|
|
64ad8b9809 | ||
|
|
875d520ccf | ||
|
|
d311236dfd | ||
|
|
36e0982966 | ||
|
|
8cdb795438 | ||
|
|
4db6660de4 | ||
|
|
0b08f7479e | ||
|
|
200e4acf15 | ||
|
|
99d1978df7 | ||
|
|
08bf6674d5 | ||
|
|
8b122ff9dc | ||
|
|
69200884e1 | ||
|
|
0d1518add9 | ||
|
|
91ed4e4450 | ||
|
|
fd3046b32a | ||
|
|
a4ee6f3915 | ||
|
|
a0363e9b48 | ||
|
|
b471d52e61 | ||
|
|
9fb341a9f8 | ||
|
|
fba6b590f2 | ||
|
|
97f68f7f3a | ||
|
|
1138817dd2 | ||
|
|
13f8fc0b1a | ||
|
|
bdf8d9411e | ||
|
|
bb10cb8442 | ||
|
|
d48cff8cf1 | ||
|
|
f19af5ecc0 | ||
|
|
bfaaa975e6 | ||
|
|
b7c0fa6bd2 | ||
|
|
7110d17146 | ||
|
|
e01b3d4b54 | ||
|
|
cea1a885b5 | ||
|
|
f78eb335d6 | ||
|
|
2345bdec68 | ||
|
|
5f0117385e | ||
|
|
6caf1bab73 | ||
|
|
01e3c984ce | ||
|
|
6751f7b9a7 | ||
|
|
d5717a97ea | ||
|
|
b45d43d295 | ||
|
|
dcfb69c2b5 | ||
|
|
e85549ee11 | ||
|
|
789f205177 | ||
|
|
378acfe826 | ||
|
|
538c764d2b | ||
|
|
0f26a21624 | ||
|
|
5c1efa1149 | ||
|
|
ca4136cf41 | ||
|
|
3a26470fb7 | ||
|
|
6c5899dff5 | ||
|
|
2df2878dfc | ||
|
|
0b719945c5 | ||
|
|
b1a54a0107 | ||
|
|
08c177ca36 | ||
|
|
2573311308 | ||
|
|
1d72b8bf1b | ||
|
|
758e34efbb | ||
|
|
735ca38b8f | ||
|
|
f76a384841 | ||
|
|
9419a43a7f | ||
|
|
b695680a33 | ||
|
|
d0e731e8b8 | ||
|
|
48f075cfd5 | ||
|
|
3e87648de3 | ||
|
|
fe4ab95cd5 | ||
|
|
801383effe | ||
|
|
54cd65e47f | ||
|
|
a55821a2ec | ||
|
|
068861a927 | ||
|
|
d007cca61d | ||
|
|
a92895939e | ||
|
|
7bd1834d59 | ||
|
|
1b056c5328 | ||
|
|
e8306f623a | ||
|
|
3108a1853d | ||
|
|
25f1a573fd | ||
|
|
71d29fa3d0 | ||
|
|
50848e34ec | ||
|
|
4a5d08d0cf | ||
|
|
6fcdaa4387 | ||
|
|
699fc7641f | ||
|
|
3692b4d631 | ||
|
|
4b7677a916 | ||
|
|
5719b7a58d | ||
|
|
f22bfe6a55 | ||
|
|
551f478477 | ||
|
|
a430880729 | ||
|
|
a507b56ab1 | ||
|
|
f430e54daf | ||
|
|
0a958b6a02 |
10
.gitignore
vendored
10
.gitignore
vendored
@@ -4,10 +4,16 @@
|
||||
*.dylib
|
||||
*.def
|
||||
*.o
|
||||
*.out
|
||||
lapack-3.1.1
|
||||
lapack-3.1.1.tgz
|
||||
lapack-3.4.1
|
||||
lapack-3.4.1.tgz
|
||||
lapack-3.4.2
|
||||
lapack-3.4.2.tgz
|
||||
lapack-netlib/make.inc
|
||||
lapack-netlib/lapacke/include/lapacke_mangling.h
|
||||
lapack-netlib/TESTING/testing_results.txt
|
||||
*.so
|
||||
*.a
|
||||
.svn
|
||||
@@ -15,8 +21,10 @@ lapack-3.4.1.tgz
|
||||
lib.grd
|
||||
nohup.out
|
||||
config.h
|
||||
config_kernel.h
|
||||
Makefile.conf
|
||||
Makefile.conf_last
|
||||
Makefile_kernel.conf
|
||||
config_last.h
|
||||
getarch
|
||||
getarch_2nd
|
||||
@@ -35,6 +43,8 @@ ctest/xzcblat2
|
||||
ctest/xzcblat3
|
||||
exports/linktest.c
|
||||
exports/linux.def
|
||||
kernel/setparam_*.c
|
||||
kernel/kernel_*.h
|
||||
test/CBLAT2.SUMM
|
||||
test/CBLAT3.SUMM
|
||||
test/DBLAT2.SUMM
|
||||
|
||||
24
.travis.yml
Normal file
24
.travis.yml
Normal file
@@ -0,0 +1,24 @@
|
||||
language: c
|
||||
compiler:
|
||||
- gcc
|
||||
|
||||
env:
|
||||
- TARGET_BOX=LINUX64 BTYPE="BINARY=64"
|
||||
- TARGET_BOX=LINUX64 BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
- TARGET_BOX=LINUX64 BTYPE="BINARY=64 INTERFACE64=1"
|
||||
- TARGET_BOX=LINUX32 BTYPE="BINARY=32"
|
||||
- TARGET_BOX=WIN64 BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
|
||||
|
||||
before_install:
|
||||
- sudo apt-get update -qq
|
||||
- sudo apt-get install -qq gfortran
|
||||
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
|
||||
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi
|
||||
|
||||
script: make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
|
||||
|
||||
# whitelist
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- develop
|
||||
39
BACKERS.md
Normal file
39
BACKERS.md
Normal file
@@ -0,0 +1,39 @@
|
||||
Thank you for the support.
|
||||
|
||||
### [2013.8] [Testbed for OpenBLAS project](https://www.bountysource.com/fundraisers/443-testbed-for-openblas-project)
|
||||
|
||||
https://www.bountysource.com/fundraisers/443-testbed-for-openblas-project/pledges
|
||||
|
||||
In chronological order:
|
||||
|
||||
* aeberspaecher
|
||||
* fmolina
|
||||
* saullocastro
|
||||
* xianyi
|
||||
* cuda
|
||||
* carter
|
||||
* StefanKarpinski
|
||||
* staticfloat
|
||||
* sebastien-villemot
|
||||
* JeffBezanson
|
||||
* ihnorton
|
||||
* simonp0420
|
||||
* andrioni
|
||||
* Tim Holy
|
||||
* ivarne
|
||||
* johnmyleswhite
|
||||
* traz
|
||||
* Jean-Francis Roy
|
||||
* bkalpert
|
||||
* Anirban
|
||||
* pgermain
|
||||
* alexandre.lacoste.18
|
||||
* foges
|
||||
* ssam
|
||||
* WestleyArgentum
|
||||
* daniebmariani
|
||||
* pjpuglia
|
||||
* albarrentine
|
||||
* Alexander Vogt
|
||||
|
||||
|
||||
131
CONTRIBUTORS.md
Normal file
131
CONTRIBUTORS.md
Normal file
@@ -0,0 +1,131 @@
|
||||
# Contributions to the OpenBLAS project
|
||||
|
||||
## Creator & Maintainer
|
||||
|
||||
* Zhang Xianyi <traits.zhang@gmail.com>
|
||||
|
||||
## Active Developers
|
||||
|
||||
* Wang Qian <traz0824@gmail.com>
|
||||
* Optimize BLAS3 on ICT Loongson 3A.
|
||||
* Optimize BLAS3 on Intel Sandy Bridge.
|
||||
|
||||
* Werner Saar <wernsaar@googlemail.com>
|
||||
* [2013-03-04] Optimize AVX and FMA4 DGEMM on AMD Bulldozer
|
||||
* [2013-04-27] Optimize AVX and FMA4 TRSM on AMD Bulldozer
|
||||
* [2013-06-09] Optimize AVX and FMA4 SGEMM on AMD Bulldozer
|
||||
* [2013-06-11] Optimize AVX and FMA4 ZGEMM on AMD Bulldozer
|
||||
* [2013-06-12] Optimize AVX and FMA4 CGEMM on AMD Bulldozer
|
||||
* [2013-06-16] Optimize dgemv_n kernel on AMD Bulldozer
|
||||
* [2013-06-20] Optimize ddot, daxpy kernel on AMD Bulldozer
|
||||
* [2013-06-21] Optimize dcopy kernel on AMD Bulldozer
|
||||
* Porting and Optimization on ARM Cortex-A9
|
||||
* Optimization on AMD Piledriver
|
||||
* Optimization on Intel Haswell
|
||||
|
||||
## Previous Developers
|
||||
|
||||
* Zaheer Chothia <zaheer.chothia@gmail.com>
|
||||
* Improve compatibility for complex numbers
|
||||
* Build LAPACKE: C interface to LAPACK
|
||||
* Improve the windows build.
|
||||
|
||||
* Chen Shaohu <huhumartinwar@gmail.com>
|
||||
* Optimize GEMV on the Loongson 3A processor.
|
||||
|
||||
* Luo Wen
|
||||
* Intern. Test Level-2 BLAS.
|
||||
|
||||
## Contributors
|
||||
|
||||
In chronological order:
|
||||
|
||||
* pipping <http://page.mi.fu-berlin.de/pipping>
|
||||
* [2011-06-11] Make USE_OPENMP=0 disable openmp.
|
||||
|
||||
* Stefan Karpinski <stefan@karpinski.org>
|
||||
* [2011-12-28] Fix a bug about SystemStubs on Mac OS X.
|
||||
|
||||
* Alexander Eberspächer <https://github.com/aeberspaecher>
|
||||
* [2012-05-02] Add note on patch for segfaults on Linux kernel 2.6.32.
|
||||
|
||||
* Mike Nolta <mike@nolta.net>
|
||||
* [2012-05-19] Fix building bug on FreeBSD and NetBSD.
|
||||
|
||||
* Sylvestre Ledru <https://github.com/sylvestre>
|
||||
* [2012-07-01] Improve the detection of sparc. Fix building bug under
|
||||
Hurd and kfreebsd.
|
||||
|
||||
* Jameson Nash <https://github.com/vtjnash>
|
||||
* [2012-08-20] Provide support for passing CFLAGS, FFLAGS, PFLAGS, FPFLAGS to
|
||||
make on the command line.
|
||||
|
||||
* Alexander Nasonov <alnsn@yandex.ru>
|
||||
* [2012-11-10] Fix NetBSD build.
|
||||
|
||||
* Sébastien Villemot <sebastien@debian.org>
|
||||
* [2012-11-14] Fix compilation with TARGET=GENERIC. Patch applied to Debian package.
|
||||
* [2013-08-28] Avoid failure on qemu guests declaring an Athlon CPU without 3dnow!
|
||||
|
||||
* Kang-Che Sung <Explorer09@gmail.com>
|
||||
* [2013-05-17] Fix typo in the document. Re-order the architecture list in getarch.c.
|
||||
|
||||
* Kenneth Hoste <kenneth.hoste@gmail.com>
|
||||
* [2013-05-22] Adjust Makefile about downloading LAPACK source files.
|
||||
|
||||
* Lei WANG <https://github.com/wlbksy>
|
||||
* [2013-05-22] Fix a bug about wget.
|
||||
|
||||
* Dan Luu <http://www.linkedin.com/in/danluu>
|
||||
* [2013-06-30] Add Intel Haswell support (using sandybridge optimizations).
|
||||
|
||||
* grisuthedragon <https://github.com/grisuthedragon>
|
||||
* [2013-07-11] Create openblas_get_parallel to retrieve information about which parallelization
|
||||
model is used by OpenBLAS.
|
||||
|
||||
* Elliot Saba <staticfloat@gmail.com>
|
||||
* [2013-07-22] Add in return value for `interface/trtri.c`
|
||||
|
||||
* Sébastien Fabbro <bicatali@gentoo.org>
|
||||
* [2013-07-24] Modify makefile to respect user's LDFLAGS
|
||||
* [2013-07-24] Add stack markings for GNU as arch-independent for assembler files
|
||||
|
||||
* Viral B. Shah <viral@mayin.org>
|
||||
* [2013-08-21] Patch LAPACK XLASD4.f as discussed in JuliaLang/julia#2340
|
||||
|
||||
* Lars Buitinck <https://github.com/larsmans>
|
||||
* [2013-08-28] get rid of the generated cblas_noconst.h file
|
||||
* [2013-08-28] Missing threshold in gemm.c
|
||||
* [2013-08-28] fix default prefix handling in makefiles
|
||||
|
||||
* yieldthought <https://github.com/yieldthought>
|
||||
* [2013-10-08] Remove -Wl,--retain-symbols-file from dynamic link line to fix tool support
|
||||
|
||||
* Keno Fischer <https://github.com/loladiro>
|
||||
* [2013-10-23] Use FC instead of CC to link the dynamic library on OS X
|
||||
|
||||
* Christopher Meng <cickumqt@gmail.com>
|
||||
* [2013-12-09] Add DESTDIR support for easier building on RPM based distros.
|
||||
Use install command instead of cp to install files with permissions control.
|
||||
|
||||
* Lucas Beyer <lucasb.eyer.be@gmail.com>
|
||||
* [2013-12-10] Added support for NO_SHARED in make install.
|
||||
|
||||
* carlkl <https://github.com/carlkl>
|
||||
* [2013-12-13] Fixed LAPACKE building bug on Windows
|
||||
|
||||
* Isaac Dunham <https://github.com/idunham>
|
||||
* [2014-08-03] Fixed link error on Linux/musl
|
||||
|
||||
* Dave Nuechterlein
|
||||
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
|
||||
ARMv8 support.
|
||||
|
||||
* Dan Kortschak
|
||||
* [2015-01-07] Added test for drotmg bug #484.
|
||||
|
||||
* Ton van den Heuvel <https://github.com/ton>
|
||||
* [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity().
|
||||
|
||||
* [Your name or handle] <[email or website]>
|
||||
* [Date] [Brief summary of your changes]
|
||||
306
Changelog.txt
306
Changelog.txt
@@ -1,4 +1,260 @@
|
||||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.2.14
|
||||
24-Mar-2015
|
||||
common:
|
||||
* Improve OpenBLASConfig.cmake. (#474, #475. Thanks, xantares.)
|
||||
* Improve ger and gemv for small matrices by stack allocation.
|
||||
e.g. make -DMAX_STACK_ALLOC=2048 (#482. Thanks, Jerome Robert.)
|
||||
* Introduce openblas_get_num_threads and openblas_get_num_procs.
|
||||
(#497. Thanks, Erik Schnetter.)
|
||||
* Add ATLAS-style ?geadd function. (#509. Thanks, Martin Köhler.)
|
||||
* Fix c/zsyr bug with negative incx. (#492.)
|
||||
* Fix race condition during shutdown causing a crash in
|
||||
gotoblas_set_affinity(). (#508. Thanks, Ton van den Heuvel.)
|
||||
|
||||
x86/x86-64:
|
||||
* Support AMD Steamroller.
|
||||
|
||||
ARM:
|
||||
* Add Cortex-A9 and Cortex-A15 targets.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.13
|
||||
3-Dec-2014
|
||||
common:
|
||||
* Add SYMBOLPREFIX and SYMBOLSUFFIX makefile options
|
||||
for adding a prefix or suffix to all exported symbol names
|
||||
in the shared library.(#459, Thanks Tony Kelman)
|
||||
* Provide OpenBLASConfig.cmake at installation.
|
||||
* Fix Fortran compiler detection on FreeBSD.
|
||||
(#470, Thanks Mike Nolta)
|
||||
|
||||
|
||||
x86/x86-64:
|
||||
* Add generic kernel files for x86-64. make TARGET=GENERIC
|
||||
* Fix a bug of sgemm kernel on Intel Sandy Bridge.
|
||||
* Fix c_check bug on some amd64 systems. (#471, Thanks Mike Nolta)
|
||||
|
||||
ARM:
|
||||
* Support APM's X-Gene 1 AArch64 processors.
|
||||
Optimize trmm and sgemm. (#465, Thanks Dave Nuechterlein)
|
||||
|
||||
====================================================================
|
||||
Version 0.2.12
|
||||
13-Oct-2014
|
||||
common:
|
||||
* Added CBLAS interface for ?omatcopy and ?imatcopy.
|
||||
* Enable ?gemm3m functions.
|
||||
* Added benchmark for ?gemm3m.
|
||||
* Optimized multithreading lower limits.
|
||||
* Disabled SYMM3M and HEMM3M functions
|
||||
because of segment violations.
|
||||
|
||||
x86/x86-64:
|
||||
* Improved axpy and symv performance on AMD Bulldozer.
|
||||
* Improved gemv performance on modern Intel and AMD CPUs.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.11
|
||||
18-Aug-2014
|
||||
common:
|
||||
* Added some benchmark codes.
|
||||
* Fix link error on Linux/musl.(Thanks Isaac Dunham)
|
||||
|
||||
x86/x86-64:
|
||||
* Improved s/c/zgemm performance for Intel Haswell.
|
||||
* Improved s/d/c/zgemv performance.
|
||||
* Support the big numa machine.(EXPERIMENT)
|
||||
|
||||
ARM:
|
||||
* Fix detection when cpuinfo uses "Processor". (Thanks Isaiah)
|
||||
|
||||
====================================================================
|
||||
Version 0.2.10
|
||||
16-Jul-2014
|
||||
common:
|
||||
* Added BLAS extensions as following.
|
||||
s/d/c/zaxpby, s/d/c/zimatcopy, s/d/c/zomatcopy.
|
||||
* Added OPENBLAS_CORETYPE environment for dynamic_arch. (a86d34)
|
||||
* Added NO_AVX2 flag for old binutils. (#401)
|
||||
* Support outputting the CPU corename at runtime. (#407)
|
||||
* Patched LAPACK to fix bug 114, 117, 118.
|
||||
(http://www.netlib.org/lapack/bug_list.html)
|
||||
* Disabled ?gemm3m for a work-around fix. (#400)
|
||||
x86/x86-64:
|
||||
* Fixed lots of bugs for optimized kernels on sandybridge,Haswell,
|
||||
bulldozer, and piledriver.
|
||||
https://github.com/xianyi/OpenBLAS/wiki/Fixed-optimized-kernels-To-do-List
|
||||
|
||||
ARM:
|
||||
* Improved LAPACK testing.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.9
|
||||
10-Jun-2014
|
||||
common:
|
||||
* Improved the result for LAPACK testing. (#372)
|
||||
* Installed DLL to prefix/bin instead of prefix/lib. (#366)
|
||||
* Build import library on Windows.(#374)
|
||||
x86/x86-64:
|
||||
* To improve LAPACK testing, we fallback some kernels. (#372)
|
||||
https://github.com/xianyi/OpenBLAS/wiki/Fixed-optimized-kernels-To-do-List
|
||||
|
||||
====================================================================
|
||||
Version 0.2.9.rc2
|
||||
06-Mar-2014
|
||||
common:
|
||||
* Added OPENBLAS_VERBOSE environment variable.(#338)
|
||||
* Make OpenBLAS thread-pool resilient to fork via pthread_atfork.
|
||||
(#294, Thank Olivier Grisel)
|
||||
* Rewrote rotmg
|
||||
* Fixed sdsdot bug.
|
||||
x86/x86-64:
|
||||
* Detect Intel Haswell for new Macbook.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.9.rc1
|
||||
13-Jan-2013
|
||||
common:
|
||||
* Update LAPACK to 3.5.0 version
|
||||
* Fixed compatibility issues with Clang and Pathscale compilers.
|
||||
|
||||
x86/x86-64:
|
||||
* Optimization on Intel Haswell.
|
||||
* Enable optimization kernels on AMD Bulldozer and Piledriver.
|
||||
|
||||
ARM:
|
||||
* Support ARMv6 and ARMv7 ISA.
|
||||
* Optimization on ARM Cortex-A9.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.8
|
||||
01-Aug-2013
|
||||
common:
|
||||
* Support Open64 5.0. (#266)
|
||||
* Add executable stack markings. (#262, Thank Sébastien Fabbro)
|
||||
* Respect user's LDFLAGS (Thank Sébastien Fabbro)
|
||||
|
||||
x86/x86-64:
|
||||
* Rollback bulldozer and piledriver kernels to barcelona kernels (#263)
|
||||
We will fix the computational error bug in bulldozer and piledriver kernels.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.7
|
||||
20-Jul-2013
|
||||
common:
|
||||
* Support LSB (Linux Standard Base) 4.1.
|
||||
e.g. make CC=lsbcc
|
||||
* Include LAPACK 3.4.2 source codes to the repo.
|
||||
Avoid downloading at compile time.
|
||||
* Add NO_PARALLEL_MAKE flag to disable parallel make.
|
||||
* Create openblas_get_parallel to retrieve information which
|
||||
parallelization model is used by OpenBLAS. (Thank grisuthedragon)
|
||||
* Detect LLVM/Clang compiler. The default compiler is Clang on Mac OS X.
|
||||
* Change LIBSUFFIX from .lib to .a on windows.
|
||||
* A work-around for dtrti_U single thread bug. Replace it with LAPACK codes. (#191)
|
||||
|
||||
x86/x86-64:
|
||||
* Optimize c/zgemm, trsm, dgemv_n, ddot, daxpy, dcopy on
|
||||
AMD Bulldozer. (Thank Werner Saar)
|
||||
* Add Intel Haswell support (using Sandybridge optimizations).
|
||||
(Thank Dan Luu)
|
||||
* Add AMD Piledriver support (using Bulldozer optimizations).
|
||||
* Fix the computational error in zgemm avx kernel on
|
||||
Sandybridge. (#237)
|
||||
* Fix the overflow bug in gemv.
|
||||
* Fix the overflow bug in multi-threaded BLAS3, getrf when NUM_THREADS
|
||||
is very large.(#214, #221, #246).
|
||||
MIPS64:
|
||||
* Support loongcc (Open64 based) compiler for ICT Loongson 3A/B.
|
||||
|
||||
Power:
|
||||
* Support Power7 by old Power6 kernels. (#220)
|
||||
|
||||
====================================================================
|
||||
Version 0.2.6
|
||||
2-Mar-2013
|
||||
common:
|
||||
* Improved OpenMP performance slightly. (d744c9)
|
||||
* Improved cblas.h compatibility with Intel MKL.(#185)
|
||||
* Fixed the overflowing bug in single thread cholesky factorization.
|
||||
* Fixed the overflowing buffer bug of multithreading hbmv and sbmv.(#174)
|
||||
|
||||
x86/x86-64:
|
||||
* Added AMD Bulldozer x86-64 S/DGEMM AVX kernels. (Thank Werner Saar)
|
||||
We will tune the performance in future.
|
||||
* Auto-detect Intel Xeon E7540.
|
||||
* Fixed the overflowing buffer bug of gemv. (#173)
|
||||
* Fixed the bug of s/cdot about invalid reading NAN on x86_64. (#189)
|
||||
|
||||
MIPS64:
|
||||
|
||||
====================================================================
|
||||
Version 0.2.5
|
||||
26-Nov-2012
|
||||
common:
|
||||
* Added NO_SHARED flag to disable generating the shared library.
|
||||
* Compile LAPACKE with ILP64 model when INTERFACE64=1 (#158)
|
||||
* Export LAPACK 3.4.2 symbols in shared library. (#147)
|
||||
* Only detect the number of physical CPU cores on Mac OSX. (#157)
|
||||
* Fixed NetBSD build. (#155)
|
||||
* Fixed compilation with TARGET=GENERIC. (#160)
|
||||
x86/x86-64:
|
||||
* Restore the original CPU affinity when calling
|
||||
openblas_set_num_threads(1) (#153)
|
||||
* Fixed a SEGFAULT bug in dgemv_t when m is very large.(#154)
|
||||
MIPS64:
|
||||
|
||||
====================================================================
|
||||
Version 0.2.4
|
||||
8-Oct-2012
|
||||
common:
|
||||
* Upgraded LAPACK to 3.4.2 version. (#145)
|
||||
* Provided support for passing CFLAGS, FFLAGS, PFLAGS,
|
||||
FPFLAGS to make. (#137)
|
||||
* f77blas.h:compatibility for compilers without C99 complex
|
||||
number support. (#141)
|
||||
x86/x86-64:
|
||||
* Added NO_AVX flag. Check OS supporting AVX on runtime. (#139)
|
||||
* Fixed zdot incompatibility ABI issue with GCC 4.7 on
|
||||
Windows 32-bit. (#140)
|
||||
MIPS64:
|
||||
* Fixed the generation of shared library bug.
|
||||
* Fixed the detection bug on the Loongson 3A server.
|
||||
====================================================================
|
||||
Version 0.2.3
|
||||
20-Aug-2012
|
||||
common:
|
||||
* Fixed LAPACK unstable bug about ?laswp. (#130)
|
||||
* Fixed the shared library bug about unloading the library on
|
||||
Linux (#132).
|
||||
* Fixed the compilation failure on BlueGene/P (TARGET=PPC440FP2)
|
||||
Please use gcc and IBM xlf. (#134)
|
||||
x86/x86-64:
|
||||
* Supported goto_set_num_threads and openblas_set_num_threads
|
||||
APIs in Windows. They can set the number of threads on runtime.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.2
|
||||
6-July-2012
|
||||
common:
|
||||
* Fixed exporting DLL functions bug on Windows/MingW
|
||||
* Support GNU Hurd (Thank Sylvestre Ledru)
|
||||
* Support kfreebsd kernel (Thank Sylvestre Ledru)
|
||||
x86/x86-64:
|
||||
* Support Intel Sandy Bridge 22nm desktop/mobile CPU
|
||||
SPARC:
|
||||
* Improve the detection of SPARC (Thank Sylvestre Ledru)
|
||||
|
||||
====================================================================
|
||||
Version 0.2.1
|
||||
30-Jun-2012
|
||||
common:
|
||||
x86/x86-64:
|
||||
* Fixed the SEGFAULT bug about hyper-threading
|
||||
* Support AMD Bulldozer by using GotoBLAS2 AMD Barcelona codes
|
||||
|
||||
====================================================================
|
||||
Version 0.2.0
|
||||
26-Jun-2012
|
||||
@@ -26,14 +282,14 @@ x86/x86_64:
|
||||
* Auto-detect Intel Sandy Bridge Core i7-3xxx & Xeon E7 Westmere-EX.
|
||||
* Test alpha=Nan in dscale.
|
||||
* Fixed a SEGFAULT bug in samax on x86 windows.
|
||||
|
||||
|
||||
====================================================================
|
||||
Version 0.1.0
|
||||
23-Mar-2012
|
||||
common:
|
||||
* Set soname of shared library on Linux.
|
||||
* Added LIBNAMESUFFIX flag in Makefile.rule. The user can use
|
||||
this flag to control the library name, e.g. libopenblas.a,
|
||||
* Added LIBNAMESUFFIX flag in Makefile.rule. The user can use
|
||||
this flag to control the library name, e.g. libopenblas.a,
|
||||
libopenblas_ifort.a or libopenblas_omp.a.
|
||||
* Added GEMM_MULTITHREAD_THRESHOLD flag in Makefile.rule.
|
||||
The lib use single thread in GEMM function with small matrices.
|
||||
@@ -64,7 +320,7 @@ x86/x86_64:
|
||||
Version 0.1 alpha2.4
|
||||
18-Sep-2011
|
||||
common:
|
||||
* Fixed a bug about installation. The header file "fblas77.h"
|
||||
* Fixed a bug about installation. The header file "fblas77.h"
|
||||
works fine now.
|
||||
* Fixed #61 a building bug about setting TARGET and DYNAMIC_ARCH.
|
||||
* Try to handle absolute path of shared library in OSX. (#57)
|
||||
@@ -73,16 +329,16 @@ common:
|
||||
$(PREFIX)/lib
|
||||
|
||||
x86/x86_64:
|
||||
* Fixed #58 zdot/xdot SEGFAULT bug with GCC-4.6 on x86. According
|
||||
to i386 calling convention, The callee should remove the first
|
||||
hidden parameter.Thank Mr. John for this patch.
|
||||
* Fixed #58 zdot/xdot SEGFAULT bug with GCC-4.6 on x86. According
|
||||
to i386 calling convention, The callee should remove the first
|
||||
hidden parameter.Thank Mr. John for this patch.
|
||||
|
||||
====================================================================
|
||||
Version 0.1 alpha2.3
|
||||
5-Sep-2011
|
||||
|
||||
x86/x86_64:
|
||||
* Added DTB_ENTRIES into dynamic arch setting parameters. Now,
|
||||
* Added DTB_ENTRIES into dynamic arch setting parameters. Now,
|
||||
it can read DTB_ENTRIES on runtime. (Refs issue #55 on github)
|
||||
|
||||
====================================================================
|
||||
@@ -90,7 +346,7 @@ Version 0.1 alpha2.2
|
||||
14-Jul-2011
|
||||
|
||||
common:
|
||||
* Fixed a building bug when DYNAMIC_ARCH=1 & INTERFACE64=1.
|
||||
* Fixed a building bug when DYNAMIC_ARCH=1 & INTERFACE64=1.
|
||||
(Refs issue #44 on github)
|
||||
|
||||
====================================================================
|
||||
@@ -98,7 +354,7 @@ Version 0.1 alpha2.1
|
||||
28-Jun-2011
|
||||
|
||||
common:
|
||||
* Stop the build and output the error message when detecting
|
||||
* Stop the build and output the error message when detecting
|
||||
fortran compiler failed. (Refs issue #42 on github)
|
||||
|
||||
====================================================================
|
||||
@@ -106,16 +362,16 @@ Version 0.1 alpha2
|
||||
23-Jun-2011
|
||||
|
||||
common:
|
||||
* Fixed blasint undefined bug in <cblas.h> file. Other software
|
||||
* Fixed blasint undefined bug in <cblas.h> file. Other software
|
||||
could include this header successfully(Refs issue #13 on github)
|
||||
* Fixed the SEGFAULT bug on 64 cores. On SMP server, the number
|
||||
of CPUs or cores should be less than or equal to 64.(Refs issue #14
|
||||
* Fixed the SEGFAULT bug on 64 cores. On SMP server, the number
|
||||
of CPUs or cores should be less than or equal to 64.(Refs issue #14
|
||||
on github)
|
||||
* Support "void goto_set_num_threads(int num_threads)" and "void
|
||||
openblas_set_num_threads(int num_threads)" when USE_OPENMP=1
|
||||
* Added extern "C" to support C++. Thank Tasio for the patch(Refs
|
||||
* Added extern "C" to support C++. Thank Tasio for the patch(Refs
|
||||
issue #21 on github)
|
||||
* Provided an error message when the arch is not supported.(Refs
|
||||
* Provided an error message when the arch is not supported.(Refs
|
||||
issue #19 on github)
|
||||
* Fixed issue #23. Fixed a bug of f_check script about generating link flags.
|
||||
* Added openblas_set_num_threads for Fortran.
|
||||
@@ -130,10 +386,10 @@ x86/x86_64:
|
||||
* Fixed #28 a wrong result of dsdot on x86_64.
|
||||
* Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6.
|
||||
* Fixed #33 ztrmm bug on Nehalem.
|
||||
* Walk round #27 the low performance axpy issue with small imput size & multithreads.
|
||||
* Work-around #27 the low performance axpy issue with small input size & multithreads.
|
||||
|
||||
MIPS64:
|
||||
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64.
|
||||
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64.
|
||||
* Optimized single/double precision BLAS Level3 on Loongson3A/MIPS64. (Refs #2)
|
||||
* Optimized single/double precision axpy function on Loongson3A/MIPS64. (Refs #3)
|
||||
|
||||
@@ -142,9 +398,9 @@ Version 0.1 alpha1
|
||||
20-Mar-2011
|
||||
|
||||
common:
|
||||
* Support "make NO_LAPACK=1" to build the library without
|
||||
* Support "make NO_LAPACK=1" to build the library without
|
||||
LAPACK functions.
|
||||
* Fixed randomly SEGFAULT when nodemask==NULL with above Linux 2.6.34.
|
||||
* Fixed randomly SEGFAULT when nodemask==NULL with above Linux 2.6.34.
|
||||
Thank Mr.Ei-ji Nakama providing this patch. (Refs issue #12 on github)
|
||||
* Added DEBUG=1 rule in Makefile.rule to build debug version.
|
||||
* Disable compiling quad precision in reference BLAS library(netlib BLAS).
|
||||
@@ -153,15 +409,15 @@ common:
|
||||
* Imported GotoBLAS2 1.13 BSD version
|
||||
|
||||
x86/x86_64:
|
||||
* On x86 32bits, fixed a bug in zdot_sse2.S line 191. This would casue
|
||||
zdotu & zdotc failures.Instead,Walk around it. (Refs issue #8 #9 on github)
|
||||
* Modified ?axpy functions to return same netlib BLAS results
|
||||
* On x86 32bits, fixed a bug in zdot_sse2.S line 191. This would cause
|
||||
zdotu & zdotc failures. Instead, work-around it. (Refs issue #8 #9 on github)
|
||||
* Modified ?axpy functions to return same netlib BLAS results
|
||||
when incx==0 or incy==0 (Refs issue #7 on github)
|
||||
* Modified ?swap functions to return same netlib BLAS results
|
||||
* Modified ?swap functions to return same netlib BLAS results
|
||||
when incx==0 or incy==0 (Refs issue #6 on github)
|
||||
* Modified ?rot functions to return same netlib BLAS results
|
||||
* Modified ?rot functions to return same netlib BLAS results
|
||||
when incx==0 or incy==0 (Refs issue #4 on github)
|
||||
* Detect Intel Westmere,Intel Clarkdale and Intel Arrandale
|
||||
* Detect Intel Westmere,Intel Clarkdale and Intel Arrandale
|
||||
to use Nehalem codes.
|
||||
* Fixed a typo bug about compiling dynamic ARCH library.
|
||||
MIPS64:
|
||||
|
||||
@@ -83,7 +83,7 @@
|
||||
4. Supported precision
|
||||
|
||||
Now x86/x86_64 version support 80bit FP precision in addition to
|
||||
normal double presicion and single precision. Currently only
|
||||
normal double precision and single precision. Currently only
|
||||
gfortran supports 80bit FP with "REAL*10".
|
||||
|
||||
|
||||
|
||||
@@ -32,9 +32,9 @@
|
||||
|
||||
GotoBLAS2 build complete.
|
||||
|
||||
OS ... Linux
|
||||
Architecture ... x86_64
|
||||
BINARY ... 64bit
|
||||
OS ... Linux
|
||||
Architecture ... x86_64
|
||||
BINARY ... 64bit
|
||||
C compiler ... GCC (command line : gcc)
|
||||
Fortran compiler ... PATHSCALE (command line : pathf90)
|
||||
Library Name ... libgoto_barcelonap-r1.27.a (Multi threaded; Max
|
||||
|
||||
@@ -56,7 +56,7 @@
|
||||
|
||||
1.6 Q I use OpenMP compiler. How can I use GotoBLAS2 with it?
|
||||
|
||||
A Please understand that OpenMP is a compromised method to use
|
||||
A Please understand that OpenMP is a compromised method to use
|
||||
thread. If you want to use OpenMP based code with GotoBLAS2, you
|
||||
should enable "USE_OPENMP=1" in Makefile.rule.
|
||||
|
||||
|
||||
@@ -9,10 +9,10 @@
|
||||
|
||||
If you want to allocate 64 large pages,
|
||||
|
||||
$shell> echo 0 > /pros/sys/vm/nr_hugepages # need to be reset
|
||||
$shell> echo 65 > /pros/sys/vm/nr_hugepages # add 1 extra page
|
||||
$shell> echo 3355443200 > /pros/sys/kernel/shmmax # just large number
|
||||
$shell> echo 3355443200 > /pros/sys/kernel/shmall
|
||||
$shell> echo 0 > /proc/sys/vm/nr_hugepages # need to be reset
|
||||
$shell> echo 65 > /proc/sys/vm/nr_hugepages # add 1 extra page
|
||||
$shell> echo 3355443200 > /proc/sys/kernel/shmmax # just large number
|
||||
$shell> echo 3355443200 > /proc/sys/kernel/shmall
|
||||
|
||||
Also may add a few lines into /etc/security/limits.conf file.
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
F) Other architecture which doesn't have Large TLB enhancement
|
||||
|
||||
If you have root permission, please install device driver which
|
||||
located in drivers/mapper.
|
||||
located in drivers/mapper.
|
||||
|
||||
$shell> cd drivers/mapper
|
||||
$shell> make
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
probably you created too many threads or process. Basically GotoBLAS
|
||||
assumes that available cores that you specify are exclusively for
|
||||
BLAS computation. Even one small thread/process conflicts with BLAS
|
||||
threads, performance will become worse.
|
||||
threads, performance will become worse.
|
||||
|
||||
The best solution is to reduce your number of threads or insert
|
||||
some synchronization mechanism and suspend your threads until BLAS
|
||||
@@ -19,4 +19,4 @@
|
||||
|
||||
|
||||
Anyway, if you see any weird performance loss, it means your code or
|
||||
algorithm is not optimal.
|
||||
algorithm is not optimal.
|
||||
|
||||
27
LICENSE
27
LICENSE
@@ -1,4 +1,4 @@
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -12,17 +12,18 @@ met:
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
197
Makefile
197
Makefile
@@ -3,8 +3,8 @@ include ./Makefile.system
|
||||
|
||||
BLASDIRS = interface driver/level2 driver/level3 driver/others
|
||||
|
||||
ifndef DYNAMIC_ARCH
|
||||
BLASDIRS += kernel
|
||||
ifneq ($(DYNAMIC_ARCH), 1)
|
||||
BLASDIRS += kernel
|
||||
endif
|
||||
|
||||
ifdef UTEST_CHECK
|
||||
@@ -15,23 +15,21 @@ ifdef SANITY_CHECK
|
||||
BLASDIRS += reference
|
||||
endif
|
||||
|
||||
ifndef PREFIX
|
||||
PREFIX = /opt/OpenBLAS
|
||||
endif
|
||||
|
||||
SUBDIRS = $(BLASDIRS)
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
SUBDIRS += lapack
|
||||
endif
|
||||
|
||||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast,$(LAPACK_FFLAGS))
|
||||
|
||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
|
||||
|
||||
.PHONY : all libs netlib test ctest shared install
|
||||
.NOTPARALLEL : all libs prof lapack-test install
|
||||
.NOTPARALLEL : all libs prof lapack-test install blas-test
|
||||
|
||||
all :: libs netlib tests shared
|
||||
@echo
|
||||
@echo " OpenBLAS build complete."
|
||||
@echo " OpenBLAS build complete. ($(LIB_COMPONENTS))"
|
||||
@echo
|
||||
@echo " OS ... $(OSNAME) "
|
||||
@echo " Architecture ... $(ARCH) "
|
||||
@@ -40,11 +38,17 @@ ifndef BINARY64
|
||||
else
|
||||
@echo " BINARY ... 64bit "
|
||||
endif
|
||||
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
@echo " Use 64 bits int (equivalent to \"-i8\" in Fortran) "
|
||||
endif
|
||||
endif
|
||||
|
||||
@echo " C compiler ... $(C_COMPILER) (command line : $(CC))"
|
||||
ifndef NOFORTRAN
|
||||
@echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))"
|
||||
endif
|
||||
ifneq ($(OSNAME), AIX)
|
||||
@echo -n " Library Name ... $(LIBNAME)"
|
||||
else
|
||||
@@ -59,7 +63,7 @@ endif
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
@echo
|
||||
@echo " Use OpenMP in the multithreading. Becasue of ignoring OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS flags, "
|
||||
@echo " Use OpenMP in the multithreading. Because of ignoring OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS flags, "
|
||||
@echo " you should use OMP_NUM_THREADS environment variable to control the number of threads."
|
||||
@echo
|
||||
endif
|
||||
@@ -80,30 +84,30 @@ endif
|
||||
@echo
|
||||
|
||||
shared :
|
||||
ifndef NO_SHARED
|
||||
ifeq ($(OSNAME), Linux)
|
||||
$(MAKE) -C exports so
|
||||
-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
-ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
@$(MAKE) -C exports so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
$(MAKE) -C exports so
|
||||
-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
@$(MAKE) -C exports so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
$(MAKE) -C exports so
|
||||
-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
@$(MAKE) -C exports so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
$(MAKE) -C exports dyn
|
||||
-ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
@$(MAKE) -C exports dyn
|
||||
@-ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
$(MAKE) -C exports dll
|
||||
-ln -fs $(LIBDLLNAME) $(LIBPREFIX).dll
|
||||
@$(MAKE) -C exports dll
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
$(MAKE) -C exports dll
|
||||
-ln -fs $(LIBDLLNAME) $(LIBPREFIX).dll
|
||||
@$(MAKE) -C exports dll
|
||||
endif
|
||||
endif
|
||||
|
||||
tests :
|
||||
@@ -129,32 +133,40 @@ ifeq ($(CORE), UNKOWN)
|
||||
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
|
||||
endif
|
||||
ifeq ($(NOFORTRAN), 1)
|
||||
$(error OpenBLAS: Detecting fortran compiler failed. Please install fortran compiler, e.g. gfortran, ifort, openf90.)
|
||||
$(info OpenBLAS: Detecting fortran compiler failed. Cannot compile LAPACK. Only compile BLAS.)
|
||||
endif
|
||||
-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
for d in $(SUBDIRS) ; \
|
||||
ifeq ($(NO_STATIC), 1)
|
||||
ifeq ($(NO_SHARED), 1)
|
||||
$(error OpenBLAS: neither static nor shared are enabled.)
|
||||
endif
|
||||
endif
|
||||
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
@for d in $(SUBDIRS) ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
fi; \
|
||||
done
|
||||
#Save the config files for installation
|
||||
cp Makefile.conf Makefile.conf_last
|
||||
cp config.h config_last.h
|
||||
@cp Makefile.conf Makefile.conf_last
|
||||
@cp config.h config_last.h
|
||||
ifdef QUAD_PRECISION
|
||||
echo "#define QUAD_PRECISION">> config_last.h
|
||||
@echo "#define QUAD_PRECISION">> config_last.h
|
||||
endif
|
||||
ifeq ($(EXPRECISION), 1)
|
||||
echo "#define EXPRECISION">> config_last.h
|
||||
@echo "#define EXPRECISION">> config_last.h
|
||||
endif
|
||||
##
|
||||
ifdef DYNAMIC_ARCH
|
||||
$(MAKE) -C kernel commonlibs || exit 1
|
||||
for d in $(DYNAMIC_CORE) ; \
|
||||
##
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
@$(MAKE) -C kernel commonlibs || exit 1
|
||||
@for d in $(DYNAMIC_CORE) ; \
|
||||
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
|
||||
done
|
||||
echo DYNAMIC_ARCH=1 >> Makefile.conf_last
|
||||
@echo DYNAMIC_ARCH=1 >> Makefile.conf_last
|
||||
endif
|
||||
touch lib.grd
|
||||
ifdef USE_THREAD
|
||||
@echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
|
||||
endif
|
||||
@touch lib.grd
|
||||
|
||||
prof : prof_blas prof_lapack
|
||||
|
||||
@@ -165,7 +177,7 @@ prof_blas :
|
||||
$(MAKE) -C $$d prof || exit 1 ; \
|
||||
fi; \
|
||||
done
|
||||
ifdef DYNAMIC_ARCH
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
$(MAKE) -C kernel commonprof || exit 1
|
||||
endif
|
||||
|
||||
@@ -177,14 +189,14 @@ blas :
|
||||
fi; \
|
||||
done
|
||||
|
||||
hpl :
|
||||
hpl :
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
for d in $(BLASDIRS) ../laswp exports ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
fi; \
|
||||
done
|
||||
ifdef DYNAMIC_ARCH
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
$(MAKE) -C kernel commonlibs || exit 1
|
||||
for d in $(DYNAMIC_CORE) ; \
|
||||
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
|
||||
@@ -200,99 +212,89 @@ hpl_p :
|
||||
done
|
||||
|
||||
ifeq ($(NO_LAPACK), 1)
|
||||
netlib :
|
||||
netlib :
|
||||
|
||||
else
|
||||
netlib : lapack-3.4.1 patch.for_lapack-3.4.1 $(NETLIB_LAPACK_DIR)/make.inc
|
||||
netlib : lapack_prebuild
|
||||
ifndef NOFORTRAN
|
||||
-@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
endif
|
||||
ifndef NO_LAPACKE
|
||||
-@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib
|
||||
endif
|
||||
endif
|
||||
|
||||
prof_lapack : lapack-3.4.1 $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapack_prof
|
||||
prof_lapack : lapack_prebuild
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapack_prof
|
||||
|
||||
$(NETLIB_LAPACK_DIR)/make.inc :
|
||||
lapack_prebuild :
|
||||
ifndef NOFORTRAN
|
||||
-@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "OPTS = $(FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "POPTS = $(FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "NOOPT = $(FFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PNOOPT = $(FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CFLAGS = $(CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKELIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
# -@echo "CEXTRALIB = $(CEXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifeq ($(FC), gfortran)
|
||||
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifdef SMP
|
||||
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
else
|
||||
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
|
||||
lapack-3.4.1 : lapack-3.4.1.tgz
|
||||
large.tgz :
|
||||
ifndef NOFORTRAN
|
||||
ifndef NO_LAPACK
|
||||
@if test `$(MD5SUM) lapack-3.4.1.tgz | $(AWK) '{print $$1}'` = 44c3869c38c8335c2b9c2a8bb276eb55; then \
|
||||
echo $(TAR) zxf $< ;\
|
||||
$(TAR) zxf $< && (cd $(NETLIB_LAPACK_DIR); $(PATCH) -p1 < ../patch.for_lapack-3.4.1) ;\
|
||||
rm -f $(NETLIB_LAPACK_DIR)/lapacke/make.inc ;\
|
||||
else \
|
||||
rm -rf $(NETLIB_LAPACK_DIR) ;\
|
||||
echo " Cannot download lapack-3.4.1.tgz or the MD5 check sum is wrong (Please use orignal)."; \
|
||||
exit 1; \
|
||||
if [ ! -a $< ]; then
|
||||
-wget http://www.netlib.org/lapack/timing/large.tgz;
|
||||
fi
|
||||
endif
|
||||
endif
|
||||
|
||||
LAPACK_URL=http://www.netlib.org/lapack/lapack-3.4.1.tgz
|
||||
|
||||
lapack-3.4.1.tgz :
|
||||
ifndef NOFORTRAN
|
||||
#http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Darwin NetBSD))
|
||||
curl -O $(LAPACK_URL)
|
||||
else
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
fetch $(LAPACK_URL)
|
||||
else
|
||||
wget $(LAPACK_URL)
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
large.tgz :
|
||||
ifndef NOFORTRAN
|
||||
-wget http://www.netlib.org/lapack/timing/large.tgz
|
||||
endif
|
||||
|
||||
timing.tgz :
|
||||
ifndef NOFORTRAN
|
||||
-wget http://www.netlib.org/lapack/timing/timing.tgz
|
||||
if [ ! -a $< ]; then
|
||||
-wget http://www.netlib.org/lapack/timing/timing.tgz;
|
||||
fi
|
||||
endif
|
||||
|
||||
lapack-timing : lapack-3.4.1 large.tgz timing.tgz
|
||||
lapack-timing : large.tgz timing.tgz
|
||||
ifndef NOFORTRAN
|
||||
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
|
||||
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )
|
||||
make -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
make -C $(NETLIB_LAPACK_DIR)/TIMING
|
||||
endif
|
||||
|
||||
|
||||
lapack-test :
|
||||
$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintsts xlintstz xlintstzc
|
||||
@rm -f $(NETLIB_LAPACK_DIR)/TESTING/*.out
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING
|
||||
$(GREP) failed $(NETLIB_LAPACK_DIR)/TESTING/*.out
|
||||
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
|
||||
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||
|
||||
blas-test:
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
|
||||
make -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out)
|
||||
|
||||
|
||||
dummy :
|
||||
|
||||
@@ -310,10 +312,13 @@ clean ::
|
||||
#endif
|
||||
@$(MAKE) -C reference clean
|
||||
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@rm -rf getarch.dSYM getarch_2nd.dSYM
|
||||
endif
|
||||
@rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib
|
||||
@if test -d $(NETLIB_LAPACK_DIR); then \
|
||||
echo deleting $(NETLIB_LAPACK_DIR); \
|
||||
rm -rf $(NETLIB_LAPACK_DIR) ;\
|
||||
fi
|
||||
@touch $(NETLIB_LAPACK_DIR)/make.inc
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) clean
|
||||
@rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h
|
||||
@rm -f *.grd Makefile.conf_last config_last.h
|
||||
@echo Done.
|
||||
@(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out testing_results.txt)
|
||||
@echo Done.
|
||||
|
||||
@@ -50,7 +50,7 @@ endif
|
||||
|
||||
ifndef SMP
|
||||
LIBCXML = -lcxml -lots -lm
|
||||
LIBATLAS = -L/usr/lib/atlas3.7.8 -lf77blas -latlas -lm
|
||||
LIBATLAS = -L/usr/lib/atlas3.7.8 -lf77blas -latlas -lm
|
||||
else
|
||||
LIBCXML = -lcxmlp -lots -lm
|
||||
LIBATLAS = -L/usr/lib/atlas3.7.8p -llapack -lptcblas -lptf77blas -latlas -lpthread -lm
|
||||
|
||||
33
Makefile.arm
Normal file
33
Makefile.arm
Normal file
@@ -0,0 +1,33 @@
|
||||
# ifeq logical or
|
||||
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
|
||||
ifeq ($(OSNAME), Android)
|
||||
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
else
|
||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV7)
|
||||
ifeq ($(OSNAME), Android)
|
||||
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
else
|
||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV6)
|
||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
endif
|
||||
|
||||
|
||||
ifeq ($(CORE), ARMV5)
|
||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
endif
|
||||
|
||||
|
||||
7
Makefile.arm64
Normal file
7
Makefile.arm64
Normal file
@@ -0,0 +1,7 @@
|
||||
|
||||
ifeq ($(CORE), ARMV8)
|
||||
CCOMMON_OPT += -march=armv8-a
|
||||
FCOMMON_OPT += -march=armv8-a
|
||||
endif
|
||||
|
||||
|
||||
@@ -1,6 +1 @@
|
||||
COPT = -Wall -O2 # -DGEMMTEST
|
||||
ifdef BINARY64
|
||||
else
|
||||
# LDFLAGS = -m elf32ppc
|
||||
LDFLAGS = -m elf_i386
|
||||
endif
|
||||
|
||||
@@ -16,7 +16,7 @@ LIBMLIB = ../../level1/others/libmisc.a -L/opt/intel/fc/ia64/9.1.040/lib -L/opt
|
||||
LIBSCSL = -L/opt/scsl/1.4.1.0/lib -Wl,-rpath,/opt/scsl/1.4.1.0/lib -lscs
|
||||
|
||||
ifndef SMP
|
||||
LIBATLAS = -L/usr/lib/atlas3.6.0 -lf77blas -latlas -lm
|
||||
LIBATLAS = -L/usr/lib/atlas3.6.0 -lf77blas -latlas -lm
|
||||
else
|
||||
LIBATLAS = -L$(HOME)/misc/lib -L/usr/lib/atlas3.6.0p -llapack -lptcblas -lptf77blas -latlas -lpthread -lm
|
||||
endif
|
||||
|
||||
132
Makefile.install
132
Makefile.install
@@ -3,8 +3,14 @@ export GOTOBLAS_MAKEFILE = 1
|
||||
-include $(TOPDIR)/Makefile.conf_last
|
||||
include ./Makefile.system
|
||||
|
||||
OPENBLAS_INCLUDE_DIR:=$(PREFIX)/include
|
||||
OPENBLAS_LIBRARY_DIR:=$(PREFIX)/lib
|
||||
PREFIX ?= /opt/OpenBLAS
|
||||
|
||||
OPENBLAS_INCLUDE_DIR := $(PREFIX)/include
|
||||
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib
|
||||
OPENBLAS_BINARY_DIR := $(PREFIX)/bin
|
||||
OPENBLAS_BUILD_DIR := $(CURDIR)
|
||||
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
|
||||
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
|
||||
|
||||
.PHONY : install
|
||||
.NOTPARALLEL : install
|
||||
@@ -13,68 +19,98 @@ lib.grd :
|
||||
$(error OpenBLAS: Please run "make" firstly)
|
||||
|
||||
install : lib.grd
|
||||
@-mkdir -p $(PREFIX)
|
||||
@-mkdir -p $(OPENBLAS_INCLUDE_DIR)
|
||||
@-mkdir -p $(OPENBLAS_LIBRARY_DIR)
|
||||
@echo Generating openblas_config.h in $(OPENBLAS_INCLUDE_DIR)
|
||||
#for inc
|
||||
@echo \#ifndef OPENBLAS_CONFIG_H > $(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#define OPENBLAS_CONFIG_H >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@cat config_last.h >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@cat openblas_config_template.h >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@-mkdir -p $(DESTDIR)$(PREFIX)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_BINARY_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||
@echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
#for inc
|
||||
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@awk 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
|
||||
@echo Generating f77blas.h in $(OPENBLAS_INCLUDE_DIR)
|
||||
@echo \#ifndef OPENBLAS_F77BLAS_H > $(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#define OPENBLAS_F77BLAS_H >> $(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#include \"openblas_config.h\" >> $(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@cat common_interface.h >> $(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#endif >> $(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo Generating f77blas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@echo \#ifndef OPENBLAS_F77BLAS_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#define OPENBLAS_F77BLAS_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#include \"openblas_config.h\" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@cat common_interface.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#endif >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
|
||||
@echo Generating cblas.h in $(OPENBLAS_INCLUDE_DIR)
|
||||
@sed 's/common/openblas_config/g' cblas.h > $(OPENBLAS_INCLUDE_DIR)/cblas.h
|
||||
ifndef NO_CBLAS
|
||||
@echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@sed 's/common/openblas_config/g' cblas.h > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h
|
||||
endif
|
||||
|
||||
ifndef NO_LAPACKE
|
||||
@echo Copying LAPACKE header files to $(OPENBLAS_LIBRARY_DIR)
|
||||
@-cp $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(OPENBLAS_INCLUDE_DIR)/lapacke.h
|
||||
@-cp $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
|
||||
@-cp $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
|
||||
@-cp $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
|
||||
endif
|
||||
|
||||
#for install static library
|
||||
@echo Copy the static library to $(OPENBLAS_LIBRARY_DIR)
|
||||
@cp $(LIBNAME) $(OPENBLAS_LIBRARY_DIR)
|
||||
@-ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBNAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).$(LIBSUFFIX)
|
||||
#for install shared library
|
||||
@echo Copy the shared library to $(OPENBLAS_LIBRARY_DIR)
|
||||
#for install static library
|
||||
ifndef NO_STATIC
|
||||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@install -pm644 $(LIBNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
endif
|
||||
#for install shared library
|
||||
ifndef NO_SHARED
|
||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
ifeq ($(OSNAME), Linux)
|
||||
-cp $(LIBSONAME) $(OPENBLAS_LIBRARY_DIR)
|
||||
-ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBSONAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).so
|
||||
-ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBSONAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
@install -pm755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
-cp $(LIBSONAME) $(OPENBLAS_LIBRARY_DIR)
|
||||
-ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBSONAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).so
|
||||
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
-cp $(LIBSONAME) $(OPENBLAS_LIBRARY_DIR)
|
||||
-ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBSONAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).so
|
||||
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
-cp $(LIBDYNNAME) $(OPENBLAS_LIBRARY_DIR)
|
||||
-install_name_tool -id $(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)
|
||||
-ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).dylib
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@-cp $(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-install_name_tool -id $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
-cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)
|
||||
-ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).dll
|
||||
@-cp $(LIBDLLNAME) $(DESTDIR)$(OPENBLAS_BINARY_DIR)
|
||||
@-cp $(LIBDLLNAME).a $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
-cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)
|
||||
-ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).dll
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
||||
endif
|
||||
endif
|
||||
#Generating OpenBLASConfig.cmake
|
||||
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
ifndef NO_SHARED
|
||||
#ifeq logical or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
else
|
||||
#only static
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
|
||||
@echo Install OK!
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ FLAMEPATH = $(HOME)/flame/lib
|
||||
#ifeq ($(CORE), CELL)
|
||||
#CELL_SDK_ROOT = /opt/IBM/cell-sdk-1.1/sysroot/usr
|
||||
#SPU_CC = spu-gcc
|
||||
#EXTRALIB += -lspe
|
||||
#EXTRALIB += -lspe
|
||||
#endif
|
||||
|
||||
ifeq ($(OSNAME), Linux)
|
||||
@@ -17,13 +17,7 @@ endif
|
||||
endif
|
||||
|
||||
ifdef BINARY64
|
||||
ifeq ($(OSNAME), Linux)
|
||||
LDFLAGS = -m elf64ppc
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
LDFLAGS = -arch ppc64
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), AIX)
|
||||
CCOMMON_OPT += -mpowerpc64 -maix64
|
||||
@@ -34,22 +28,17 @@ ifeq ($(COMPILER_F77), xlf)
|
||||
FCOMMON_OPT += -q64
|
||||
endif
|
||||
ARFLAGS = -X 64
|
||||
LDFLAGS = -b64
|
||||
ASFLAGS = -a64
|
||||
endif
|
||||
else
|
||||
ifeq ($(OSNAME), Linux)
|
||||
LDFLAGS = -m elf32ppc
|
||||
endif
|
||||
ifeq ($(OSNAME), AIX)
|
||||
CCOMMON_OPT += -Wa,-a32
|
||||
ARFLAGS = -X 32
|
||||
LDFLAGS = -b32
|
||||
ASFLAGS = -a32
|
||||
endif
|
||||
endif
|
||||
|
||||
# CCOMMON_OPT += -maltivec -mabi=altivec
|
||||
# CCOMMON_OPT += -maltivec -mabi=altivec
|
||||
|
||||
LIBFLAME = -L$(FLAMEPATH) -llapack2flame -lflame-lapack -lflame-base $(LIBS)
|
||||
|
||||
@@ -68,7 +57,7 @@ endif
|
||||
|
||||
LIBVECLIB = -framework VecLib
|
||||
ifndef SMP
|
||||
LIBATLAS = -L/usr/lib/atlas3.7.11 -lf77blas -latlas -lg2c -lm
|
||||
LIBATLAS = -L/usr/lib/atlas3.7.11 -lf77blas -latlas -lg2c -lm
|
||||
LIBESSL = -lessl $(ESSLPATH) ../../level1/others/libmisc.a -lm
|
||||
else
|
||||
LIBATLAS = -L/usr/lib/atlas3.7.11p -lptf77blas -latlas -lm -lpthread
|
||||
@@ -84,7 +73,7 @@ endif
|
||||
LIBVECLIB = /System/Library/Frameworks/vecLib.framework/Versions/Current/vecLib
|
||||
|
||||
ifndef SMP
|
||||
LIBATLAS = -L/usr/lib64/atlas3.7.11 -lf77blas -latlas -lg2c -lm
|
||||
LIBATLAS = -L/usr/lib64/atlas3.7.11 -lf77blas -latlas -lg2c -lm
|
||||
LIBESSL = -lessl $(ESSLPATH) -lm
|
||||
else
|
||||
LIBATLAS = -L/usr/lib64/atlas3.7.11p -lptf77blas -latlas -lm -lpthread
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
# This is triggered by Makefile.system and runs before any of the code is built.
|
||||
|
||||
export BINARY
|
||||
export USE_OPENMP
|
||||
|
||||
@@ -21,7 +23,17 @@ all: getarch_2nd
|
||||
|
||||
config.h : c_check f_check getarch
|
||||
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC)
|
||||
ifneq ($(ONLY_CBLAS), 1)
|
||||
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC)
|
||||
else
|
||||
#When we only build CBLAS, we set NOFORTRAN=2
|
||||
echo "NOFORTRAN=2" >> $(TARGET_MAKE)
|
||||
echo "NO_FBLAS=1" >> $(TARGET_MAKE)
|
||||
echo "F_COMPILER=GFORTRAN" >> $(TARGET_MAKE)
|
||||
echo "BU=_" >> $(TARGET_MAKE)
|
||||
echo "#define BUNDERSCORE _" >> $(TARGET_CONF)
|
||||
echo "#define NEEDBUNDERSCORE 1" >> $(TARGET_CONF)
|
||||
endif
|
||||
./getarch 0 >> $(TARGET_MAKE)
|
||||
./getarch 1 >> $(TARGET_CONF)
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
#
|
||||
# Beginning of user configuration
|
||||
# Beginning of user configuration
|
||||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.2.0
|
||||
VERSION = 0.2.14
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
# is libopenblas_$(LIBNAMESUFFIX).so.0.
|
||||
# LIBNAMESUFFIX = omp
|
||||
|
||||
@@ -24,10 +24,24 @@ VERSION = 0.2.0
|
||||
# Fortran compiler. Default is g77.
|
||||
# FC = gfortran
|
||||
|
||||
# You can even specify a cross compiler.
|
||||
# You can even specify a cross compiler. In that case, please also set HOSTCC.
|
||||
|
||||
# cross compiler for Windows
|
||||
# CC = x86_64-w64-mingw32-gcc
|
||||
# FC = x86_64-w64-mingw32-gfortran
|
||||
|
||||
# cross compiler for 32bit ARM
|
||||
# CC = arm-linux-gnueabihf-gcc
|
||||
# FC = arm-linux-gnueabihf-gfortran
|
||||
|
||||
# cross compiler for 64bit ARM
|
||||
# CC = aarch64-linux-gnu-gcc
|
||||
# FC = aarch64-linux-gnu-gfortran
|
||||
|
||||
|
||||
# If you use the cross compiler, please set this host compiler.
|
||||
# HOSTCC = gcc
|
||||
|
||||
# If you need 32bit binary, define BINARY=32, otherwise define BINARY=64
|
||||
# BINARY=64
|
||||
|
||||
@@ -45,10 +59,20 @@ VERSION = 0.2.0
|
||||
# automatically detected by the script.
|
||||
# NUM_THREADS = 24
|
||||
|
||||
# if you don't need to install the static library, please comment it in.
|
||||
# NO_STATIC = 1
|
||||
|
||||
# if you don't need to generate the shared library, please comment it in.
|
||||
# NO_SHARED = 1
|
||||
|
||||
# If you don't need CBLAS interface, please comment it in.
|
||||
# NO_CBLAS = 1
|
||||
|
||||
# If you don't need LAPACK, please comment it in.
|
||||
# If you only want CBLAS interface without installing Fortran compiler,
|
||||
# please comment it in.
|
||||
# ONLY_CBLAS = 1
|
||||
|
||||
# If you don't need LAPACK, please comment it in.
|
||||
# If you set NO_LAPACK=1, the library automatically sets NO_LAPACKE=1.
|
||||
# NO_LAPACK = 1
|
||||
|
||||
@@ -66,10 +90,23 @@ VERSION = 0.2.0
|
||||
# Unfortunately most of kernel won't give us high quality buffer.
|
||||
# BLAS tries to find the best region before entering main function,
|
||||
# but it will consume time. If you don't like it, you can disable one.
|
||||
# NO_WARMUP = 1
|
||||
NO_WARMUP = 1
|
||||
|
||||
# If you want to disable CPU/Memory affinity on Linux.
|
||||
# NO_AFFINITY = 1
|
||||
NO_AFFINITY = 1
|
||||
|
||||
# if you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus
|
||||
# BIGNUMA = 1
|
||||
|
||||
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
|
||||
# and OS. However, the performance is low.
|
||||
# NO_AVX = 1
|
||||
|
||||
# Don't use Haswell optimizations if binutils is too old (e.g. RHEL6)
|
||||
# NO_AVX2 = 1
|
||||
|
||||
# Don't use parallel make.
|
||||
# NO_PARALLEL_MAKE = 1
|
||||
|
||||
# If you would like to know minute performance report of GotoBLAS.
|
||||
# FUNCTION_PROFILE = 1
|
||||
@@ -77,6 +114,9 @@ VERSION = 0.2.0
|
||||
# Support for IEEE quad precision(it's *real* REAL*16)( under testing)
|
||||
# QUAD_PRECISION = 1
|
||||
|
||||
# Support for integer matrix and vector (e.g. iaxpy)
|
||||
# INTEGER_PRECISION = 1
|
||||
|
||||
# Threads are still working for a while after finishing BLAS operation
|
||||
# to reduce thread activate/deactivate overhead. You can determine
|
||||
# time out to improve performance. This number should be from 4 to 30
|
||||
@@ -92,10 +132,10 @@ VERSION = 0.2.0
|
||||
# If you need to synchronize FP CSR between threads (for x86/x86_64 only).
|
||||
# CONSISTENT_FPCSR = 1
|
||||
|
||||
# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be executed
|
||||
# with single thread. You can use this flag to avoid the overhead of multi-threading
|
||||
# in small matrix sizes. The default value is 50.
|
||||
# GEMM_MULTITHREAD_THRESHOLD = 50
|
||||
# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be executed
|
||||
# with single thread. You can use this flag to avoid the overhead of multi-threading
|
||||
# in small matrix sizes. The default value is 4.
|
||||
# GEMM_MULTITHREAD_THRESHOLD = 4
|
||||
|
||||
# If you need a sanity check by comparing against reference BLAS. It'll be very
|
||||
# slow (Not implemented yet).
|
||||
@@ -108,19 +148,33 @@ VERSION = 0.2.0
|
||||
# The installation directory.
|
||||
# PREFIX = /opt/OpenBLAS
|
||||
|
||||
# Common Optimization Flag; -O2 is enough.
|
||||
# DEBUG = 1
|
||||
# Common Optimization Flag;
|
||||
# The default -O2 is enough.
|
||||
# COMMON_OPT = -O2
|
||||
|
||||
ifeq ($(DEBUG), 1)
|
||||
COMMON_OPT += -g
|
||||
# -DDEBUG
|
||||
else
|
||||
COMMON_OPT += -O2
|
||||
endif
|
||||
# gfortran option for LAPACK
|
||||
# enable this flag only on 64bit Linux and if you need a thread safe lapack library
|
||||
# FCOMMON_OPT = -frecursive
|
||||
|
||||
# Profiling flags
|
||||
COMMON_PROF = -pg
|
||||
|
||||
# Build Debug version
|
||||
# DEBUG = 1
|
||||
|
||||
# Improve GEMV and GER for small matrices by stack allocation.
|
||||
# For details, https://github.com/xianyi/OpenBLAS/pull/482
|
||||
#
|
||||
# End of user configuration
|
||||
MAX_STACK_ALLOC=2048
|
||||
|
||||
# Add a prefix or suffix to all exported symbol names in the shared library.
|
||||
# Avoid conflicts with other BLAS libraries, especially when using
|
||||
# 64 bit integer interfaces in OpenBLAS.
|
||||
# For details, https://github.com/xianyi/OpenBLAS/pull/459
|
||||
#
|
||||
# SYMBOLPREFIX=
|
||||
# SYMBOLSUFFIX=
|
||||
|
||||
#
|
||||
# End of user configuration
|
||||
#
|
||||
|
||||
@@ -10,7 +10,6 @@ endif
|
||||
ifeq ($(COMPILER_F77), f90)
|
||||
FCOMMON_OPT += -xarch=v9
|
||||
endif
|
||||
LDFLAGS = -64
|
||||
else
|
||||
|
||||
CCOMMON_OPT += -mcpu=v9
|
||||
@@ -28,7 +27,7 @@ LIBNAME = $(LIBPREFIX).a
|
||||
|
||||
ifndef SMP
|
||||
LIBCXML = -L/opt/SUNWspro/lib/v9
|
||||
LIBATLAS = -L$(HOME)/misc/lib -lf77blas -latlas -lm
|
||||
LIBATLAS = -L$(HOME)/misc/lib -lf77blas -latlas -lm
|
||||
else
|
||||
LIBCXML = -lcxmlp -lots -lm
|
||||
endif
|
||||
|
||||
417
Makefile.system
417
Makefile.system
@@ -9,9 +9,7 @@ ifndef TOPDIR
|
||||
TOPDIR = .
|
||||
endif
|
||||
|
||||
ifndef NETLIB_LAPACK_DIR
|
||||
NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-3.4.1
|
||||
endif
|
||||
NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib
|
||||
|
||||
# Default C compiler
|
||||
# - Only set if not specified on the command line or inherited from the environment.
|
||||
@@ -20,6 +18,13 @@ endif
|
||||
# - Default value is 'cc' which is not always a valid command (e.g. MinGW).
|
||||
ifeq ($(origin CC),default)
|
||||
CC = gcc
|
||||
# Change the default compiler to clang on Mac OSX.
|
||||
# http://stackoverflow.com/questions/714100/os-detecting-makefile
|
||||
UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
CC = clang
|
||||
# EXTRALIB += -Wl,-no_compact_unwind
|
||||
endif
|
||||
endif
|
||||
|
||||
# Default Fortran compiler (FC) is selected by f_check.
|
||||
@@ -31,7 +36,7 @@ include $(TOPDIR)/$(MAKEFILE_RULE)
|
||||
endif
|
||||
|
||||
#
|
||||
# Beginning of system configuration
|
||||
# Beginning of system configuration
|
||||
#
|
||||
|
||||
ifndef HOSTCC
|
||||
@@ -42,27 +47,115 @@ ifdef TARGET
|
||||
GETARCH_FLAGS := -DFORCE_$(TARGET)
|
||||
endif
|
||||
|
||||
# Force fallbacks for 32bit
|
||||
|
||||
ifeq ($(BINARY), 32)
|
||||
ifeq ($(TARGET), HASWELL)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET), SANDYBRIDGE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET), BULLDOZER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET), PILEDRIVER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET), STEAMROLLER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET), EXCAVATOR)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
#TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1.
|
||||
#
|
||||
ifdef TARGET_CORE
|
||||
GETARCH_FLAGS := -DFORCE_$(TARGET_CORE)
|
||||
endif
|
||||
|
||||
# Force fallbacks for 32bit
|
||||
|
||||
ifeq ($(BINARY), 32)
|
||||
ifeq ($(TARGET_CORE), HASWELL)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), SANDYBRIDGE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), BULLDOZER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), PILEDRIVER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), STEAMROLLER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), EXCAVATOR)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
|
||||
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
GETARCH_FLAGS += -DUSE64BITINT
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef GEMM_MULTITHREAD_THRESHOLD
|
||||
GEMM_MULTITHREAD_THRESHOLD=50
|
||||
GEMM_MULTITHREAD_THRESHOLD=4
|
||||
endif
|
||||
GETARCH_FLAGS += -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD)
|
||||
|
||||
ifeq ($(NO_AVX), 1)
|
||||
GETARCH_FLAGS += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifeq ($(BINARY), 32)
|
||||
GETARCH_FLAGS += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifeq ($(NO_AVX2), 1)
|
||||
GETARCH_FLAGS += -DNO_AVX2
|
||||
endif
|
||||
|
||||
ifeq ($(DEBUG), 1)
|
||||
GETARCH_FLAGS += -g
|
||||
endif
|
||||
|
||||
ifeq ($(QUIET_MAKE), 1)
|
||||
MAKE += -s
|
||||
endif
|
||||
|
||||
ifndef NO_PARALLEL_MAKE
|
||||
NO_PARALLEL_MAKE=0
|
||||
endif
|
||||
GETARCH_FLAGS += -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE)
|
||||
|
||||
ifeq ($(HOSTCC), loongcc)
|
||||
GETARCH_FLAGS += -static
|
||||
endif
|
||||
|
||||
# If Fortran is not used, only CBLAS will be compiled.
|
||||
ifeq ($(ONLY_CBLAS), 1)
|
||||
NO_LAPACK = 1
|
||||
else
|
||||
ONLY_CBLAS = 0
|
||||
endif
|
||||
GETARCH_FLAGS += -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD)
|
||||
|
||||
# This operation is expensive, so execution should be once.
|
||||
ifndef GOTOBLAS_MAKEFILE
|
||||
export GOTOBLAS_MAKEFILE = 1
|
||||
|
||||
# Generating Makefile.conf and config.h
|
||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.getarch CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) all)
|
||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) all)
|
||||
|
||||
ifndef TARGET_CORE
|
||||
include $(TOPDIR)/Makefile.conf
|
||||
@@ -106,13 +199,21 @@ LD = $(CROSS_SUFFIX)ld
|
||||
RANLIB = $(CROSS_SUFFIX)ranlib
|
||||
NM = $(CROSS_SUFFIX)nm
|
||||
DLLWRAP = $(CROSS_SUFFIX)dllwrap
|
||||
OBJCOPY = $(CROSS_SUFFIX)objcopy
|
||||
OBJCONV = $(CROSS_SUFFIX)objconv
|
||||
|
||||
|
||||
# If Fortran detection failed, only build BLAS.
|
||||
ifeq ($(NOFORTRAN), 1)
|
||||
NO_LAPACK = 1
|
||||
endif
|
||||
|
||||
#
|
||||
# OS dependent settings
|
||||
#
|
||||
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.2
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.6
|
||||
MD5SUM = md5 -r
|
||||
endif
|
||||
|
||||
@@ -121,11 +222,12 @@ MD5SUM = md5 -r
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
MD5SUM = md5 -r
|
||||
MD5SUM = md5 -n
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Linux)
|
||||
EXTRALIB += -lm
|
||||
NO_EXPRECISION = 1
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), AIX)
|
||||
@@ -140,7 +242,39 @@ EXTRALIB += -defaultlib:advapi32
|
||||
|
||||
SUFFIX = obj
|
||||
PSUFFIX = pobj
|
||||
LIBSUFFIX = lib
|
||||
LIBSUFFIX = a
|
||||
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
CCOMMON_OPT += -DMS_ABI
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
#Test for supporting MS_ABI
|
||||
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
|
||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||
ifeq ($(GCCVERSIONGT4), 1)
|
||||
# GCC Major version > 4
|
||||
# It is compatible with MSVC ABI.
|
||||
CCOMMON_OPT += -DMS_ABI
|
||||
endif
|
||||
|
||||
ifeq ($(GCCVERSIONGTEQ4), 1)
|
||||
ifeq ($(GCCMINORVERSIONGTEQ7), 1)
|
||||
# GCC Version >=4.7
|
||||
# It is compatible with MSVC ABI.
|
||||
CCOMMON_OPT += -DMS_ABI
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Ensure the correct stack alignment on Win32
|
||||
# http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
|
||||
ifeq ($(ARCH), x86)
|
||||
CCOMMON_OPT += -mincoming-stack-boundary=2
|
||||
FCOMMON_OPT += -mincoming-stack-boundary=2
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Interix)
|
||||
@@ -165,11 +299,20 @@ endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# ifeq logical or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix))
|
||||
OS_WINDOWS=1
|
||||
endif
|
||||
|
||||
ifdef QUAD_PRECISION
|
||||
CCOMMON_OPT += -DQUAD_PRECISION
|
||||
NO_EXPRECISION = 1
|
||||
endif
|
||||
|
||||
ifdef INTEGER_PRECISION
|
||||
CCOMMON_OPT += -DINTEGER_PRECISION
|
||||
endif
|
||||
|
||||
ifneq ($(ARCH), x86)
|
||||
ifneq ($(ARCH), x86_64)
|
||||
NO_EXPRECISION = 1
|
||||
@@ -185,6 +328,10 @@ ifdef SANITY_CHECK
|
||||
CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU)
|
||||
endif
|
||||
|
||||
ifdef MAX_STACK_ALLOC
|
||||
CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC)
|
||||
endif
|
||||
|
||||
#
|
||||
# Architecture dependent settings
|
||||
#
|
||||
@@ -195,11 +342,17 @@ NO_BINARY_MODE = 1
|
||||
endif
|
||||
ifndef NO_EXPRECISION
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# ifeq logical or. GCC or LSB
|
||||
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
|
||||
EXPRECISION = 1
|
||||
CCOMMON_OPT += -DEXPRECISION -m128bit-long-double
|
||||
FCOMMON_OPT += -m128bit-long-double
|
||||
endif
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
EXPRECISION = 1
|
||||
CCOMMON_OPT += -DEXPRECISION
|
||||
FCOMMON_OPT += -m128bit-long-double
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
@@ -207,11 +360,17 @@ endif
|
||||
ifeq ($(ARCH), x86_64)
|
||||
ifndef NO_EXPRECISION
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# ifeq logical or. GCC or LSB
|
||||
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
|
||||
EXPRECISION = 1
|
||||
CCOMMON_OPT += -DEXPRECISION -m128bit-long-double
|
||||
FCOMMON_OPT += -m128bit-long-double
|
||||
endif
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
EXPRECISION = 1
|
||||
CCOMMON_OPT += -DEXPRECISION
|
||||
FCOMMON_OPT += -m128bit-long-double
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
@@ -220,8 +379,21 @@ ifeq ($(C_COMPILER), INTEL)
|
||||
CCOMMON_OPT += -wd981
|
||||
endif
|
||||
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
|
||||
#check
|
||||
ifeq ($(USE_THREAD), 0)
|
||||
$(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.)
|
||||
endif
|
||||
|
||||
# ifeq logical or. GCC or LSB
|
||||
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
|
||||
CCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
$(error OpenBLAS: Clang didn't support OpenMP yet.)
|
||||
CCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
|
||||
@@ -244,14 +416,20 @@ endif
|
||||
endif
|
||||
|
||||
|
||||
ifdef DYNAMIC_ARCH
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
ifeq ($(ARCH), x86)
|
||||
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
|
||||
CORE2 PENRYN DUNNINGTON NEHALEM SANDYBRIDGE ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86_64)
|
||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM SANDYBRIDGE OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
ifneq ($(NO_AVX), 1)
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
|
||||
endif
|
||||
ifneq ($(NO_AVX2), 1)
|
||||
DYNAMIC_CORE += HASWELL
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef DYNAMIC_CORE
|
||||
@@ -280,15 +458,35 @@ NO_BINARY_MODE = 1
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), arm)
|
||||
NO_BINARY_MODE = 1
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), arm64)
|
||||
NO_BINARY_MODE = 1
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
|
||||
|
||||
|
||||
|
||||
#
|
||||
# C Compiler dependent settings
|
||||
#
|
||||
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
|
||||
# ifeq logical or. GCC or CLANG or LSB
|
||||
# http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or
|
||||
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC CLANG LSB))
|
||||
CCOMMON_OPT += -Wall
|
||||
COMMON_PROF += -fno-inline
|
||||
NO_UNINITIALIZED_WARN = -Wno-uninitialized
|
||||
|
||||
ifeq ($(QUIET_MAKE), 1)
|
||||
CCOMMON_OPT += $(NO_UNINITIALIZED_WARN) -Wno-unused
|
||||
endif
|
||||
|
||||
ifdef NO_BINARY_MODE
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
@@ -300,12 +498,12 @@ endif
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
CCOMMON_OPT += -march=mips64
|
||||
FCOMMON_OPT += -march=mips64
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), LOONGSON3B)
|
||||
ifeq ($(CORE), LOONGSON3B)
|
||||
CCOMMON_OPT += -march=mips64
|
||||
FCOMMON_OPT += -march=mips64
|
||||
endif
|
||||
@@ -373,7 +571,10 @@ endif
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
CCOMMON_OPT += -DF_INTERFACE_GFORT
|
||||
FCOMMON_OPT += -Wall
|
||||
EXTRALIB += -lgfortran
|
||||
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
EXTRALIB += -lgfortran
|
||||
endif
|
||||
ifdef NO_BINARY_MODE
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifdef BINARY64
|
||||
@@ -386,13 +587,15 @@ else
|
||||
ifdef BINARY64
|
||||
FCOMMON_OPT += -m64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -fdefault-integer-8
|
||||
endif
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -m32
|
||||
endif
|
||||
endif
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
endif
|
||||
@@ -400,16 +603,18 @@ endif
|
||||
ifeq ($(F_COMPILER), INTEL)
|
||||
CCOMMON_OPT += -DF_INTERFACE_INTEL
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
ifdef USE_OPENMP
|
||||
endif
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER), FUJITSU)
|
||||
CCOMMON_OPT += -DF_INTERFACE_FUJITSU
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
endif
|
||||
@@ -420,12 +625,14 @@ CCOMMON_OPT += -DF_INTERFACE_IBM
|
||||
ifdef BINARY64
|
||||
FCOMMON_OPT += -q64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -qintsize=8
|
||||
endif
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -q32
|
||||
endif
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
endif
|
||||
@@ -435,13 +642,15 @@ CCOMMON_OPT += -DF_INTERFACE_PGI
|
||||
COMMON_PROF += -DPGICOMPILER
|
||||
ifdef BINARY64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
FCOMMON_OPT += -tp p7-64
|
||||
else
|
||||
FCOMMON_OPT += -tp p7
|
||||
endif
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -mp
|
||||
endif
|
||||
endif
|
||||
@@ -450,9 +659,11 @@ ifeq ($(F_COMPILER), PATHSCALE)
|
||||
CCOMMON_OPT += -DF_INTERFACE_PATHSCALE
|
||||
ifdef BINARY64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq ($(ARCH), mips64)
|
||||
ifndef BINARY64
|
||||
@@ -468,7 +679,7 @@ FCOMMON_OPT += -mabi=n32
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -mp
|
||||
endif
|
||||
endif
|
||||
@@ -477,28 +688,65 @@ ifeq ($(F_COMPILER), OPEN64)
|
||||
CCOMMON_OPT += -DF_INTERFACE_OPEN64
|
||||
ifdef BINARY64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifndef BINARY64
|
||||
FCOMMON_OPT += -n32
|
||||
else
|
||||
FCOMMON_OPT += -n64
|
||||
endif
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
FCOMMON_OPT += -loongson3 -static
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), LOONGSON3B)
|
||||
FCOMMON_OPT += -loongson3 -static
|
||||
endif
|
||||
|
||||
else
|
||||
ifndef BINARY64
|
||||
FCOMMON_OPT += -m32
|
||||
else
|
||||
FCOMMON_OPT += -m64
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FEXTRALIB += -lstdc++
|
||||
FCOMMON_OPT += -mp
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), OPEN64)
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifndef BINARY64
|
||||
CCOMMON_OPT += -n32
|
||||
else
|
||||
CCOMMON_OPT += -n64
|
||||
endif
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
CCOMMON_OPT += -loongson3 -static
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), LOONGSON3B)
|
||||
CCOMMON_OPT += -loongson3 -static
|
||||
endif
|
||||
|
||||
else
|
||||
|
||||
ifndef BINARY64
|
||||
CCOMMON_OPT += -m32
|
||||
else
|
||||
CCOMMON_OPT += -m64
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), SUN)
|
||||
CCOMMON_OPT += -w
|
||||
@@ -516,35 +764,37 @@ FCOMMON_OPT += -m32
|
||||
else
|
||||
FCOMMON_OPT += -m64
|
||||
endif
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -xopenmp=parallel
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER), COMPAQ)
|
||||
CCOMMON_OPT += -DF_INTERFACE_COMPAQ
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef BINARY64
|
||||
ifdef INTERFACE64
|
||||
CCOMMON_OPT +=
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
CCOMMON_OPT +=
|
||||
#-DUSE64BITINT
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(NEED_PIC), 1)
|
||||
ifeq ($(C_COMPILER), IBM)
|
||||
CCOMMON_OPT += -qpic=large
|
||||
CCOMMON_OPT += -qpic=large
|
||||
else
|
||||
CCOMMON_OPT += -fPIC
|
||||
CCOMMON_OPT += -fPIC
|
||||
endif
|
||||
ifeq ($(F_COMPILER), SUN)
|
||||
FCOMMON_OPT += -pic
|
||||
else
|
||||
FCOMMON_OPT += -fPIC
|
||||
FCOMMON_OPT += -fPIC
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -562,6 +812,18 @@ ifeq ($(NO_LAPACKE), 1)
|
||||
CCOMMON_OPT += -DNO_LAPACKE
|
||||
endif
|
||||
|
||||
ifeq ($(NO_AVX), 1)
|
||||
CCOMMON_OPT += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86)
|
||||
CCOMMON_OPT += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifeq ($(NO_AVX2), 1)
|
||||
CCOMMON_OPT += -DNO_AVX2
|
||||
endif
|
||||
|
||||
ifdef SMP
|
||||
CCOMMON_OPT += -DSMP_SERVER
|
||||
|
||||
@@ -577,6 +839,10 @@ ifeq ($(USE_OPENMP), 1)
|
||||
CCOMMON_OPT += -DUSE_OPENMP
|
||||
endif
|
||||
|
||||
ifeq ($(BIGNUMA), 1)
|
||||
CCOMMON_OPT += -DBIGNUMA
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
ifeq ($(NO_WARMUP), 1)
|
||||
@@ -614,6 +880,14 @@ else
|
||||
LIBPREFIX = libopenblas_$(LIBNAMESUFFIX)
|
||||
endif
|
||||
|
||||
ifndef SYMBOLPREFIX
|
||||
SYMBOLPREFIX =
|
||||
endif
|
||||
|
||||
ifndef SYMBOLSUFFIX
|
||||
SYMBOLSUFFIX =
|
||||
endif
|
||||
|
||||
KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
|
||||
|
||||
include $(TOPDIR)/Makefile.$(ARCH)
|
||||
@@ -687,11 +961,46 @@ AWK = awk
|
||||
REVISION = -r$(VERSION)
|
||||
MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION)))
|
||||
|
||||
CFLAGS = $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
|
||||
PFLAGS = $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
|
||||
ifeq ($(DEBUG), 1)
|
||||
COMMON_OPT += -g
|
||||
endif
|
||||
|
||||
FFLAGS = $(COMMON_OPT) $(FCOMMON_OPT)
|
||||
FPFLAGS = $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
|
||||
ifndef COMMON_OPT
|
||||
COMMON_OPT = -O2
|
||||
endif
|
||||
|
||||
|
||||
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
|
||||
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
|
||||
|
||||
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
|
||||
override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
|
||||
#MAKEOVERRIDES =
|
||||
|
||||
#For LAPACK Fortran codes.
|
||||
#Disable -fopenmp for LAPACK Fortran codes on Windows.
|
||||
ifdef OS_WINDOWS
|
||||
LAPACK_FFLAGS := $(filter-out -fopenmp -mp -openmp -xopenmp=parallel,$(FFLAGS))
|
||||
LAPACK_FPFLAGS := $(filter-out -fopenmp -mp -openmp -xopenmp=parallel,$(FPFLAGS))
|
||||
else
|
||||
LAPACK_FFLAGS := $(FFLAGS)
|
||||
LAPACK_FPFLAGS := $(FPFLAGS)
|
||||
endif
|
||||
|
||||
LAPACK_CFLAGS = $(CFLAGS)
|
||||
LAPACK_CFLAGS += -DHAVE_LAPACK_CONFIG_H
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
LAPACK_CFLAGS += -DLAPACK_ILP64
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef OS_WINDOWS
|
||||
LAPACK_CFLAGS += -DOPENBLAS_OS_WINDOWS
|
||||
endif
|
||||
ifeq ($(C_COMPILER), LSB)
|
||||
LAPACK_CFLAGS += -DLAPACK_COMPLEX_STRUCTURE
|
||||
endif
|
||||
|
||||
ifndef SUFFIX
|
||||
SUFFIX = o
|
||||
@@ -705,7 +1014,7 @@ ifndef LIBSUFFIX
|
||||
LIBSUFFIX = a
|
||||
endif
|
||||
|
||||
ifndef DYNAMIC_ARCH
|
||||
ifneq ($(DYNAMIC_ARCH), 1)
|
||||
ifndef SMP
|
||||
LIBNAME = $(LIBPREFIX)_$(LIBCORE)$(REVISION).$(LIBSUFFIX)
|
||||
LIBNAME_P = $(LIBPREFIX)_$(LIBCORE)$(REVISION)_p.$(LIBSUFFIX)
|
||||
@@ -724,8 +1033,8 @@ endif
|
||||
endif
|
||||
|
||||
|
||||
LIBDLLNAME = $(LIBPREFIX).dll
|
||||
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.so)
|
||||
LIBDLLNAME = $(LIBNAME:.$(LIBSUFFIX)=.dll)
|
||||
LIBDYNNAME = $(LIBNAME:.$(LIBSUFFIX)=.dylib)
|
||||
LIBDEFNAME = $(LIBNAME:.$(LIBSUFFIX)=.def)
|
||||
LIBEXPNAME = $(LIBNAME:.$(LIBSUFFIX)=.exp)
|
||||
@@ -734,6 +1043,23 @@ LIBZIPNAME = $(LIBNAME:.$(LIBSUFFIX)=.zip)
|
||||
LIBS = $(TOPDIR)/$(LIBNAME)
|
||||
LIBS_P = $(TOPDIR)/$(LIBNAME_P)
|
||||
|
||||
|
||||
LIB_COMPONENTS = BLAS
|
||||
ifneq ($(NO_CBLAS), 1)
|
||||
LIB_COMPONENTS += CBLAS
|
||||
endif
|
||||
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
LIB_COMPONENTS += LAPACK
|
||||
ifneq ($(NO_LAPACKE), 1)
|
||||
LIB_COMPONENTS += LAPACKE
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ONLY_CBLAS), 1)
|
||||
LIB_COMPONENTS = CBLAS
|
||||
endif
|
||||
|
||||
export OSNAME
|
||||
export ARCH
|
||||
export CORE
|
||||
@@ -744,6 +1070,7 @@ export CC
|
||||
export FC
|
||||
export BU
|
||||
export FU
|
||||
export NEED2UNDERSCORES
|
||||
export USE_THREAD
|
||||
export NUM_THREADS
|
||||
export NUM_CORES
|
||||
@@ -759,6 +1086,7 @@ export USE_OPENMP
|
||||
export CROSS
|
||||
export CROSS_SUFFIX
|
||||
export NOFORTRAN
|
||||
export NO_FBLAS
|
||||
export EXTRALIB
|
||||
export CEXTRALIB
|
||||
export FEXTRALIB
|
||||
@@ -771,6 +1099,10 @@ export HAVE_SSE4_2
|
||||
export HAVE_SSE4A
|
||||
export HAVE_SSE5
|
||||
export HAVE_AVX
|
||||
export HAVE_VFP
|
||||
export HAVE_VFPV3
|
||||
export HAVE_VFPV4
|
||||
export HAVE_NEON
|
||||
export KERNELDIR
|
||||
export FUNCTION_PROFILE
|
||||
export TARGET_CORE
|
||||
@@ -787,6 +1119,13 @@ export ZGEMM_UNROLL_M
|
||||
export ZGEMM_UNROLL_N
|
||||
export XGEMM_UNROLL_M
|
||||
export XGEMM_UNROLL_N
|
||||
export CGEMM3M_UNROLL_M
|
||||
export CGEMM3M_UNROLL_N
|
||||
export ZGEMM3M_UNROLL_M
|
||||
export ZGEMM3M_UNROLL_N
|
||||
export XGEMM3M_UNROLL_M
|
||||
export XGEMM3M_UNROLL_N
|
||||
|
||||
|
||||
ifdef USE_CUDA
|
||||
export CUDADIR
|
||||
|
||||
@@ -4,6 +4,7 @@ QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
IBLASOBJS_P = $(IBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
||||
COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
||||
@@ -22,19 +23,26 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
|
||||
endif
|
||||
|
||||
$(SBLASOBJS) $(SBLASOBJS_P) : CFLAGS += -UDOUBLE -UCOMPLEX
|
||||
$(DBLASOBJS) $(DBLASOBJS_P) : CFLAGS += -DDOUBLE -UCOMPLEX
|
||||
$(QBLASOBJS) $(QBLASOBJS_P) : CFLAGS += -DXDOUBLE -UCOMPLEX
|
||||
$(CBLASOBJS) $(CBLASOBJS_P) : CFLAGS += -UDOUBLE -DCOMPLEX
|
||||
$(ZBLASOBJS) $(ZBLASOBJS_P) : CFLAGS += -DDOUBLE -DCOMPLEX
|
||||
$(XBLASOBJS) $(XBLASOBJS_P) : CFLAGS += -DXDOUBLE -DCOMPLEX
|
||||
ifdef INTEGER_PRECISION
|
||||
BLASOBJS += $(IBLASOBJS)
|
||||
BLASOBJS_P += $(IBLASOBJS_P)
|
||||
endif
|
||||
|
||||
$(SBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(DBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(QBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(CBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(ZBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(XBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
|
||||
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
|
||||
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
|
||||
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
|
||||
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
|
||||
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
|
||||
$(IBLASOBJS) $(IBLASOBJS_P) : override CFLAGS += -DINTEGER -UCOMPLEX
|
||||
|
||||
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(IBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
|
||||
libs :: $(BLASOBJS) $(COMMONOBJS)
|
||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||
@@ -57,7 +65,7 @@ commonlibs :: $(COMMONOBJS)
|
||||
commonprof :: $(COMMONOBJS_P)
|
||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME_P) $^
|
||||
|
||||
quick :
|
||||
quick :
|
||||
$(MAKE) -C $(TOPDIR) libs
|
||||
|
||||
bms.$(SUFFIX):bm.c $(TOPDIR)/../bench/bmcommon.h
|
||||
@@ -386,7 +394,7 @@ kbench_rank_k: kbench_rank_k.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(OBJS) $(LIBS
|
||||
smallbench: smallbench.$(SUFFIX) $(BLASOBJS) $(COMMONOBJS) $(OBJS) $(LIBS)
|
||||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
smallbench.mkl: smallbench.$(SUFFIX)
|
||||
smallbench.mkl: smallbench.$(SUFFIX)
|
||||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
bench.sun: bench.$(SUFFIX) $(OBJS)
|
||||
@@ -410,7 +418,7 @@ bench.acml: bench.$(SUFFIX) $(OBJS)
|
||||
bench.flame: bench.$(SUFFIX) $(OBJS)
|
||||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
kbench.mkl: kbench.$(SUFFIX) $(OBJS)
|
||||
kbench.mkl: kbench.$(SUFFIX) $(OBJS)
|
||||
$(CC) -static -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
bench.mkl: bench.$(SUFFIX) $(OBJS)
|
||||
@@ -537,10 +545,10 @@ params.$(SUFFIX):param.c $(TOPDIR)/../bench/bmcommon.h
|
||||
$(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
|
||||
paramd.$(SUFFIX):param.c $(TOPDIR)/../bench/bmcommon.h
|
||||
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
|
||||
paramq.$(SUFFIX):param.c $(TOPDIR)/../bench/bmcommon.h
|
||||
$(CC) $(CFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
$(CC) $(CFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
|
||||
paramc.$(SUFFIX):paramz.c $(TOPDIR)/../bench/bmcommon.h
|
||||
$(CC) $(CFLAGS) -UDOUBLE -DCOMPLEX -c $< -o $(@F)
|
||||
@@ -555,10 +563,10 @@ params-ex.$(SUFFIX):param-ex.c $(TOPDIR)/../bench/bmcommon.h
|
||||
$(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
|
||||
paramd-ex.$(SUFFIX):param-ex.c $(TOPDIR)/../bench/bmcommon.h
|
||||
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
|
||||
paramq-ex.$(SUFFIX):param-ex.c $(TOPDIR)/../bench/bmcommon.h
|
||||
$(CC) $(CFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
$(CC) $(CFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F)
|
||||
|
||||
paramc-ex.$(SUFFIX):paramz-ex.c $(TOPDIR)/../bench/bmcommon.h
|
||||
$(CC) $(CFLAGS) -UDOUBLE -DCOMPLEX -c $< -o $(@F)
|
||||
@@ -606,7 +614,8 @@ clean ::
|
||||
@if test -d $(ARCH); then \
|
||||
(cd $(ARCH) && $(MAKE) clean) \
|
||||
fi
|
||||
@rm -rf *.a *.s *.o *.po *.obj *.i *.so core core.* gmon.out *.cso \
|
||||
@find . -name '*.o' | xargs rm -rf
|
||||
@rm -rf *.a *.s *.po *.obj *.i *.so core core.* gmon.out *.cso \
|
||||
*.csx *.is *~ *.exe *.flame *.pdb *.dwf \
|
||||
gen_insn_flash.c gen_insn_flash *.stackdump *.dll *.exp *.lib \
|
||||
*.pc *.pcl *.def *.i *.prof linktest.c \
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
# COMPILER_PREFIX = mingw32-
|
||||
|
||||
ifeq ($(OSNAME), Linux)
|
||||
LDFLAGS = -melf_i386
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Interix)
|
||||
ARFLAGS = -m x86
|
||||
@@ -17,7 +14,7 @@ endif
|
||||
# LIBMKL = -L$(MKLPATH)/32 -lmkl_lapack -lmkl_ia32 -lguide -lpthread -lm
|
||||
|
||||
ifndef SMP
|
||||
LIBATLAS = -L$(ATLAS) -lf77blas -latlas -lg2c -lm
|
||||
LIBATLAS = -L$(ATLAS) -lf77blas -latlas -lg2c -lm
|
||||
else
|
||||
LIBATLAS = -L$(ATLAS) -lptf77blas -latlas -lpthread -lg2c -lm
|
||||
endif
|
||||
@@ -53,7 +50,7 @@ LIBSUNPERF = -L/opt/SUNWspro/lib/sse2 -Wl,-R,/opt/SUNWspro/lib/sse2 -lsunperf
|
||||
LIBVECLIB = /System/Library/Frameworks/vecLib.framework/Versions/Current/vecLib
|
||||
|
||||
ifndef SMP
|
||||
LIBATLAS = -L$(ATLASPATH)/32 -lcblas -lf77blas -latlas -lm
|
||||
LIBATLAS = -L$(ATLASPATH)/32 -lcblas -lf77blas -latlas -lm
|
||||
else
|
||||
LIBATLAS = -L$(ATLASPATH)/32 -lptf77blas -lptatlas -lpthread -lm
|
||||
endif
|
||||
|
||||
@@ -2,25 +2,12 @@
|
||||
|
||||
ifeq ($(OSNAME), SunOS)
|
||||
ifdef BINARY64
|
||||
LDFLAGS = -64
|
||||
ifeq ($(F_COMPILER), SUN)
|
||||
FCOMMON_OPT += -m64
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
LDFLAGS = -m elf_x86_64_fbsd
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Linux)
|
||||
LDFLAGS = -m elf_x86_64
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
LDFLAGS =
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Interix)
|
||||
ARFLAGS = -m x64
|
||||
endif
|
||||
@@ -41,7 +28,7 @@ endif
|
||||
|
||||
|
||||
ifndef SMP
|
||||
LIBATLAS = -L$(ATLASPATH)64 -llapack -lcblas -lf77blas -latlas -lm
|
||||
LIBATLAS = -L$(ATLASPATH)64 -llapack -lcblas -lf77blas -latlas -lm
|
||||
else
|
||||
LIBATLAS = -L$(ATLASPATH)64 -llapack -lptcblas -lptf77blas -latlas -lpthread -lm
|
||||
endif
|
||||
|
||||
66
README.md
66
README.md
@@ -1,11 +1,20 @@
|
||||
# OpenBLAS
|
||||
|
||||
[](https://travis-ci.org/xianyi/OpenBLAS)
|
||||
|
||||
## Introduction
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. OpenBLAS is an open source project supported by Lab of Parallel Software and Computational Science, ISCAS <http://www.rdcps.ac.cn>.
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
|
||||
Please read the documents on OpenBLAS wiki pages <http://github.com/xianyi/OpenBLAS/wiki>.
|
||||
|
||||
## Installation
|
||||
## Binary Packages
|
||||
We provide binary packages for the following platform.
|
||||
|
||||
* Windows x86/x86_64
|
||||
|
||||
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/).
|
||||
|
||||
## Installation from Source
|
||||
Download from project homepage. http://xianyi.github.com/OpenBLAS/
|
||||
|
||||
Or, check out codes from git://github.com/xianyi/OpenBLAS.git
|
||||
@@ -23,11 +32,15 @@ On X86 box, compile this library for loongson3a CPU.
|
||||
|
||||
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
|
||||
|
||||
On X86 box, compile this library for loongson3a CPU with loongcc (based on Open64) compiler.
|
||||
|
||||
make CC=loongcc FC=loongf95 HOSTCC=gcc TARGET=LOONGSON3A CROSS=1 CROSS_SUFFIX=mips64el-st-linux-gnu- NO_LAPACKE=1 NO_SHARED=1 BINARY=32
|
||||
|
||||
### Debug version
|
||||
|
||||
make DEBUG=1
|
||||
|
||||
### Intall to the directory (Optional)
|
||||
### Install to the directory (optional)
|
||||
|
||||
Example:
|
||||
|
||||
@@ -42,23 +55,34 @@ Please read GotoBLAS_01Readme.txt
|
||||
|
||||
#### x86/x86-64:
|
||||
- **Intel Xeon 56xx (Westmere)**: Used GotoBLAS2 Nehalem codes.
|
||||
- **Intel Sandy Bridge**: Optimized Level-3 BLAS with AVX on x86-64.
|
||||
- **Intel Sandy Bridge**: Optimized Level-3 and Level-2 BLAS with AVX on x86-64.
|
||||
- **Intel Haswell**: Optimized Level-3 and Level-2 BLAS with AVX2 and FMA on x86-64.
|
||||
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
|
||||
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thank Werner Saar)
|
||||
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
|
||||
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
|
||||
|
||||
#### MIPS64:
|
||||
- **ICT Loongson 3A**: Optimized Level-3 BLAS and the part of Level-1,2.
|
||||
- **ICT Loongson 3B**: Experimental
|
||||
|
||||
#### ARM:
|
||||
- **ARMV6**: Optimized BLAS for vfpv2 and vfpv3-d16 ( e.g. BCM2835, Cortex M0+ )
|
||||
- **ARMV7**: Optimized BLAS for vfpv3-d32 ( e.g. Cortex A8, A9 and A15 )
|
||||
|
||||
#### ARM64:
|
||||
- **ARMV8**: Experimental
|
||||
|
||||
### Support OS:
|
||||
- **GNU/Linux**
|
||||
- **MingWin/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
|
||||
- **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we have little experience with Mac OS X.
|
||||
- **FreeBSD**: Supportted by community. We didn't test the library on this OS.
|
||||
- **FreeBSD**: Supported by the community. We have not tested the library on this OS.
|
||||
|
||||
## Usages
|
||||
Link with libopenblas.a or -lopenblas for shared library.
|
||||
|
||||
### Set the number of threads with environment variables.
|
||||
### Set the number of threads with environment variables.
|
||||
|
||||
Examples:
|
||||
|
||||
@@ -68,7 +92,7 @@ Examples:
|
||||
|
||||
export GOTO_NUM_THREADS=4
|
||||
|
||||
or
|
||||
or
|
||||
|
||||
export OMP_NUM_THREADS=4
|
||||
|
||||
@@ -76,9 +100,9 @@ The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS.
|
||||
|
||||
If you compile this lib with USE_OPENMP=1, you should set OMP_NUM_THREADS environment variable. OpenBLAS ignores OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS with USE_OPENMP=1.
|
||||
|
||||
### Set the number of threads on runtime.
|
||||
### Set the number of threads on runtime.
|
||||
|
||||
We provided the below functions to controll the number of threads on runtime. So far, we didn't support changing the number of threads on Windows. On Windows, these functions are dummy.
|
||||
We provide the functions below to control the number of threads at runtime.
|
||||
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
@@ -90,21 +114,25 @@ If you compile this lib with USE_OPENMP=1, you should use the above functions, t
|
||||
Please open an issue at https://github.com/xianyi/OpenBLAS/issues
|
||||
|
||||
## Contact
|
||||
OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas
|
||||
* OpenBLAS users mailing list: https://groups.google.com/forum/#!forum/openblas-users
|
||||
* OpenBLAS developers mailing list: https://groups.google.com/forum/#!forum/openblas-dev
|
||||
|
||||
## ChangeLog
|
||||
Please see Changelog.txt for the differences from the GotoBLAS2 1.13 BSD version.
|
||||
|
||||
## Troubleshooting
|
||||
* Please read [Faq](https://github.com/xianyi/OpenBLAS/wiki/Faq) at first.
|
||||
* Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD.
|
||||
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture. The Clang 3.0 will generate the wrong AVX binary code.
|
||||
* The number of CPUs/Cores should less than or equal to 256.
|
||||
* On Loongson 3A. make test would be failed because of pthread_create error. The error code is EAGAIN. However, it will be OK when you run the same testcase on shell.
|
||||
* The number of CPUs/Cores should be less than or equal to 256. On Linux x86_64 (amd64), there is experimental support for up to 1024 CPUs/Cores and 128 NUMA nodes if you build the library with BIGNUMA=1.
|
||||
* OpenBLAS does not set processor affinity by default. On Linux, you can enable processor affinity by commenting the line NO_AFFINITY=1 in Makefile.rule. But this may cause [the conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
|
||||
* On Loongson 3A, `make test` may fail because of a pthread_create error (the error code is EAGAIN). However, the same test case succeeds when it is run directly from the shell.
|
||||
|
||||
## Specification of Git Branches
|
||||
We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/).
|
||||
Now, there are 4 branches in github.com.
|
||||
* The master branch. This is the main branch, reflecting a production-ready state.
|
||||
* The develop branch. This is the main branch reflecting the latest delivered development changes for the next release.
|
||||
* The loongson3a branch. This is a feature branch. We develop Loongson3A codes on this branch. We will merge this feature to develop branch in future.
|
||||
* The gh-pages branch. This is for web pages
|
||||
## Contributing
|
||||
1. [Check for open issues](https://github.com/xianyi/OpenBLAS/issues) or open a fresh issue to start a discussion around a feature idea or a bug.
|
||||
1. Fork the [OpenBLAS](https://github.com/xianyi/OpenBLAS) repository to start making your changes.
|
||||
1. Write a test which shows that the bug was fixed or that the feature works as expected.
|
||||
1. Send a pull request. Make sure to add yourself to `CONTRIBUTORS.md`.
|
||||
|
||||
## Donation
|
||||
Please read [this wiki page](https://github.com/xianyi/OpenBLAS/wiki/Donation).
|
||||
|
||||
@@ -8,8 +8,8 @@ Supported List:
|
||||
1.X86/X86_64
|
||||
a)Intel CPU:
|
||||
P2
|
||||
COPPERMINE
|
||||
KATMAI
|
||||
COPPERMINE
|
||||
NORTHWOOD
|
||||
PRESCOTT
|
||||
BANIAS
|
||||
@@ -19,6 +19,7 @@ PENRYN
|
||||
DUNNINGTON
|
||||
NEHALEM
|
||||
SANDYBRIDGE
|
||||
HASWELL
|
||||
ATOM
|
||||
|
||||
b)AMD CPU:
|
||||
@@ -29,6 +30,10 @@ BARCELONA
|
||||
SHANGHAI
|
||||
ISTANBUL
|
||||
BOBCAT
|
||||
BULLDOZER
|
||||
PILEDRIVER
|
||||
STEAMROLLER
|
||||
EXCAVATOR
|
||||
|
||||
c)VIA CPU:
|
||||
SSE_GENERIC
|
||||
@@ -58,3 +63,12 @@ ITANIUM2
|
||||
SPARC
|
||||
SPARCV7
|
||||
|
||||
6.ARM CPU:
|
||||
CORTEXA15
|
||||
CORTEXA9
|
||||
ARMV7
|
||||
ARMV6
|
||||
ARMV5
|
||||
|
||||
7.ARM 64-bit CPU:
|
||||
ARMV8
|
||||
|
||||
9
benchmark/Make_exe.sh
Executable file
9
benchmark/Make_exe.sh
Executable file
@@ -0,0 +1,9 @@
|
||||
#!/bin/bash

# Rename every benchmark binary in the current directory whose name ends in
# .goto, .acml, .mkl or .atlas: each '.' in the name becomes '_' and ".exe"
# is appended (for example "dgemm.goto" becomes "dgemm_goto.exe").
for f in *.goto *.acml *.mkl *.atlas
do
  # An unmatched glob pattern is left as a literal word; -f skips it.
  if [ -f "$f" ]; then
    # Quote every expansion so names containing whitespace or glob
    # characters are passed to mv as a single argument; "--" keeps a name
    # that starts with '-' from being parsed as an option.
    mv -- "$f" "$(echo "$f" | tr '.' '_').exe"
  fi
done
|
||||
|
||||
2179
benchmark/Makefile
2179
benchmark/Makefile
File diff suppressed because it is too large
Load Diff
196
benchmark/asum.c
Normal file
196
benchmark/asum.c
Normal file
@@ -0,0 +1,196 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef ASUM
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define ASUM BLASFUNC(dzasum)
|
||||
#else
|
||||
#define ASUM BLASFUNC(scasum)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define ASUM BLASFUNC(dasum)
|
||||
#else
|
||||
#define ASUM BLASFUNC(sasum)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocation helper that obtains memory from a SysV shared-memory segment
 * created with the SHM_HUGETLB flag instead of from the heap.
 *
 * The segment length passed to shmget() is (size + HUGE_PAGESIZE) with the
 * bits below HUGE_PAGESIZE masked off.
 * NOTE(review): this presumably yields a whole number of huge pages, which
 * requires HUGE_PAGESIZE to be a power of two - confirm at its definition.
 *
 * The segment is attached with shmat() and then immediately marked for
 * removal with IPC_RMID.  If shmget() or shmat() fails, a message is
 * printed on stdout and the process exits with status 1, so the function
 * never returns NULL.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

	size_t segment_bytes = (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1);
	void *mapped;
	int segment_id;

	segment_id = shmget(IPC_PRIVATE, segment_bytes, SHM_HUGETLB | IPC_CREAT |0600);
	if (segment_id < 0) {
		printf( "Memory allocation failed(shmget).\n");
		exit(1);
	}

	mapped = shmat(segment_id, NULL, SHM_RND);
	if ((BLASLONG)mapped == -1){
		printf( "Memory allocation failed(shmat).\n");
		exit(1);
	}

	/* Mark the segment for removal now that it is attached. */
	shmctl(segment_id, IPC_RMID, 0);

	return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
FLOAT result;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
result = ASUM (&m, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
#ifdef COMPLEX
|
||||
fprintf(stderr, " %10.2f MFlops\n", 4. * (double)m / timeg * 1.e-6);
|
||||
#else
|
||||
fprintf(stderr, " %10.2f MFlops\n", 2. * (double)m / timeg * 1.e-6);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
201
benchmark/axpy.c
Normal file
201
benchmark/axpy.c
Normal file
@@ -0,0 +1,201 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef AXPY
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define AXPY BLASFUNC(zaxpy)
|
||||
#else
|
||||
#define AXPY BLASFUNC(caxpy)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define AXPY BLASFUNC(daxpy)
|
||||
#else
|
||||
#define AXPY BLASFUNC(saxpy)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocation helper that obtains memory from a SysV shared-memory segment
 * created with the SHM_HUGETLB flag instead of from the heap.
 *
 * The segment length passed to shmget() is (size + HUGE_PAGESIZE) with the
 * bits below HUGE_PAGESIZE masked off.
 * NOTE(review): this presumably yields a whole number of huge pages, which
 * requires HUGE_PAGESIZE to be a power of two - confirm at its definition.
 *
 * The segment is attached with shmat() and then immediately marked for
 * removal with IPC_RMID.  If shmget() or shmat() fails, a message is
 * printed on stdout and the process exits with status 1, so the function
 * never returns NULL.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

	size_t segment_bytes = (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1);
	void *mapped;
	int segment_id;

	segment_id = shmget(IPC_PRIVATE, segment_bytes, SHM_HUGETLB | IPC_CREAT |0600);
	if (segment_id < 0) {
		printf( "Memory allocation failed(shmget).\n");
		exit(1);
	}

	mapped = shmat(segment_id, NULL, SHM_RND);
	if ((BLASLONG)mapped == -1){
		printf( "Memory allocation failed(shmat).\n");
		exit(1);
	}

	/* Mark the segment for removal now that it is attached. */
	shmctl(segment_id, IPC_RMID, 0);

	return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
AXPY (&m, alpha, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -71,36 +71,43 @@ double fabs(double);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
static __inline double getmflops(int ratio, int m, double secs){
|
||||
|
||||
|
||||
double mm = (double)m;
|
||||
double mulflops, addflops;
|
||||
|
||||
@@ -117,9 +124,13 @@ static __inline double getmflops(int ratio, int m, double secs){
|
||||
}
|
||||
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
#ifndef COMPLEX
|
||||
char *trans[] = {"T", "N"};
|
||||
#else
|
||||
char *trans[] = {"C", "N"};
|
||||
#endif
|
||||
char *uplo[] = {"U", "L"};
|
||||
FLOAT alpha[] = {1.0, 0.0};
|
||||
FLOAT beta [] = {0.0, 0.0};
|
||||
@@ -137,7 +148,7 @@ int MAIN__(int argc, char *argv[]){
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
@@ -148,17 +159,17 @@ int MAIN__(int argc, char *argv[]){
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
|
||||
|
||||
fprintf(stderr, "M = %6d : ", (int)m);
|
||||
|
||||
|
||||
for (uplos = 0; uplos < 2; uplos ++) {
|
||||
|
||||
|
||||
#ifndef COMPLEX
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
@@ -219,11 +230,11 @@ int MAIN__(int argc, char *argv[]){
|
||||
fprintf(stderr, "Info = %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
maxerr = 0.;
|
||||
|
||||
|
||||
if (!(uplos & 1)) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i <= j; i++) {
|
||||
@@ -247,8 +258,8 @@ int MAIN__(int argc, char *argv[]){
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stderr,
|
||||
|
||||
fprintf(stderr,
|
||||
#ifdef XDOUBLE
|
||||
" %Le %10.3f MFlops", maxerr,
|
||||
#else
|
||||
@@ -269,4 +280,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
201
benchmark/copy.c
Normal file
201
benchmark/copy.c
Normal file
@@ -0,0 +1,201 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef COPY
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define COPY BLASFUNC(zcopy)
|
||||
#else
|
||||
#define COPY BLASFUNC(ccopy)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define COPY BLASFUNC(dcopy)
|
||||
#else
|
||||
#define COPY BLASFUNC(scopy)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocation helper that obtains memory from a SysV shared-memory segment
 * created with the SHM_HUGETLB flag instead of from the heap.
 *
 * The segment length passed to shmget() is (size + HUGE_PAGESIZE) with the
 * bits below HUGE_PAGESIZE masked off.
 * NOTE(review): this presumably yields a whole number of huge pages, which
 * requires HUGE_PAGESIZE to be a power of two - confirm at its definition.
 *
 * The segment is attached with shmat() and then immediately marked for
 * removal with IPC_RMID.  If shmget() or shmat() fails, a message is
 * printed on stdout and the process exits with status 1, so the function
 * never returns NULL.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

	size_t segment_bytes = (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1);
	void *mapped;
	int segment_id;

	segment_id = shmget(IPC_PRIVATE, segment_bytes, SHM_HUGETLB | IPC_CREAT |0600);
	if (segment_id < 0) {
		printf( "Memory allocation failed(shmget).\n");
		exit(1);
	}

	mapped = shmat(segment_id, NULL, SHM_RND);
	if ((BLASLONG)mapped == -1){
		printf( "Memory allocation failed(shmat).\n");
		exit(1);
	}

	/* Mark the segment for removal now that it is attached. */
	shmctl(segment_id, IPC_RMID, 0);

	return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
COPY (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MBytes\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
195
benchmark/dot.c
Normal file
195
benchmark/dot.c
Normal file
@@ -0,0 +1,195 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef DOT
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define DOT BLASFUNC(ddot)
|
||||
#else
|
||||
#define DOT BLASFUNC(sdot)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocation helper that obtains memory from a SysV shared-memory segment
 * created with the SHM_HUGETLB flag instead of from the heap.
 *
 * The segment length passed to shmget() is (size + HUGE_PAGESIZE) with the
 * bits below HUGE_PAGESIZE masked off.
 * NOTE(review): this presumably yields a whole number of huge pages, which
 * requires HUGE_PAGESIZE to be a power of two - confirm at its definition.
 *
 * The segment is attached with shmat() and then immediately marked for
 * removal with IPC_RMID.  If shmget() or shmat() fails, a message is
 * printed on stdout and the process exits with status 1, so the function
 * never returns NULL.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

	size_t segment_bytes = (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1);
	void *mapped;
	int segment_id;

	segment_id = shmget(IPC_PRIVATE, segment_bytes, SHM_HUGETLB | IPC_CREAT |0600);
	if (segment_id < 0) {
		printf( "Memory allocation failed(shmget).\n");
		exit(1);
	}

	mapped = shmat(segment_id, NULL, SHM_RND);
	if ((BLASLONG)mapped == -1){
		printf( "Memory allocation failed(shmat).\n");
		exit(1);
	}

	/* Mark the segment for removal now that it is attached. */
	shmctl(segment_id, IPC_RMID, 0);

	return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT result;
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
result = DOT (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
261
benchmark/geev.c
Normal file
261
benchmark/geev.c
Normal file
@@ -0,0 +1,261 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef GEEV
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
#define GEEV BLASFUNC(qgeev)
|
||||
#elif defined(DOUBLE)
|
||||
#define GEEV BLASFUNC(dgeev)
|
||||
#else
|
||||
#define GEEV BLASFUNC(sgeev)
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
#define GEEV BLASFUNC(xgeev)
|
||||
#elif defined(DOUBLE)
|
||||
#define GEEV BLASFUNC(zgeev)
|
||||
#else
|
||||
#define GEEV BLASFUNC(cgeev)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef COMPLEX
|
||||
extern void GEEV( char* jobvl, char* jobvr, blasint* n, FLOAT* a,
|
||||
blasint* lda, FLOAT* wr, FLOAT* wi, FLOAT* vl, blasint* ldvl,
|
||||
FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, blasint* info );
|
||||
#else
|
||||
extern void GEEV( char* jobvl, char* jobvr, blasint* n, FLOAT* a,
|
||||
blasint* lda, FLOAT* wr, FLOAT* vl, blasint* ldvl,
|
||||
FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, FLOAT *rwork, blasint* info );
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocation helper that obtains memory from a SysV shared-memory segment
 * created with the SHM_HUGETLB flag instead of from the heap.
 *
 * The segment length passed to shmget() is (size + HUGE_PAGESIZE) with the
 * bits below HUGE_PAGESIZE masked off.
 * NOTE(review): this presumably yields a whole number of huge pages, which
 * requires HUGE_PAGESIZE to be a power of two - confirm at its definition.
 *
 * The segment is attached with shmat() and then immediately marked for
 * removal with IPC_RMID.  If shmget() or shmat() fails, a message is
 * printed on stdout and the process exits with status 1, so the function
 * never returns NULL.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

	size_t segment_bytes = (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1);
	void *mapped;
	int segment_id;

	segment_id = shmget(IPC_PRIVATE, segment_bytes, SHM_HUGETLB | IPC_CREAT |0600);
	if (segment_id < 0) {
		printf( "Memory allocation failed(shmget).\n");
		exit(1);
	}

	mapped = shmat(segment_id, NULL, SHM_RND);
	if ((BLASLONG)mapped == -1){
		printf( "Memory allocation failed(shmat).\n");
		exit(1);
	}

	/* Mark the segment for removal now that it is attached. */
	shmctl(segment_id, IPC_RMID, 0);

	return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
|
||||
FLOAT wkopt[4];
|
||||
char job='V';
|
||||
char jobr='N';
|
||||
char *p;
|
||||
|
||||
blasint m, i, j, info,lwork;
|
||||
double factor = 26.33;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_JOB"))) job=*p;
|
||||
|
||||
if ( job == 'N' ) factor = 10.0;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Job=%c\n", from, to, step,job);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( vl = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( vr = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( wr = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( wi = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( rwork = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
for(j = 0; j < to; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
lwork = -1;
|
||||
m=to;
|
||||
#ifndef COMPLEX
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
|
||||
#else
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
|
||||
#endif
|
||||
|
||||
lwork = (blasint)wkopt[0];
|
||||
if (( work = (FLOAT *)malloc(sizeof(FLOAT) * lwork * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE FLops Time Lwork\n");
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
lwork = -1;
|
||||
#ifndef COMPLEX
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
|
||||
#else
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
|
||||
#endif
|
||||
|
||||
lwork = (blasint)wkopt[0];
|
||||
#ifndef COMPLEX
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, work, &lwork, &info);
|
||||
#else
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
|
||||
#endif
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info) {
|
||||
fprintf(stderr, "failed to compute eigenvalues .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops : %10.2f Sec : %d\n",
|
||||
COMPSIZE * COMPSIZE * factor * (double)m * (double)m * (double)m / time1 * 1.e-6,time1,lwork);
|
||||
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
223
benchmark/gemm.c
Normal file
223
benchmark/gemm.c
Normal file
@@ -0,0 +1,223 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
/* Bind GEMM to the routine matching the build's COMPLEX / DOUBLE settings. */
#undef GEMM

#if defined(COMPLEX) && defined(DOUBLE)
#define GEMM BLASFUNC(zgemm)
#elif defined(COMPLEX)
#define GEMM BLASFUNC(cgemm)
#elif defined(DOUBLE)
#define GEMM BLASFUNC(dgemm)
#else
#define GEMM BLASFUNC(sgemm)
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)

/* Offset subtracted below to rebase a FILETIME-derived microsecond count
   onto the Unix epoch. */
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv : receives the current system time as seconds plus microseconds after
 *      the epoch shift; nothing is stored when it is NULL.
 * tz : accepted only so the call sites keep their shape; it is ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 usec;

  (void)tz;

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Join the two 32-bit halves into one 64-bit tick count. */
  usec = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

  usec /= 10;                        /* convert into microseconds */
  usec -= DELTA_EPOCH_IN_MICROSECS;  /* file time -> unix epoch   */

  tv->tv_sec  = (long)(usec / 1000000UL);
  tv->tv_usec = (long)(usec % 1000000UL);

  return 0;
}

#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * Allocator that obtains memory from a shared-memory segment requested with
 * the SHM_HUGETLB flag.  This whole section is compiled out by the trailing
 * "&& 0" in the condition above; when enabled, the macro at the end routes
 * the malloc() calls that follow in this file here.
 *
 * The requested size has HUGE_PAGESIZE added and is then masked with
 * ~(HUGE_PAGESIZE - 1).  A failing shmget() or shmat() prints a message and
 * terminates the program with status 1.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *segment;
  int   id;

  id = shmget(IPC_PRIVATE,
              (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
              SHM_HUGETLB | IPC_CREAT | 0600);

  if (id < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  segment = shmat(id, NULL, SHM_RND);

  if ((BLASLONG)segment == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID on the id once the segment is attached. */
  shmctl(id, IPC_RMID, 0);

  return segment;
}

#define malloc huge_malloc

#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {0.0, 0.0};
|
||||
char trans='N';
|
||||
blasint m, n, i, j;
|
||||
int loops = 1;
|
||||
int has_param_n=0;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step=%d : Trans=%c\n", from, to, step, trans);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if ( p != NULL )
|
||||
loops = atoi(p);
|
||||
|
||||
if ((p = getenv("OPENBLAS_PARAM_N"))) {
|
||||
n = atoi(p);
|
||||
has_param_n=1;
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
if ( has_param_n == 1 && n <= m )
|
||||
n=n;
|
||||
else
|
||||
n=m;
|
||||
|
||||
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m, (int)n);
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
|
||||
|
||||
}
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg = time1/loops;
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6, time1);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
212
benchmark/gemm3m.c
Normal file
212
benchmark/gemm3m.c
Normal file
@@ -0,0 +1,212 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
/* Bind GEMM to the routine matching the build's COMPLEX / DOUBLE settings;
   complex builds use the gemm3m entry points. */
#undef GEMM

#if defined(COMPLEX) && defined(DOUBLE)
#define GEMM BLASFUNC(zgemm3m)
#elif defined(COMPLEX)
#define GEMM BLASFUNC(cgemm3m)
#elif defined(DOUBLE)
#define GEMM BLASFUNC(dgemm)
#else
#define GEMM BLASFUNC(sgemm)
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)

/* Offset subtracted below to rebase a FILETIME-derived microsecond count
   onto the Unix epoch. */
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv : receives the current system time as seconds plus microseconds after
 *      the epoch shift; nothing is stored when it is NULL.
 * tz : accepted only so the call sites keep their shape; it is ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 usec;

  (void)tz;

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Join the two 32-bit halves into one 64-bit tick count. */
  usec = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

  usec /= 10;                        /* convert into microseconds */
  usec -= DELTA_EPOCH_IN_MICROSECS;  /* file time -> unix epoch   */

  tv->tv_sec  = (long)(usec / 1000000UL);
  tv->tv_usec = (long)(usec % 1000000UL);

  return 0;
}

#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * Allocator that obtains memory from a shared-memory segment requested with
 * the SHM_HUGETLB flag.  This whole section is compiled out by the trailing
 * "&& 0" in the condition above; when enabled, the macro at the end routes
 * the malloc() calls that follow in this file here.
 *
 * The requested size has HUGE_PAGESIZE added and is then masked with
 * ~(HUGE_PAGESIZE - 1).  A failing shmget() or shmat() prints a message and
 * terminates the program with status 1.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *segment;
  int   id;

  id = shmget(IPC_PRIVATE,
              (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
              SHM_HUGETLB | IPC_CREAT | 0600);

  if (id < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  segment = shmat(id, NULL, SHM_RND);

  if ((BLASLONG)segment == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID on the id once the segment is attached. */
  shmctl(id, IPC_RMID, 0);

  return segment;
}

#define malloc huge_malloc

#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char trans='N';
|
||||
blasint m, i, j;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step=%d : Trans=%c\n", from, to, step, trans);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if ( p != NULL )
|
||||
loops = atoi(p);
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
269
benchmark/gemv.c
Normal file
269
benchmark/gemv.c
Normal file
@@ -0,0 +1,269 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
/* Bind GEMV to the routine matching the build's COMPLEX / DOUBLE settings. */
#undef GEMV

#if defined(COMPLEX) && defined(DOUBLE)
#define GEMV BLASFUNC(zgemv)
#elif defined(COMPLEX)
#define GEMV BLASFUNC(cgemv)
#elif defined(DOUBLE)
#define GEMV BLASFUNC(dgemv)
#else
#define GEMV BLASFUNC(sgemv)
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)

/* Offset subtracted below to rebase a FILETIME-derived microsecond count
   onto the Unix epoch. */
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv : receives the current system time as seconds plus microseconds after
 *      the epoch shift; nothing is stored when it is NULL.
 * tz : accepted only so the call sites keep their shape; it is ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 usec;

  (void)tz;

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Join the two 32-bit halves into one 64-bit tick count. */
  usec = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

  usec /= 10;                        /* convert into microseconds */
  usec -= DELTA_EPOCH_IN_MICROSECS;  /* file time -> unix epoch   */

  tv->tv_sec  = (long)(usec / 1000000UL);
  tv->tv_usec = (long)(usec % 1000000UL);

  return 0;
}

#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * Allocator that obtains memory from a shared-memory segment requested with
 * the SHM_HUGETLB flag.  This whole section is compiled out by the trailing
 * "&& 0" in the condition above; when enabled, the macro at the end routes
 * the malloc() calls that follow in this file here.
 *
 * The requested size has HUGE_PAGESIZE added and is then masked with
 * ~(HUGE_PAGESIZE - 1).  A failing shmget() or shmat() prints a message and
 * terminates the program with status 1.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *segment;
  int   id;

  id = shmget(IPC_PRIVATE,
              (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
              SHM_HUGETLB | IPC_CREAT | 0600);

  if (id < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  segment = shmat(id, NULL, SHM_RND);

  if ((BLASLONG)segment == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID on the id once the segment is attached. */
  shmctl(id, IPC_RMID, 0);

  return segment;
}

#define malloc huge_malloc

#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char trans='N';
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
blasint n=0;
|
||||
int has_param_n = 0;
|
||||
int has_param_m = 0;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
|
||||
int tomax = to;
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
if ((p = getenv("OPENBLAS_PARAM_N"))) {
|
||||
n = atoi(p);
|
||||
if ((n>0)) has_param_n = 1;
|
||||
if ( n > tomax ) tomax = n;
|
||||
}
|
||||
if ( has_param_n == 0 )
|
||||
if ((p = getenv("OPENBLAS_PARAM_M"))) {
|
||||
m = atoi(p);
|
||||
if ((m>0)) has_param_m = 1;
|
||||
if ( m > tomax ) tomax = m;
|
||||
}
|
||||
|
||||
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Trans = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,trans,inc_x,inc_y,loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * tomax * tomax * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * tomax * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * tomax * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
if (has_param_m == 0)
|
||||
{
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
timeg=0;
|
||||
if ( has_param_n == 0 ) n = m;
|
||||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < n * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < n * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
for(n = from; n <= to; n += step)
|
||||
{
|
||||
timeg=0;
|
||||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < n * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < n * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
225
benchmark/ger.c
Normal file
225
benchmark/ger.c
Normal file
@@ -0,0 +1,225 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
/* Bind GER to the routine matching the build's COMPLEX / DOUBLE settings. */
#undef GER

#if defined(COMPLEX) && defined(DOUBLE)
#define GER BLASFUNC(zgeru)
#elif defined(COMPLEX)
#define GER BLASFUNC(cgeru)
#elif defined(DOUBLE)
#define GER BLASFUNC(dger)
#else
#define GER BLASFUNC(sger)
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)

/* Offset subtracted below to rebase a FILETIME-derived microsecond count
   onto the Unix epoch. */
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv : receives the current system time as seconds plus microseconds after
 *      the epoch shift; nothing is stored when it is NULL.
 * tz : accepted only so the call sites keep their shape; it is ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 usec;

  (void)tz;

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Join the two 32-bit halves into one 64-bit tick count. */
  usec = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

  usec /= 10;                        /* convert into microseconds */
  usec -= DELTA_EPOCH_IN_MICROSECS;  /* file time -> unix epoch   */

  tv->tv_sec  = (long)(usec / 1000000UL);
  tv->tv_usec = (long)(usec % 1000000UL);

  return 0;
}

#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * Allocator that obtains memory from a shared-memory segment requested with
 * the SHM_HUGETLB flag.  This whole section is compiled out by the trailing
 * "&& 0" in the condition above; when enabled, the macro at the end routes
 * the malloc() calls that follow in this file here.
 *
 * The requested size has HUGE_PAGESIZE added and is then masked with
 * ~(HUGE_PAGESIZE - 1).  A failing shmget() or shmat() prints a message and
 * terminates the program with status 1.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *segment;
  int   id;

  id = shmget(IPC_PRIVATE,
              (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
              SHM_HUGETLB | IPC_CREAT | 0600);

  if (id < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  segment = shmat(id, NULL, SHM_RND);

  if ((BLASLONG)segment == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID on the id once the segment is attached. */
  shmctl(id, IPC_RMID, 0);

  return segment;
}

#define malloc huge_malloc

#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
blasint n=0;
|
||||
int has_param_n = 0;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_PARAM_N"))) {
|
||||
n = atoi(p);
|
||||
if ((n>0) && (n<=to)) has_param_n = 1;
|
||||
}
|
||||
|
||||
if ( has_param_n == 1 )
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d N = %d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,n,inc_x,inc_y,loops);
|
||||
else
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
if ( has_param_n == 0 ) n = m;
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < n * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GER (&m, &n, alpha, x, &inc_x, y, &inc_y, a , &m);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
218
benchmark/gesv.c
Normal file
218
benchmark/gesv.c
Normal file
@@ -0,0 +1,218 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
double fabs(double);
|
||||
|
||||
#undef GESV
|
||||
#undef GETRS
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
#define GESV BLASFUNC(qgesv)
|
||||
#elif defined(DOUBLE)
|
||||
#define GESV BLASFUNC(dgesv)
|
||||
#else
|
||||
#define GESV BLASFUNC(sgesv)
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
#define GESV BLASFUNC(xgesv)
|
||||
#elif defined(DOUBLE)
|
||||
#define GESV BLASFUNC(zgesv)
|
||||
#else
|
||||
#define GESV BLASFUNC(cgesv)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate `size` bytes from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * The request passed to shmget() is (size + HUGE_PAGESIZE) masked down to a
 * multiple of HUGE_PAGESIZE.  On any failure a message is printed to stdout
 * and the process exits with status 1, so the function never returns NULL.
 * After attaching, the segment is marked for removal with IPC_RMID.
 *
 * NOTE: the enclosing #if currently ends in "&& 0", so this helper (and the
 * "#define malloc huge_malloc" that follows it) is compiled out.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  int segment;
  void *mapping;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapping = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapping == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal now that it is attached. */
  shmctl(segment, IPC_RMID, 0);

  return mapping;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
blasint *ipiv;
|
||||
|
||||
blasint m, i, j, info;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
|
||||
fprintf(stderr, " %dx%d : ", (int)m, (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
b[i + j * m * COMPSIZE] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (j = 0; j < m; ++j) {
|
||||
for (i = 0; i < m * COMPSIZE; ++i) {
|
||||
b[i] += a[i + j * m * COMPSIZE];
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GESV (&m, &m, a, &m, ipiv, b, &m, &info);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
|
||||
|
||||
fprintf(stderr,
|
||||
"%10.2f MFlops %10.6f s\n",
|
||||
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1);
|
||||
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
234
benchmark/getri.c
Normal file
234
benchmark/getri.c
Normal file
@@ -0,0 +1,234 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
#undef GETRF
|
||||
#undef GETRI
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
#define GETRF BLASFUNC(qgetrf)
|
||||
#define GETRI BLASFUNC(qgetri)
|
||||
#elif defined(DOUBLE)
|
||||
#define GETRF BLASFUNC(dgetrf)
|
||||
#define GETRI BLASFUNC(dgetri)
|
||||
#else
|
||||
#define GETRF BLASFUNC(sgetrf)
|
||||
#define GETRI BLASFUNC(sgetri)
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
#define GETRF BLASFUNC(xgetrf)
|
||||
#define GETRI BLASFUNC(xgetri)
|
||||
#elif defined(DOUBLE)
|
||||
#define GETRF BLASFUNC(zgetrf)
|
||||
#define GETRI BLASFUNC(zgetri)
|
||||
#else
|
||||
#define GETRF BLASFUNC(cgetrf)
|
||||
#define GETRI BLASFUNC(cgetri)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
extern void GETRI(blasint *m, FLOAT *a, blasint *lda, blasint *ipiv, FLOAT *work, blasint *lwork, blasint *info);
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate `size` bytes from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * The request passed to shmget() is (size + HUGE_PAGESIZE) masked down to a
 * multiple of HUGE_PAGESIZE.  On any failure a message is printed to stdout
 * and the process exits with status 1, so the function never returns NULL.
 * After attaching, the segment is marked for removal with IPC_RMID.
 *
 * NOTE: the enclosing #if currently ends in "&& 0", so this helper (and the
 * "#define malloc huge_malloc" that follows it) is compiled out.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  int segment;
  void *mapping;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapping = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapping == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal now that it is attached. */
  shmctl(segment, IPC_RMID, 0);

  return mapping;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*work;
|
||||
FLOAT wkopt[4];
|
||||
blasint *ipiv;
|
||||
blasint m, i, j, info,lwork;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
for(j = 0; j < to; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
lwork = -1;
|
||||
m=to;
|
||||
|
||||
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);
|
||||
|
||||
lwork = (blasint)wkopt[0];
|
||||
if (( work = (FLOAT *)malloc(sizeof(FLOAT) * lwork * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE FLops Time Lwork\n");
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
GETRF (&m, &m, a, &m, ipiv, &info);
|
||||
|
||||
if (info) {
|
||||
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
lwork = -1;
|
||||
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);
|
||||
|
||||
lwork = (blasint)wkopt[0];
|
||||
GETRI(&m, a, &m, ipiv, work, &lwork, &info);
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info) {
|
||||
fprintf(stderr, "failed compute inverse matrix .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops : %10.2f Sec : %d\n",
|
||||
COMPSIZE * COMPSIZE * (4.0/3.0 * (double)m * (double)m *(double)m - (double)m *(double)m + 5.0/3.0* (double)m) / time1 * 1.e-6,time1,lwork);
|
||||
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
192
benchmark/hemm.c
Normal file
192
benchmark/hemm.c
Normal file
@@ -0,0 +1,192 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HEMM
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HEMM BLASFUNC(zhemm)
|
||||
#else
|
||||
#define HEMM BLASFUNC(chemm)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate `size` bytes from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * The request passed to shmget() is (size + HUGE_PAGESIZE) masked down to a
 * multiple of HUGE_PAGESIZE.  On any failure a message is printed to stdout
 * and the process exits with status 1, so the function never returns NULL.
 * After attaching, the segment is marked for removal with IPC_RMID.
 *
 * NOTE: the enclosing #if currently ends in "&& 0", so this helper (and the
 * "#define malloc huge_malloc" that follows it) is compiled out.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  int segment;
  void *mapping;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapping = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapping == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal now that it is attached. */
  shmctl(segment, IPC_RMID, 0);

  return mapping;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char side='L';
|
||||
char uplo='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c\n", from, to, step,side,uplo);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
208
benchmark/hemv.c
Normal file
208
benchmark/hemv.c
Normal file
@@ -0,0 +1,208 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HEMV
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HEMV BLASFUNC(zhemv)
|
||||
#else
|
||||
#define HEMV BLASFUNC(chemv)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate `size` bytes from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * The request passed to shmget() is (size + HUGE_PAGESIZE) masked down to a
 * multiple of HUGE_PAGESIZE.  On any failure a message is printed to stdout
 * and the process exits with status 1, so the function never returns NULL.
 * After attaching, the segment is marked for removal with IPC_RMID.
 *
 * NOTE: the enclosing #if currently ends in "&& 0", so this helper (and the
 * "#define malloc huge_malloc" that follows it) is compiled out.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  int segment;
  void *mapping;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapping = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapping == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal now that it is attached. */
  shmctl(segment, IPC_RMID, 0);

  return mapping;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char uplo='L';
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m,(int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HEMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
191
benchmark/her2k.c
Normal file
191
benchmark/her2k.c
Normal file
@@ -0,0 +1,191 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HER2K
|
||||
#ifdef DOUBLE
|
||||
#define HER2K BLASFUNC(zher2k)
|
||||
#else
|
||||
#define HER2K BLASFUNC(cher2k)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate `size` bytes from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * The request passed to shmget() is (size + HUGE_PAGESIZE) masked down to a
 * multiple of HUGE_PAGESIZE.  On any failure a message is printed to stdout
 * and the process exits with status 1, so the function never returns NULL.
 * After attaching, the segment is marked for removal with IPC_RMID.
 *
 * NOTE: the enclosing #if currently ends in "&& 0", so this helper (and the
 * "#define malloc huge_malloc" that follows it) is compiled out.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  int segment;
  void *mapping;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapping = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapping == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal now that it is attached. */
  shmctl(segment, IPC_RMID, 0);

  return mapping;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
char trans='N';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
189
benchmark/herk.c
Normal file
189
benchmark/herk.c
Normal file
@@ -0,0 +1,189 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HERK
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HERK BLASFUNC(zherk)
|
||||
#else
|
||||
#define HERK BLASFUNC(cherk)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Obtain `size` bytes from a System V shared-memory segment requested with
 * the SHM_HUGETLB flag.
 *
 * The byte count handed to shmget() is (size + HUGE_PAGESIZE) masked down to
 * a multiple of HUGE_PAGESIZE.  The segment is attached with shmat() and
 * shmctl(IPC_RMID) is then issued on it.
 *
 * Returns the attached address.  When shmget() or shmat() fails, a message
 * is printed and the process exits with status 1.
 *
 * NOTE: the enclosing #if ends in "&& 0", so this helper (and the
 * malloc -> huge_malloc redirection after it) is currently compiled out.
 */
static void *huge_malloc(BLASLONG size){
  void *segment;
  int   id;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  id = shmget(IPC_PRIVATE,
              (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
              SHM_HUGETLB | IPC_CREAT |0600);

  if (id < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  segment = shmat(id, NULL, SHM_RND);

  /* shmat() reports failure with an all-ones pointer value. */
  if ((BLASLONG)segment == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(id, IPC_RMID, 0);

  return segment;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
char trans='N';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);
|
||||
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -83,22 +83,22 @@ int gettimeofday(struct timeval *tv, void *tz){
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -137,7 +137,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
blasint *ipiv;
|
||||
@@ -154,7 +154,7 @@ int MAIN__(int argc, char *argv[]){
|
||||
struct timeval start, stop;
|
||||
double time1, time2;
|
||||
|
||||
argc--;argv++;
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
@@ -165,15 +165,15 @@ int MAIN__(int argc, char *argv[]){
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
@@ -181,7 +181,7 @@ int MAIN__(int argc, char *argv[]){
|
||||
fprintf(stderr, " SIZE Residual Decompose Solve Total\n");
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
@@ -189,9 +189,9 @@ int MAIN__(int argc, char *argv[]){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (i = 0; i < m * COMPSIZE; ++i) b[i] = 0.;
|
||||
|
||||
|
||||
for (j = 0; j < m; ++j) {
|
||||
for (i = 0; i < m * COMPSIZE; ++i) {
|
||||
b[i] += a[i + j * m * COMPSIZE];
|
||||
@@ -208,7 +208,7 @@ int MAIN__(int argc, char *argv[]){
|
||||
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
@@ -221,7 +221,7 @@ int MAIN__(int argc, char *argv[]){
|
||||
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
time2 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
maxerr = 0.;
|
||||
@@ -239,7 +239,7 @@ int MAIN__(int argc, char *argv[]){
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
#ifdef XDOUBLE
|
||||
fprintf(stderr," %Le ", maxerr);
|
||||
#else
|
||||
@@ -247,7 +247,7 @@ int MAIN__(int argc, char *argv[]){
|
||||
#endif
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.2f MFlops %10.2f MFlops\n",
|
||||
" %10.2f MFlops %10.2f MFlops %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. / 3. * (double)m * (double)m * (double)m / time1 * 1.e-6,
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / time2 * 1.e-6,
|
||||
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m) / (time1 + time2) * 1.e-6);
|
||||
@@ -270,4 +270,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
65
benchmark/plot-filter.sh
Executable file
65
benchmark/plot-filter.sh
Executable file
@@ -0,0 +1,65 @@
|
||||
#!/bin/sh
|
||||
# **********************************************************************************
|
||||
# Copyright (c) 2014, The OpenBLAS Project
|
||||
# All rights reserved.
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# 3. Neither the name of the OpenBLAS project nor the names of
|
||||
# its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# **********************************************************************************
|
||||
|
||||
# ************************************************************************
|
||||
# sample filter for data output from benchmark programs
|
||||
#
|
||||
# usage example:
|
||||
# ./dgemm.goto 2>&1|./plot-filter.sh >OpenBLAS
|
||||
# ************************************************************************
|
||||
|
||||
# Select the output layout from the single optional argument:
#   L  Linpack benchmark output  (fields 1 and 8)
#   C  Cholesky benchmark output (fields 3 and 9)
#   B  Copy benchmark output     (fields 1 and 3 of "MBytes" lines)
#   anything else, or any other argument count: fields 1 and 3 of "MFlops" lines
#
# Each filter reads the benchmark output on stdin, prints "<size> <integer rate>"
# per matching line, and drops the first matching line with tail.
# "tail -n +2" is the POSIX spelling of GNU's "tail --lines=+2".
if [ $# -eq 1 ]
then
  arg1=$1
else
  arg1=0
fi

case $arg1 in

  L)
    # Linpack Benchmark
    awk '/MFlops/ { print $1,int($8) }'|tail -n +2
    ;;

  C)
    # Cholesky Benchmark
    awk '/MFlops/ { print $3,int($9) }'|tail -n +2
    ;;

  B)
    # Copy Benchmark
    awk '/MBytes/ { print $1,int($3) }'|tail -n +2
    ;;

  *)
    awk '/MFlops/ { print $1,int($3) }'|tail -n +2
    ;;
esac
|
||||
|
||||
42
benchmark/plot-header
Normal file
42
benchmark/plot-header
Normal file
@@ -0,0 +1,42 @@
|
||||
# **********************************************************************************
|
||||
# Copyright (c) 2014, The OpenBLAS Project
|
||||
# All rights reserved.
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# 3. Neither the name of the OpenBLAS project nor the names of
|
||||
# its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# **********************************************************************************
|
||||
|
||||
# Sample gnuplot header for plotting benchmark results.
# Terminal, axis labels, grid lines and legend position.
set term x11 font sans;
|
||||
set ylabel "MFlops";
|
||||
set xlabel "Size";
|
||||
set grid xtics;
|
||||
set grid ytics;
|
||||
set key left;
|
||||
# The timestamp text embeds the output of the `whoami` shell command.
set timestamp "generated on %Y-%m-%d by `whoami`"
|
||||
# Title text is specific to one measurement; edit it per benchmark run.
set title "Dtrsm\nUPLO=U TRANS=N SIDE=L\nBulldozer 1 Thread"
|
||||
# Plots three data files named 'OpenBLAS', 'ACML' and 'MKL' from the current
# directory, each smoothed with a bezier curve.
plot 'OpenBLAS' smooth bezier, 'ACML' smooth bezier, 'MKL' smooth bezier;
|
||||
# NOTE(review): "set output" comes after the plot command and the terminal is
# still x11, so this presumably does not write the plot above to print.png --
# confirm the intended order / terminal.
set output "print.png";
|
||||
show title;
|
||||
show plot;
|
||||
show output;
|
||||
|
||||
|
||||
286
benchmark/potrf.c
Normal file
286
benchmark/potrf.c
Normal file
@@ -0,0 +1,286 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
double fabs(double);
|
||||
|
||||
#undef POTRF
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
#define POTRF BLASFUNC(qpotrf)
|
||||
#define POTRS BLASFUNC(qpotrs)
|
||||
#define POTRI BLASFUNC(qpotri)
|
||||
#define SYRK BLASFUNC(qsyrk)
|
||||
#elif defined(DOUBLE)
|
||||
#define POTRF BLASFUNC(dpotrf)
|
||||
#define POTRS BLASFUNC(dpotrs)
|
||||
#define POTRI BLASFUNC(dpotri)
|
||||
#define SYRK BLASFUNC(dsyrk)
|
||||
#else
|
||||
#define POTRF BLASFUNC(spotrf)
|
||||
#define POTRS BLASFUNC(spotrs)
|
||||
#define POTRI BLASFUNC(spotri)
|
||||
#define SYRK BLASFUNC(ssyrk)
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
#define POTRF BLASFUNC(xpotrf)
|
||||
#define POTRS BLASFUNC(xpotrs)
|
||||
#define POTRI BLASFUNC(xpotri)
|
||||
#define SYRK BLASFUNC(xherk)
|
||||
#elif defined(DOUBLE)
|
||||
#define POTRF BLASFUNC(zpotrf)
|
||||
#define POTRS BLASFUNC(zpotrs)
|
||||
#define POTRI BLASFUNC(zpotri)
|
||||
#define SYRK BLASFUNC(zherk)
|
||||
#else
|
||||
#define POTRF BLASFUNC(cpotrf)
|
||||
#define POTRS BLASFUNC(cpotrs)
|
||||
#define POTRI BLASFUNC(cpotri)
|
||||
#define SYRK BLASFUNC(cherk)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// extern void POTRI(char *uplo, blasint *m, FLOAT *a, blasint *lda, blasint *info);
|
||||
// extern void POTRS(char *uplo, blasint *m, blasint *n, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb, blasint *info);
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
#ifndef COMPLEX
|
||||
char *trans[] = {"T", "N"};
|
||||
#else
|
||||
char *trans[] = {"C", "N"};
|
||||
#endif
|
||||
char *uplo[] = {"U", "L"};
|
||||
FLOAT alpha[] = {1.0, 0.0};
|
||||
FLOAT beta [] = {0.0, 0.0};
|
||||
|
||||
FLOAT *a, *b;
|
||||
|
||||
char *p;
|
||||
char btest = 'F';
|
||||
|
||||
blasint m, i, j, info, uplos=0;
|
||||
double flops;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO")))
|
||||
if (*p == 'L') uplos=1;
|
||||
|
||||
if ((p = getenv("OPENBLAS_TEST"))) btest=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c\n", from, to, step,*uplo[uplos]);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
|
||||
#ifndef COMPLEX
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) a[i + j * m] = 0.;
|
||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[i + j * m] = 0.;
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) {
|
||||
a[(i + j * m) * 2 + 0] = 0.;
|
||||
a[(i + j * m) * 2 + 1] = 0.;
|
||||
}
|
||||
|
||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[(j + j * m) * 2 + 1] = 0.;
|
||||
|
||||
for(i = j + 1; i < m; i++) {
|
||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) {
|
||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[(j + j * m) * 2 + 1] = 0.;
|
||||
|
||||
for(i = j + 1; i < m; i++) {
|
||||
a[(i + j * m) * 2 + 0] = 0.;
|
||||
a[(i + j * m) * 2 + 1] = 0.;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
POTRF(uplo[uplos], &m, b, &m, &info);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info != 0) {
|
||||
fprintf(stderr, "Potrf info = %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6;
|
||||
|
||||
if ( btest == 'S' )
|
||||
{
|
||||
|
||||
for(j = 0; j < to; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info != 0) {
|
||||
fprintf(stderr, "Potrs info = %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6;
|
||||
|
||||
}
|
||||
|
||||
if ( btest == 'I' )
|
||||
{
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
POTRI(uplo[uplos], &m, b, &m, &info);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info != 0) {
|
||||
fprintf(stderr, "Potri info = %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6;
|
||||
}
|
||||
|
||||
fprintf(stderr, "%8d : %10.2f MFlops : %10.3f Sec : Test=%c\n",m,flops ,time1,btest);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
202
benchmark/scal.c
Normal file
202
benchmark/scal.c
Normal file
@@ -0,0 +1,202 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SCAL
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define SCAL BLASFUNC(zscal)
|
||||
#else
|
||||
#define SCAL BLASFUNC(cscal)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define SCAL BLASFUNC(dscal)
|
||||
#else
|
||||
#define SCAL BLASFUNC(sscal)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Obtain `size` bytes from a System V shared-memory segment requested with
 * the SHM_HUGETLB flag.
 *
 * The byte count handed to shmget() is (size + HUGE_PAGESIZE) masked down to
 * a multiple of HUGE_PAGESIZE.  The segment is attached with shmat() and
 * shmctl(IPC_RMID) is then issued on it.
 *
 * Returns the attached address.  When shmget() or shmat() fails, a message
 * is printed and the process exits with status 1.
 *
 * NOTE: the enclosing #if ends in "&& 0", so this helper (and the
 * malloc -> huge_malloc redirection after it) is currently compiled out.
 */
static void *huge_malloc(BLASLONG size){
  void *segment;
  int   id;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  id = shmget(IPC_PRIVATE,
              (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
              SHM_HUGETLB | IPC_CREAT |0600);

  if (id < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  segment = shmat(id, NULL, SHM_RND);

  /* shmat() reports failure with an all-ones pointer value. */
  if ((BLASLONG)segment == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(id, IPC_RMID, 0);

  return segment;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SCAL (&m, alpha, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
#ifdef COMPLEX
|
||||
fprintf(stderr, " %10.2f MFlops\n", 6. * (double)m / timeg * 1.e-6);
|
||||
#else
|
||||
fprintf(stderr, " %10.2f MFlops\n", 1. * (double)m / timeg * 1.e-6);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
56
benchmark/scripts/NUMPY/cgemm.py
Executable file
56
benchmark/scripts/NUMPY/cgemm.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_cgemm(N,l):
    """Time l products of two N x N complex matrices and print the rate.

    The matrices are built from float32 real and imaginary parts. One line
    is printed with the size, the MFlops rate (8*N**3 flops per product)
    and the elapsed wall-clock seconds. Returns None.
    """
    A = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j
    B = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j

    start = time.time()
    for _ in range(0,l):
        ref = numpy.dot(A,B)
    end = time.time()

    timediff = (end - start)
    if timediff == 0:
        # The clock did not advance (coarse timer or l == 0); report an
        # infinite rate instead of raising ZeroDivisionError.
        mflops = float('inf')
    else:
        mflops = ( 8*N*N*N) *l / timediff
        mflops *= 1e-6

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults for the size sweep and the number of timed repetitions.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything past the fourth argument is ignored.
    for z, arg in enumerate(sys.argv):
        if z == 1:
            N = int(arg)
        elif z == 2:
            NMAX = int(arg)
        elif z == 3:
            NINC = int(arg)
        elif z == 4:
            LOOPS = int(arg)

    # A non-empty OPENBLAS_LOOPS overrides the repetition count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Include NMAX itself but never step past it. The former bound
    # (NMAX + NINC) overshot whenever NMAX - N was not a multiple of NINC.
    stop = NMAX + 1 if NINC > 0 else NMAX - 1
    for i in range(N, stop, NINC):
        run_cgemm(i, LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/cgemv.py
Executable file
56
benchmark/scripts/NUMPY/cgemv.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_cgemv(N,l):
    """Time l products of an N x N complex matrix with a length-N complex vector.

    Operands are built from float32 real and imaginary parts. One line is
    printed with the size, the MFlops rate (8*N**2 flops per product) and
    the elapsed wall-clock seconds. Returns None.
    """
    A = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j
    B = randn(N).astype('float32') + randn(N).astype('float32') * 1j

    start = time.time()
    for _ in range(0,l):
        ref = numpy.dot(A,B)
    end = time.time()

    timediff = (end - start)
    if timediff == 0:
        # The clock did not advance (coarse timer or l == 0); report an
        # infinite rate instead of raising ZeroDivisionError.
        mflops = float('inf')
    else:
        mflops = ( 8*N*N) *l / timediff
        mflops *= 1e-6

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults for the size sweep and the number of timed repetitions.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything past the fourth argument is ignored.
    for z, arg in enumerate(sys.argv):
        if z == 1:
            N = int(arg)
        elif z == 2:
            NMAX = int(arg)
        elif z == 3:
            NINC = int(arg)
        elif z == 4:
            LOOPS = int(arg)

    # A non-empty OPENBLAS_LOOPS overrides the repetition count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Include NMAX itself but never step past it. The former bound
    # (NMAX + NINC) overshot whenever NMAX - N was not a multiple of NINC.
    stop = NMAX + 1 if NINC > 0 else NMAX - 1
    for i in range(N, stop, NINC):
        run_cgemv(i, LOOPS)
|
||||
|
||||
58
benchmark/scripts/NUMPY/daxpy.py
Executable file
58
benchmark/scripts/NUMPY/daxpy.py
Executable file
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
from scipy.linalg.blas import daxpy
|
||||
|
||||
|
||||
def run_daxpy(N,l):
    """Time l calls of SciPy's daxpy on two length-N float64 vectors.

    Each call passes a=2.0 and its result replaces y for the next call.
    One line is printed with the size, the MFlops rate (2*N flops per
    call) and the elapsed wall-clock seconds. Returns None.
    """
    x = randn(N).astype('float64')
    y = randn(N).astype('float64')

    start = time.time()
    for _ in range(0,l):
        y = daxpy(x,y, a=2.0 )
    end = time.time()

    timediff = (end - start)
    if timediff == 0:
        # The clock did not advance (coarse timer or l == 0); report an
        # infinite rate instead of raising ZeroDivisionError.
        mflops = float('inf')
    else:
        mflops = ( 2*N ) *l / timediff
        mflops *= 1e-6

    size = "%d" % (N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults for the size sweep and the number of timed repetitions.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything past the fourth argument is ignored.
    for z, arg in enumerate(sys.argv):
        if z == 1:
            N = int(arg)
        elif z == 2:
            NMAX = int(arg)
        elif z == 3:
            NINC = int(arg)
        elif z == 4:
            LOOPS = int(arg)

    # A non-empty OPENBLAS_LOOPS overrides the repetition count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Include NMAX itself but never step past it. The former bound
    # (NMAX + NINC) overshot whenever NMAX - N was not a multiple of NINC.
    stop = NMAX + 1 if NINC > 0 else NMAX - 1
    for i in range(N, stop, NINC):
        run_daxpy(i, LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/ddot.py
Executable file
56
benchmark/scripts/NUMPY/ddot.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_ddot(N,l):
    """Time l dot products of two length-N float64 vectors and print the rate.

    One line is printed with the size, the MFlops rate (2*N flops per
    product) and the elapsed wall-clock seconds. Returns None.
    """
    A = randn(N).astype('float64')
    B = randn(N).astype('float64')

    start = time.time()
    for _ in range(0,l):
        ref = numpy.dot(A,B)
    end = time.time()

    timediff = (end - start)
    if timediff == 0:
        # The clock did not advance (coarse timer or l == 0); report an
        # infinite rate instead of raising ZeroDivisionError.
        mflops = float('inf')
    else:
        mflops = ( 2*N ) *l / timediff
        mflops *= 1e-6

    size = "%d" % (N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults for the size sweep and the number of timed repetitions.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything past the fourth argument is ignored.
    for z, arg in enumerate(sys.argv):
        if z == 1:
            N = int(arg)
        elif z == 2:
            NMAX = int(arg)
        elif z == 3:
            NINC = int(arg)
        elif z == 4:
            LOOPS = int(arg)

    # A non-empty OPENBLAS_LOOPS overrides the repetition count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Include NMAX itself but never step past it. The former bound
    # (NMAX + NINC) overshot whenever NMAX - N was not a multiple of NINC.
    stop = NMAX + 1 if NINC > 0 else NMAX - 1
    for i in range(N, stop, NINC):
        run_ddot(i, LOOPS)
|
||||
|
||||
55
benchmark/scripts/NUMPY/deig.py
Executable file
55
benchmark/scripts/NUMPY/deig.py
Executable file
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_deig(N,l):
    """Time l eigen-decompositions of an N x N float64 matrix and print the rate.

    One line is printed with the size, the MFlops rate (using 26.33*N**3
    flops per decomposition) and the elapsed wall-clock seconds.
    Returns None.
    """
    A = randn(N,N).astype('float64')

    start = time.time()
    for _ in range(0,l):
        la,v = numpy.linalg.eig(A)
    end = time.time()

    timediff = (end - start)
    if timediff == 0:
        # The clock did not advance (coarse timer or l == 0); report an
        # infinite rate instead of raising ZeroDivisionError.
        mflops = float('inf')
    else:
        mflops = ( 26.33 *N*N*N) *l / timediff
        mflops *= 1e-6

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults for the size sweep and the number of timed repetitions.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything past the fourth argument is ignored.
    for z, arg in enumerate(sys.argv):
        if z == 1:
            N = int(arg)
        elif z == 2:
            NMAX = int(arg)
        elif z == 3:
            NINC = int(arg)
        elif z == 4:
            LOOPS = int(arg)

    # A non-empty OPENBLAS_LOOPS overrides the repetition count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Include NMAX itself but never step past it. The former bound
    # (NMAX + NINC) overshot whenever NMAX - N was not a multiple of NINC.
    stop = NMAX + 1 if NINC > 0 else NMAX - 1
    for i in range(N, stop, NINC):
        run_deig(i, LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/dgemm.py
Executable file
56
benchmark/scripts/NUMPY/dgemm.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_dgemm(N,l):
    """Time l products of two N x N float64 matrices and print the rate.

    One line is printed with the size, the MFlops rate (2*N**3 flops per
    product) and the elapsed wall-clock seconds. Returns None.
    """
    A = randn(N,N).astype('float64')
    B = randn(N,N).astype('float64')

    start = time.time()
    for _ in range(0,l):
        ref = numpy.dot(A,B)
    end = time.time()

    timediff = (end - start)
    if timediff == 0:
        # The clock did not advance (coarse timer or l == 0); report an
        # infinite rate instead of raising ZeroDivisionError.
        mflops = float('inf')
    else:
        mflops = ( 2*N*N*N) *l / timediff
        mflops *= 1e-6

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults for the size sweep and the number of timed repetitions.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything past the fourth argument is ignored.
    for z, arg in enumerate(sys.argv):
        if z == 1:
            N = int(arg)
        elif z == 2:
            NMAX = int(arg)
        elif z == 3:
            NINC = int(arg)
        elif z == 4:
            LOOPS = int(arg)

    # A non-empty OPENBLAS_LOOPS overrides the repetition count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Include NMAX itself but never step past it. The former bound
    # (NMAX + NINC) overshot whenever NMAX - N was not a multiple of NINC.
    stop = NMAX + 1 if NINC > 0 else NMAX - 1
    for i in range(N, stop, NINC):
        run_dgemm(i, LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/dgemv.py
Executable file
56
benchmark/scripts/NUMPY/dgemv.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_dgemv(N,l):
    """Time l products of an N x N float64 matrix with a length-N vector.

    One line is printed with the size, the MFlops rate (2*N**2 flops per
    product) and the elapsed wall-clock seconds. Returns None.
    """
    A = randn(N,N).astype('float64')
    B = randn(N).astype('float64')

    start = time.time()
    for _ in range(0,l):
        ref = numpy.dot(A,B)
    end = time.time()

    timediff = (end - start)
    if timediff == 0:
        # The clock did not advance (coarse timer or l == 0); report an
        # infinite rate instead of raising ZeroDivisionError.
        mflops = float('inf')
    else:
        mflops = ( 2*N*N) *l / timediff
        mflops *= 1e-6

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults for the size sweep and the number of timed repetitions.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything past the fourth argument is ignored.
    for z, arg in enumerate(sys.argv):
        if z == 1:
            N = int(arg)
        elif z == 2:
            NMAX = int(arg)
        elif z == 3:
            NINC = int(arg)
        elif z == 4:
            LOOPS = int(arg)

    # A non-empty OPENBLAS_LOOPS overrides the repetition count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Include NMAX itself but never step past it. The former bound
    # (NMAX + NINC) overshot whenever NMAX - N was not a multiple of NINC.
    stop = NMAX + 1 if NINC > 0 else NMAX - 1
    for i in range(N, stop, NINC):
        run_dgemv(i, LOOPS)
|
||||
|
||||
58
benchmark/scripts/NUMPY/dgesv.py
Executable file
58
benchmark/scripts/NUMPY/dgesv.py
Executable file
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
from scipy.linalg.lapack import dgesv
|
||||
|
||||
def run_dgesv(N,l):
    """Time l calls of SciPy's dgesv on N x N float64 operands and print the rate.

    One line is printed with the size, the MFlops rate (using
    2/3*N**3 + 2*N**3 flops per call) and the elapsed wall-clock seconds.
    Returns None.
    """
    a = randn(N,N).astype('float64')
    b = randn(N,N).astype('float64')

    start = time.time()
    for _ in range(0,l):
        # The trailing 1, 1 are passed positionally; presumably the
        # wrapper's overwrite_a / overwrite_b flags -- verify against SciPy.
        dgesv(a,b,1,1)
    end = time.time()

    timediff = (end - start)
    if timediff == 0:
        # The clock did not advance (coarse timer or l == 0); report an
        # infinite rate instead of raising ZeroDivisionError.
        mflops = float('inf')
    else:
        mflops = ( 2.0/3.0 *N*N*N + 2.0*N*N*N) *l / timediff
        mflops *= 1e-6

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults for the size sweep and the number of timed repetitions.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything past the fourth argument is ignored.
    for z, arg in enumerate(sys.argv):
        if z == 1:
            N = int(arg)
        elif z == 2:
            NMAX = int(arg)
        elif z == 3:
            NINC = int(arg)
        elif z == 4:
            LOOPS = int(arg)

    # A non-empty OPENBLAS_LOOPS overrides the repetition count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Include NMAX itself but never step past it. The former bound
    # (NMAX + NINC) overshot whenever NMAX - N was not a multiple of NINC.
    stop = NMAX + 1 if NINC > 0 else NMAX - 1
    for i in range(N, stop, NINC):
        run_dgesv(i, LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/dsolve.py
Executable file
56
benchmark/scripts/NUMPY/dsolve.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_dsolve(N,l):
    """Time l calls of numpy.linalg.solve on N x N float64 operands.

    One line is printed with the size, the MFlops rate (using
    2/3*N**3 + 2*N**3 flops per solve) and the elapsed wall-clock seconds.
    Returns None.
    """
    A = randn(N,N).astype('float64')
    B = randn(N,N).astype('float64')

    start = time.time()
    for _ in range(0,l):
        ref = numpy.linalg.solve(A,B)
    end = time.time()

    timediff = (end - start)
    if timediff == 0:
        # The clock did not advance (coarse timer or l == 0); report an
        # infinite rate instead of raising ZeroDivisionError.
        mflops = float('inf')
    else:
        mflops = ( 2.0/3.0 *N*N*N + 2.0*N*N*N) *l / timediff
        mflops *= 1e-6

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults for the size sweep and the number of timed repetitions.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything past the fourth argument is ignored.
    for z, arg in enumerate(sys.argv):
        if z == 1:
            N = int(arg)
        elif z == 2:
            NMAX = int(arg)
        elif z == 3:
            NINC = int(arg)
        elif z == 4:
            LOOPS = int(arg)

    # A non-empty OPENBLAS_LOOPS overrides the repetition count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Include NMAX itself but never step past it. The former bound
    # (NMAX + NINC) overshot whenever NMAX - N was not a multiple of NINC.
    stop = NMAX + 1 if NINC > 0 else NMAX - 1
    for i in range(N, stop, NINC):
        run_dsolve(i, LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/sdot.py
Executable file
56
benchmark/scripts/NUMPY/sdot.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_sdot(N,l):
    """Time l dot products of two length-N float32 vectors and print the rate.

    One line is printed with the size, the MFlops rate (2*N flops per
    product) and the elapsed wall-clock seconds. Returns None.
    """
    A = randn(N).astype('float32')
    B = randn(N).astype('float32')

    start = time.time()
    for _ in range(0,l):
        ref = numpy.dot(A,B)
    end = time.time()

    timediff = (end - start)
    if timediff == 0:
        # The clock did not advance (coarse timer or l == 0); report an
        # infinite rate instead of raising ZeroDivisionError.
        mflops = float('inf')
    else:
        mflops = ( 2*N ) *l / timediff
        mflops *= 1e-6

    size = "%d" % (N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults for the size sweep and the number of timed repetitions.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything past the fourth argument is ignored.
    for z, arg in enumerate(sys.argv):
        if z == 1:
            N = int(arg)
        elif z == 2:
            NMAX = int(arg)
        elif z == 3:
            NINC = int(arg)
        elif z == 4:
            LOOPS = int(arg)

    # A non-empty OPENBLAS_LOOPS overrides the repetition count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Include NMAX itself but never step past it. The former bound
    # (NMAX + NINC) overshot whenever NMAX - N was not a multiple of NINC.
    stop = NMAX + 1 if NINC > 0 else NMAX - 1
    for i in range(N, stop, NINC):
        run_sdot(i, LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/sgemm.py
Executable file
56
benchmark/scripts/NUMPY/sgemm.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_sgemm(N,l):
    """Time l products of two N x N float32 matrices and print the rate.

    One line is printed with the size, the MFlops rate (2*N**3 flops per
    product) and the elapsed wall-clock seconds. Returns None.
    """
    A = randn(N,N).astype('float32')
    B = randn(N,N).astype('float32')

    start = time.time()
    for _ in range(0,l):
        ref = numpy.dot(A,B)
    end = time.time()

    timediff = (end - start)
    if timediff == 0:
        # The clock did not advance (coarse timer or l == 0); report an
        # infinite rate instead of raising ZeroDivisionError.
        mflops = float('inf')
    else:
        mflops = ( 2*N*N*N) *l / timediff
        mflops *= 1e-6

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults for the size sweep and the number of timed repetitions.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything past the fourth argument is ignored.
    for z, arg in enumerate(sys.argv):
        if z == 1:
            N = int(arg)
        elif z == 2:
            NMAX = int(arg)
        elif z == 3:
            NINC = int(arg)
        elif z == 4:
            LOOPS = int(arg)

    # A non-empty OPENBLAS_LOOPS overrides the repetition count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Include NMAX itself but never step past it. The former bound
    # (NMAX + NINC) overshot whenever NMAX - N was not a multiple of NINC.
    stop = NMAX + 1 if NINC > 0 else NMAX - 1
    for i in range(N, stop, NINC):
        run_sgemm(i, LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/sgemv.py
Executable file
56
benchmark/scripts/NUMPY/sgemv.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_sgemv(N,l):
    """Time l products of an N x N float32 matrix with a length-N vector.

    One line is printed with the size, the MFlops rate (2*N**2 flops per
    product) and the elapsed wall-clock seconds. Returns None.
    """
    A = randn(N,N).astype('float32')
    B = randn(N).astype('float32')

    start = time.time()
    for _ in range(0,l):
        ref = numpy.dot(A,B)
    end = time.time()

    timediff = (end - start)
    if timediff == 0:
        # The clock did not advance (coarse timer or l == 0); report an
        # infinite rate instead of raising ZeroDivisionError.
        mflops = float('inf')
    else:
        mflops = ( 2*N*N) *l / timediff
        mflops *= 1e-6

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults for the size sweep and the number of timed repetitions.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything past the fourth argument is ignored.
    for z, arg in enumerate(sys.argv):
        if z == 1:
            N = int(arg)
        elif z == 2:
            NMAX = int(arg)
        elif z == 3:
            NINC = int(arg)
        elif z == 4:
            LOOPS = int(arg)

    # A non-empty OPENBLAS_LOOPS overrides the repetition count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Include NMAX itself but never step past it. The former bound
    # (NMAX + NINC) overshot whenever NMAX - N was not a multiple of NINC.
    stop = NMAX + 1 if NINC > 0 else NMAX - 1
    for i in range(N, stop, NINC):
        run_sgemv(i, LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/zgemm.py
Executable file
56
benchmark/scripts/NUMPY/zgemm.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_zgemm(N,l):
    """Time l products of two N x N complex matrices and print the rate.

    The matrices are built from float64 real and imaginary parts. One line
    is printed with the size, the MFlops rate (8*N**3 flops per product)
    and the elapsed wall-clock seconds. Returns None.
    """
    A = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j
    B = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j

    start = time.time()
    for _ in range(0,l):
        ref = numpy.dot(A,B)
    end = time.time()

    timediff = (end - start)
    if timediff == 0:
        # The clock did not advance (coarse timer or l == 0); report an
        # infinite rate instead of raising ZeroDivisionError.
        mflops = float('inf')
    else:
        mflops = ( 8*N*N*N) *l / timediff
        mflops *= 1e-6

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults for the size sweep and the number of timed repetitions.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything past the fourth argument is ignored.
    for z, arg in enumerate(sys.argv):
        if z == 1:
            N = int(arg)
        elif z == 2:
            NMAX = int(arg)
        elif z == 3:
            NINC = int(arg)
        elif z == 4:
            LOOPS = int(arg)

    # A non-empty OPENBLAS_LOOPS overrides the repetition count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Include NMAX itself but never step past it. The former bound
    # (NMAX + NINC) overshot whenever NMAX - N was not a multiple of NINC.
    stop = NMAX + 1 if NINC > 0 else NMAX - 1
    for i in range(N, stop, NINC):
        run_zgemm(i, LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/zgemv.py
Executable file
56
benchmark/scripts/NUMPY/zgemv.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_zgemv(N,l):
    """Time l products of an N x N complex matrix with a length-N complex vector.

    Operands are built from float64 real and imaginary parts. One line is
    printed with the size, the MFlops rate (8*N**2 flops per product) and
    the elapsed wall-clock seconds. Returns None.
    """
    A = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j
    B = randn(N).astype('float64') + randn(N).astype('float64') * 1j

    start = time.time()
    for _ in range(0,l):
        ref = numpy.dot(A,B)
    end = time.time()

    timediff = (end - start)
    if timediff == 0:
        # The clock did not advance (coarse timer or l == 0); report an
        # infinite rate instead of raising ZeroDivisionError.
        mflops = float('inf')
    else:
        mflops = ( 8*N*N) *l / timediff
        mflops *= 1e-6

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults for the size sweep and the number of timed repetitions.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything past the fourth argument is ignored.
    for z, arg in enumerate(sys.argv):
        if z == 1:
            N = int(arg)
        elif z == 2:
            NMAX = int(arg)
        elif z == 3:
            NINC = int(arg)
        elif z == 4:
            LOOPS = int(arg)

    # A non-empty OPENBLAS_LOOPS overrides the repetition count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Include NMAX itself but never step past it. The former bound
    # (NMAX + NINC) overshot whenever NMAX - N was not a multiple of NINC.
    stop = NMAX + 1 if NINC > 0 else NMAX - 1
    for i in range(N, stop, NINC):
        run_zgemv(i, LOOPS)
|
||||
|
||||
56
benchmark/scripts/OCTAVE/cgemm.m
Executable file
56
benchmark/scripts/OCTAVE/cgemm.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
# Default sweep: sizes nfrom, nfrom+nstep, ... while <= nto, with `loops`
# timed products per size.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

# Positional command-line arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
arg_list = argv();
for i = 1:nargin
  if i == 1
    nfrom = str2num(arg_list{i});
  elseif i == 2
    nto = str2num(arg_list{i});
  elseif i == 3
    nstep = str2num(arg_list{i});
  elseif i == 4
    loops = str2num(arg_list{i});
  endif
endfor

# A non-empty OPENBLAS_LOOPS replaces the repetition count.
p = getenv("OPENBLAS_LOOPS");
if p
  loops = str2num(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto

  # Single-precision complex n x n operands.
  A = single(rand(n,n)) + single(rand(n,n)) * 1i;
  B = single(rand(n,n)) + single(rand(n,n)) * 1i;

  t0 = clock();
  done = 0;
  while done < loops
    C = A * B;
    done += 1;
  endwhile
  elapsed = etime(clock(), t0);

  # Rate uses 4.0 * 2.0 * n^3 flops per product.
  mflops = ( 4.0 * 2.0*n*n*n *loops ) / ( elapsed * 1.0e6 );

  label = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, elapsed);

  n += nstep;

endwhile
|
||||
56
benchmark/scripts/OCTAVE/cgemv.m
Executable file
56
benchmark/scripts/OCTAVE/cgemv.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
# Default sweep: sizes nfrom, nfrom+nstep, ... while <= nto, with `loops`
# timed products per size.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

# Positional command-line arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
arg_list = argv();
for i = 1:nargin
  if i == 1
    nfrom = str2num(arg_list{i});
  elseif i == 2
    nto = str2num(arg_list{i});
  elseif i == 3
    nstep = str2num(arg_list{i});
  elseif i == 4
    loops = str2num(arg_list{i});
  endif
endfor

# A non-empty OPENBLAS_LOOPS replaces the repetition count.
p = getenv("OPENBLAS_LOOPS");
if p
  loops = str2num(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto

  # Single-precision complex n x n matrix and n x 1 vector.
  A = single(rand(n,n)) + single(rand(n,n)) * 1i;
  B = single(rand(n,1)) + single(rand(n,1)) * 1i;

  t0 = clock();
  done = 0;
  while done < loops
    C = A * B;
    done += 1;
  endwhile
  elapsed = etime(clock(), t0);

  # Rate uses 4.0 * 2.0 * n^2 flops per product.
  mflops = ( 4.0 * 2.0*n*n *loops ) / ( elapsed * 1.0e6 );

  label = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, elapsed);

  n += nstep;

endwhile
|
||||
56
benchmark/scripts/OCTAVE/deig.m
Executable file
56
benchmark/scripts/OCTAVE/deig.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
# Default sweep: sizes nfrom, nfrom+nstep, ... while <= nto, with `loops`
# timed decompositions per size.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

# Positional command-line arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
arg_list = argv();
for i = 1:nargin
  if i == 1
    nfrom = str2num(arg_list{i});
  elseif i == 2
    nto = str2num(arg_list{i});
  elseif i == 3
    nstep = str2num(arg_list{i});
  elseif i == 4
    loops = str2num(arg_list{i});
  endif
endfor

# A non-empty OPENBLAS_LOOPS replaces the repetition count.
p = getenv("OPENBLAS_LOOPS");
if p
  loops = str2num(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto

  A = double(rand(n,n));

  t0 = clock();
  done = 0;
  while done < loops
    [V,lambda] = eig(A);
    done += 1;
  endwhile
  elapsed = etime(clock(), t0);

  # Rate uses 26.33 * n^3 flops per decomposition.
  mflops = ( 26.33 *n*n*n ) *loops / ( elapsed * 1.0e6 );

  label = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, elapsed );

  n += nstep;

endwhile
|
||||
56
benchmark/scripts/OCTAVE/dgemm.m
Executable file
56
benchmark/scripts/OCTAVE/dgemm.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
# Default sweep: sizes nfrom, nfrom+nstep, ... while <= nto, with `loops`
# timed products per size.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

# Positional command-line arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
arg_list = argv();
for i = 1:nargin
  if i == 1
    nfrom = str2num(arg_list{i});
  elseif i == 2
    nto = str2num(arg_list{i});
  elseif i == 3
    nstep = str2num(arg_list{i});
  elseif i == 4
    loops = str2num(arg_list{i});
  endif
endfor

# A non-empty OPENBLAS_LOOPS replaces the repetition count.
p = getenv("OPENBLAS_LOOPS");
if p
  loops = str2num(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto

  A = double(rand(n,n));
  B = double(rand(n,n));

  t0 = clock();
  done = 0;
  while done < loops
    C = A * B;
    done += 1;
  endwhile
  elapsed = etime(clock(), t0);

  # Rate uses 2.0 * n^3 flops per product.
  mflops = ( 2.0*n*n*n *loops ) / ( elapsed * 1.0e6 );

  label = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, elapsed);

  n += nstep;

endwhile
|
||||
56
benchmark/scripts/OCTAVE/dgemv.m
Executable file
56
benchmark/scripts/OCTAVE/dgemv.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
# Default sweep: sizes nfrom, nfrom+nstep, ... while <= nto, with `loops`
# timed products per size.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

# Positional command-line arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
arg_list = argv();
for i = 1:nargin
  if i == 1
    nfrom = str2num(arg_list{i});
  elseif i == 2
    nto = str2num(arg_list{i});
  elseif i == 3
    nstep = str2num(arg_list{i});
  elseif i == 4
    loops = str2num(arg_list{i});
  endif
endfor

# A non-empty OPENBLAS_LOOPS replaces the repetition count.
p = getenv("OPENBLAS_LOOPS");
if p
  loops = str2num(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto

  A = double(rand(n,n));
  B = double(rand(n,1));

  t0 = clock();
  done = 0;
  while done < loops
    C = A * B;
    done += 1;
  endwhile
  elapsed = etime(clock(), t0);

  # Rate uses 2.0 * n^2 flops per product.
  mflops = ( 2.0*n*n *loops ) / ( elapsed * 1.0e6 );

  label = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, elapsed);

  n += nstep;

endwhile
|
||||
59
benchmark/scripts/OCTAVE/dsolve.m
Executable file
59
benchmark/scripts/OCTAVE/dsolve.m
Executable file
@@ -0,0 +1,59 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
# Default sweep: sizes nfrom, nfrom+nstep, ... while <= nto, with `loops`
# timed solves per size.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

# Positional command-line arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
arg_list = argv();
for i = 1:nargin
  if i == 1
    nfrom = str2num(arg_list{i});
  elseif i == 2
    nto = str2num(arg_list{i});
  elseif i == 3
    nstep = str2num(arg_list{i});
  elseif i == 4
    loops = str2num(arg_list{i});
  endif
endfor

# A non-empty OPENBLAS_LOOPS replaces the repetition count.
p = getenv("OPENBLAS_LOOPS");
if p
  loops = str2num(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto

  A = double(rand(n,n));
  B = double(rand(n,n));

  t0 = clock();
  done = 0;
  while done < loops
    x = linsolve(A,B);
    #x = A / B;
    done += 1;
  endwhile
  elapsed = etime(clock(), t0);

  #r = norm(A*x - B)/norm(B)
  # Rate uses 2/3 * n^3 + 2 * n^3 flops per solve.
  mflops = ( 2.0/3.0 *n*n*n + 2.0*n*n*n ) *loops / ( elapsed * 1.0e6 );

  label = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, elapsed );

  n += nstep;

endwhile
|
||||
56
benchmark/scripts/OCTAVE/sgemm.m
Executable file
56
benchmark/scripts/OCTAVE/sgemm.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
# Default sweep: sizes nfrom, nfrom+nstep, ... while <= nto, with `loops`
# timed products per size.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

# Positional command-line arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
arg_list = argv();
for i = 1:nargin
  if i == 1
    nfrom = str2num(arg_list{i});
  elseif i == 2
    nto = str2num(arg_list{i});
  elseif i == 3
    nstep = str2num(arg_list{i});
  elseif i == 4
    loops = str2num(arg_list{i});
  endif
endfor

# A non-empty OPENBLAS_LOOPS replaces the repetition count.
p = getenv("OPENBLAS_LOOPS");
if p
  loops = str2num(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto

  A = single(rand(n,n));
  B = single(rand(n,n));

  t0 = clock();
  done = 0;
  while done < loops
    C = A * B;
    done += 1;
  endwhile
  elapsed = etime(clock(), t0);

  # Rate uses 2.0 * n^3 flops per product.
  mflops = ( 2.0*n*n*n *loops ) / ( elapsed * 1.0e6 );

  label = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, elapsed);

  n += nstep;

endwhile
|
||||
56
benchmark/scripts/OCTAVE/sgemv.m
Executable file
56
benchmark/scripts/OCTAVE/sgemv.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent

# Single-precision matrix-vector product benchmark (C = A * B, B a column).
#
# Usage: sgemv.m [nfrom [nto [nstep [loops]]]]
#
# An n-by-n matrix and an n-by-1 vector are timed for n = nfrom,
# nfrom+nstep, ... while n <= nto.  The environment variable
# OPENBLAS_LOOPS, when set, overrides the number of repetitions per size.

# Defaults used for every argument that is not supplied.
nfrom = 128;
nto   = 2048;
nstep = 128;
loops = 1;

# Positional command-line overrides; arguments past the fourth are ignored.
arg_list = argv ();
if (nargin >= 1)
  nfrom = str2num (arg_list{1});
endif
if (nargin >= 2)
  nto = str2num (arg_list{2});
endif
if (nargin >= 3)
  nstep = str2num (arg_list{3});
endif
if (nargin >= 4)
  loops = str2num (arg_list{4});
endif

# The environment takes precedence over the fourth argument.
env_loops = getenv ("OPENBLAS_LOOPS");
if (! isempty (env_loops))
  loops = str2num (env_loops);
endif

printf ("From %d To %d Step=%d Loops=%d\n", nfrom, nto, nstep, loops);
printf (" SIZE FLOPS TIME\n");

n = nfrom;
while (n <= nto)

  A = single (rand (n, n));
  B = single (rand (n, 1));

  # Only the repeated products are inside the timed region.
  t_begin = clock ();
  l = 0;
  while (l < loops)
    C = A * B;
    l += 1;
  endwhile
  timeg = etime (clock (), t_begin);

  # 2*n^2 floating-point operations per product.
  mflops = ( 2.0*n*n *loops ) / ( timeg * 1.0e6 );

  label = sprintf ("%dx%d : ", n, n);
  printf ("%20s %10.2f MFlops %10.6f sec\n", label, mflops, timeg);

  n += nstep;

endwhile
|
||||
56
benchmark/scripts/OCTAVE/zgemm.m
Executable file
56
benchmark/scripts/OCTAVE/zgemm.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent

# Double-precision complex matrix-matrix product benchmark (C = A * B).
#
# Usage: zgemm.m [nfrom [nto [nstep [loops]]]]
#
# Square n-by-n complex operands are timed for n = nfrom, nfrom+nstep, ...
# while n <= nto.  The environment variable OPENBLAS_LOOPS, when set,
# overrides the number of repetitions per size.

# Defaults used for every argument that is not supplied.
nfrom = 128;
nto   = 2048;
nstep = 128;
loops = 1;

# Positional command-line overrides; arguments past the fourth are ignored.
arg_list = argv ();
if (nargin >= 1)
  nfrom = str2num (arg_list{1});
endif
if (nargin >= 2)
  nto = str2num (arg_list{2});
endif
if (nargin >= 3)
  nstep = str2num (arg_list{3});
endif
if (nargin >= 4)
  loops = str2num (arg_list{4});
endif

# The environment takes precedence over the fourth argument.
env_loops = getenv ("OPENBLAS_LOOPS");
if (! isempty (env_loops))
  loops = str2num (env_loops);
endif

printf ("From %d To %d Step=%d Loops=%d\n", nfrom, nto, nstep, loops);
printf (" SIZE FLOPS TIME\n");

n = nfrom;
while (n <= nto)

  # Real and imaginary parts are drawn independently.
  A = double (rand (n, n)) + double (rand (n, n)) * 1i;
  B = double (rand (n, n)) + double (rand (n, n)) * 1i;

  # Only the repeated products are inside the timed region.
  t_begin = clock ();
  l = 0;
  while (l < loops)
    C = A * B;
    l += 1;
  endwhile
  timeg = etime (clock (), t_begin);

  # The complex product is counted as 4x the 2*n^3 of the real case.
  mflops = ( 4.0 * 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );

  label = sprintf ("%dx%d : ", n, n);
  printf ("%20s %10.2f MFlops %10.6f sec\n", label, mflops, timeg);

  n += nstep;

endwhile
|
||||
56
benchmark/scripts/OCTAVE/zgemv.m
Executable file
56
benchmark/scripts/OCTAVE/zgemv.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent

# Double-precision complex matrix-vector product benchmark
# (C = A * B, B a column).
#
# Usage: zgemv.m [nfrom [nto [nstep [loops]]]]
#
# An n-by-n complex matrix and an n-by-1 complex vector are timed for
# n = nfrom, nfrom+nstep, ... while n <= nto.  The environment variable
# OPENBLAS_LOOPS, when set, overrides the number of repetitions per size.

# Defaults used for every argument that is not supplied.
nfrom = 128;
nto   = 2048;
nstep = 128;
loops = 1;

# Positional command-line overrides; arguments past the fourth are ignored.
arg_list = argv ();
if (nargin >= 1)
  nfrom = str2num (arg_list{1});
endif
if (nargin >= 2)
  nto = str2num (arg_list{2});
endif
if (nargin >= 3)
  nstep = str2num (arg_list{3});
endif
if (nargin >= 4)
  loops = str2num (arg_list{4});
endif

# The environment takes precedence over the fourth argument.
env_loops = getenv ("OPENBLAS_LOOPS");
if (! isempty (env_loops))
  loops = str2num (env_loops);
endif

printf ("From %d To %d Step=%d Loops=%d\n", nfrom, nto, nstep, loops);
printf (" SIZE FLOPS TIME\n");

n = nfrom;
while (n <= nto)

  # Real and imaginary parts are drawn independently.
  A = double (rand (n, n)) + double (rand (n, n)) * 1i;
  B = double (rand (n, 1)) + double (rand (n, 1)) * 1i;

  # Only the repeated products are inside the timed region.
  t_begin = clock ();
  l = 0;
  while (l < loops)
    C = A * B;
    l += 1;
  endwhile
  timeg = etime (clock (), t_begin);

  # The complex product is counted as 4x the 2*n^2 of the real case.
  mflops = ( 4.0 * 2.0*n*n *loops ) / ( timeg * 1.0e6 );

  label = sprintf ("%dx%d : ", n, n);
  printf ("%20s %10.2f MFlops %10.6f sec\n", label, mflops, timeg);

  n += nstep;

endwhile
|
||||
62
benchmark/scripts/R/deig.R
Executable file
62
benchmark/scripts/R/deig.R
Executable file
@@ -0,0 +1,62 @@
|
||||
#!/usr/bin/Rscript

# Eigen-decomposition benchmark: times eigen() on random n-by-n matrices.
#
# Usage: deig.R [nfrom [nto [nstep [loops]]]]
#
# Sizes run n = nfrom, nfrom+nstep, ... while n <= nto.  The environment
# variable OPENBLAS_LOOPS, when set, overrides the repetitions per size.

argv <- commandArgs(trailingOnly = TRUE)

# Defaults used for every argument that is not supplied.
nfrom <- 128
nto   <- 2048
nstep <- 128
loops <- 1

# Positional overrides; arguments past the fourth are ignored.
if (length(argv) >= 1) nfrom <- as.numeric(argv[1])
if (length(argv) >= 2) nto   <- as.numeric(argv[2])
if (length(argv) >= 3) nstep <- as.numeric(argv[3])
if (length(argv) >= 4) loops <- as.numeric(argv[4])

# The environment takes precedence over the fourth argument.
p <- Sys.getenv("OPENBLAS_LOOPS")
if (nzchar(p)) {
  loops <- as.numeric(p)
}

cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n", nfrom, nto, nstep, loops))
cat(sprintf(" SIZE Flops Time\n"))

n <- nfrom
while (n <= nto) {

  A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)

  # Third entry of proc.time() is the elapsed wall-clock time.
  l <- 1
  start <- proc.time()[3]
  while (l <= loops) {
    ev <- eigen(A)
    l <- l + 1
  }
  end <- proc.time()[3]

  timeg <- end - start
  # Operation count is modelled as 26.66 * n^3 per decomposition.
  mflops <- (26.66 *n*n*n ) * loops / ( timeg * 1.0e6 )

  st <- sprintf("%.0fx%.0f :", n, n)
  cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))

  n <- n + nstep
}
|
||||
|
||||
|
||||
63
benchmark/scripts/R/dgemm.R
Executable file
63
benchmark/scripts/R/dgemm.R
Executable file
@@ -0,0 +1,63 @@
|
||||
#!/usr/bin/Rscript

# Matrix-matrix product benchmark: times A %*% B on random n-by-n matrices.
#
# Usage: dgemm.R [nfrom [nto [nstep [loops]]]]
#
# Sizes run n = nfrom, nfrom+nstep, ... while n <= nto.  The environment
# variable OPENBLAS_LOOPS, when set, overrides the repetitions per size.

argv <- commandArgs(trailingOnly = TRUE)

# Defaults used for every argument that is not supplied.
nfrom <- 128
nto   <- 2048
nstep <- 128
loops <- 1

# Positional overrides; arguments past the fourth are ignored.
if (length(argv) >= 1) nfrom <- as.numeric(argv[1])
if (length(argv) >= 2) nto   <- as.numeric(argv[2])
if (length(argv) >= 3) nstep <- as.numeric(argv[3])
if (length(argv) >= 4) loops <- as.numeric(argv[4])

# The environment takes precedence over the fourth argument.
p <- Sys.getenv("OPENBLAS_LOOPS")
if (nzchar(p)) {
  loops <- as.numeric(p)
}

cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n", nfrom, nto, nstep, loops))
cat(sprintf(" SIZE Flops Time\n"))

n <- nfrom
while (n <= nto) {

  A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
  B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)

  # Third entry of proc.time() is the elapsed wall-clock time.
  l <- 1
  start <- proc.time()[3]
  while (l <= loops) {
    C <- A %*% B
    l <- l + 1
  }
  end <- proc.time()[3]

  timeg <- end - start
  # 2*n^3 floating-point operations per product.
  mflops <- ( 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )

  st <- sprintf("%.0fx%.0f :", n, n)
  cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))

  n <- n + nstep
}
|
||||
|
||||
|
||||
63
benchmark/scripts/R/dsolve.R
Executable file
63
benchmark/scripts/R/dsolve.R
Executable file
@@ -0,0 +1,63 @@
|
||||
#!/usr/bin/Rscript

# Linear-solve benchmark: times solve(A, B) with random n-by-n A and B.
#
# Usage: dsolve.R [nfrom [nto [nstep [loops]]]]
#
# Sizes run n = nfrom, nfrom+nstep, ... while n <= nto.  The environment
# variable OPENBLAS_LOOPS, when set, overrides the repetitions per size.

argv <- commandArgs(trailingOnly = TRUE)

# Defaults used for every argument that is not supplied.
nfrom <- 128
nto   <- 2048
nstep <- 128
loops <- 1

# Positional overrides; arguments past the fourth are ignored.
if (length(argv) >= 1) nfrom <- as.numeric(argv[1])
if (length(argv) >= 2) nto   <- as.numeric(argv[2])
if (length(argv) >= 3) nstep <- as.numeric(argv[3])
if (length(argv) >= 4) loops <- as.numeric(argv[4])

# The environment takes precedence over the fourth argument.
p <- Sys.getenv("OPENBLAS_LOOPS")
if (nzchar(p)) {
  loops <- as.numeric(p)
}

cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n", nfrom, nto, nstep, loops))
cat(sprintf(" SIZE Flops Time\n"))

n <- nfrom
while (n <= nto) {

  A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
  B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)

  # Third entry of proc.time() is the elapsed wall-clock time.
  l <- 1
  start <- proc.time()[3]
  while (l <= loops) {
    # The solution is discarded; only the time taken matters here.
    solve(A, B)
    l <- l + 1
  }
  end <- proc.time()[3]

  timeg <- end - start
  # Operation count is modelled as 2/3*n^3 + 2*n^3 per solve.
  mflops <- (2.0/3.0 *n*n*n + 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )

  st <- sprintf("%.0fx%.0f :", n, n)
  cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))

  n <- n + nstep
}
|
||||
|
||||
|
||||
201
benchmark/swap.c
Normal file
201
benchmark/swap.c
Normal file
@@ -0,0 +1,201 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SWAP
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define SWAP BLASFUNC(zswap)
|
||||
#else
|
||||
#define SWAP BLASFUNC(cswap)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define SWAP BLASFUNC(dswap)
|
||||
#else
|
||||
#define SWAP BLASFUNC(sswap)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * malloc() replacement backed by a SysV shared-memory segment requested
 * with SHM_HUGETLB.
 *
 * The request is padded and masked to a multiple of HUGE_PAGESIZE, the
 * segment is attached, and it is then marked for removal with IPC_RMID.
 * On shmget() or shmat() failure a message is printed to stdout and the
 * process exits with status 1, so the function never returns NULL.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* NOTE(review): the padding is a full HUGE_PAGESIZE rather than
     HUGE_PAGESIZE - 1, so an already aligned size grows by one page.
     Confirm whether that is intended. */
  int shmid = shmget(IPC_PRIVATE,
                     (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                     SHM_HUGETLB | IPC_CREAT |0600);

  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  void *address = shmat(shmid, NULL, SHM_RND);

  if ((BLASLONG)address == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);

  return address;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SWAP (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MBytes\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
203
benchmark/symm.c
Normal file
203
benchmark/symm.c
Normal file
@@ -0,0 +1,203 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SYMM
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYMM BLASFUNC(dsymm)
|
||||
#else
|
||||
#define SYMM BLASFUNC(ssymm)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYMM BLASFUNC(zsymm)
|
||||
#else
|
||||
#define SYMM BLASFUNC(csymm)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * malloc() replacement backed by a SysV shared-memory segment requested
 * with SHM_HUGETLB.
 *
 * The request is padded and masked to a multiple of HUGE_PAGESIZE, the
 * segment is attached, and it is then marked for removal with IPC_RMID.
 * On shmget() or shmat() failure a message is printed to stdout and the
 * process exits with status 1, so the function never returns NULL.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* NOTE(review): the padding is a full HUGE_PAGESIZE rather than
     HUGE_PAGESIZE - 1, so an already aligned size grows by one page.
     Confirm whether that is intended. */
  int shmid = shmget(IPC_PRIVATE,
                     (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                     SHM_HUGETLB | IPC_CREAT |0600);

  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  void *address = shmat(shmid, NULL, SHM_RND);

  if ((BLASLONG)address == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);

  return address;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char side='L';
|
||||
char uplo='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c\n", from, to, step,side,uplo);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SYMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
218
benchmark/symv.c
Normal file
218
benchmark/symv.c
Normal file
@@ -0,0 +1,218 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SYMV
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYMV BLASFUNC(dsymv)
|
||||
#else
|
||||
#define SYMV BLASFUNC(ssymv)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYMV BLASFUNC(zsymv)
|
||||
#else
|
||||
#define SYMV BLASFUNC(csymv)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * malloc() replacement backed by a SysV shared-memory segment requested
 * with SHM_HUGETLB.
 *
 * The request is padded and masked to a multiple of HUGE_PAGESIZE, the
 * segment is attached, and it is then marked for removal with IPC_RMID.
 * On shmget() or shmat() failure a message is printed to stdout and the
 * process exits with status 1, so the function never returns NULL.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* NOTE(review): the padding is a full HUGE_PAGESIZE rather than
     HUGE_PAGESIZE - 1, so an already aligned size grows by one page.
     Confirm whether that is intended. */
  int shmid = shmget(IPC_PRIVATE,
                     (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                     SHM_HUGETLB | IPC_CREAT |0600);

  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  void *address = shmat(shmid, NULL, SHM_RND);

  if ((BLASLONG)address == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);

  return address;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char uplo='L';
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m,(int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SYMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
203
benchmark/syr2k.c
Normal file
203
benchmark/syr2k.c
Normal file
@@ -0,0 +1,203 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SYR2K
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYR2K BLASFUNC(dsyr2k)
|
||||
#else
|
||||
#define SYR2K BLASFUNC(ssyr2k)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYR2K BLASFUNC(zsyr2k)
|
||||
#else
|
||||
#define SYR2K BLASFUNC(csyr2k)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * malloc() replacement backed by a SysV shared-memory segment requested
 * with SHM_HUGETLB.
 *
 * The request is padded and masked to a multiple of HUGE_PAGESIZE, the
 * segment is attached, and it is then marked for removal with IPC_RMID.
 * On shmget() or shmat() failure a message is printed to stdout and the
 * process exits with status 1, so the function never returns NULL.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* NOTE(review): the padding is a full HUGE_PAGESIZE rather than
     HUGE_PAGESIZE - 1, so an already aligned size grows by one page.
     Confirm whether that is intended. */
  int shmid = shmget(IPC_PRIVATE,
                     (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                     SHM_HUGETLB | IPC_CREAT |0600);

  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  void *address = shmat(shmid, NULL, SHM_RND);

  if ((BLASLONG)address == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);

  return address;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
char trans='N';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SYR2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
199
benchmark/syrk.c
Normal file
199
benchmark/syrk.c
Normal file
@@ -0,0 +1,199 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
/* Pick the SYRK entry point for this build: COMPLEX selects the c/z
   variants, DOUBLE selects the d/z variants; the default is ssyrk. */
#undef SYRK

#if defined(COMPLEX) && defined(DOUBLE)
#define SYRK BLASFUNC(zsyrk)
#elif defined(COMPLEX)
#define SYRK BLASFUNC(csyrk)
#elif defined(DOUBLE)
#define SYRK BLASFUNC(dsyrk)
#else
#define SYRK BLASFUNC(ssyrk)
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)

/* Microseconds between the FILETIME epoch (1601-01-01) and the Unix epoch. */
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for native Windows builds.
 *
 * tv : receives the current time as seconds/microseconds since the Unix
 *      epoch; may be NULL, in which case nothing is written.
 * tz : ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 ticks;

  (void)tz;

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Assemble the 64-bit count of 100 ns intervals from its two halves. */
  ticks = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

  ticks /= 10;                          /* 100 ns units -> microseconds */
  ticks -= DELTA_EPOCH_IN_MICROSECS;    /* rebase onto the Unix epoch   */

  tv->tv_sec  = (long)(ticks / 1000000UL);
  tv->tv_usec = (long)(ticks % 1000000UL);

  return 0;
}

#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * Allocate `size` bytes from a SysV shared-memory segment requested with
 * SHM_HUGETLB. The request is rounded with HUGE_PAGESIZE before shmget().
 * Prints a message and exits with status 1 if shmget() or shmat() fails.
 * The segment is marked for removal right after it has been attached.
 *
 * Note: the trailing "&& 0" in the guard keeps this whole section disabled.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *address;
  int shmid;

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT | 0600);

  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  if ((BLASLONG)address == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);

  return address;
}

/* Route the benchmark's malloc() calls through the huge-page allocator. */
#define malloc huge_malloc

#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
char trans='N';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);
|
||||
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SYRK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
42
benchmark/tplot-header
Normal file
42
benchmark/tplot-header
Normal file
@@ -0,0 +1,42 @@
|
||||
# **********************************************************************************
|
||||
# Copyright (c) 2014, The OpenBLAS Project
|
||||
# All rights reserved.
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# 3. Neither the name of the OpenBLAS project nor the names of
|
||||
# its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# **********************************************************************************
|
||||
|
||||
# Plot template for benchmark results.
# - Selects the x11 terminal with a sans font, labels the axes "Size" and
#   "MFlops", enables grid lines on both axes and puts the key on the left.
# - The timestamp string contains a backquoted `whoami` command.
# - The title is hard-coded to one specific run (Sgemv, TRANS=T, Bulldozer).
# - The plot command reads three data files named 1-THREAD, 2-THREADS and
#   4-THREADS and draws each with "smooth bezier".
# NOTE(review): "set output" is issued after the plot command while the x11
# terminal is still selected, so it looks like print.png is not actually
# written by this script -- confirm before relying on it.
set term x11 font sans;
|
||||
set ylabel "MFlops";
|
||||
set xlabel "Size";
|
||||
set grid xtics;
|
||||
set grid ytics;
|
||||
set key left;
|
||||
set timestamp "generated on %Y-%m-%d by `whoami`"
|
||||
set title "Sgemv\nTRANS=T\nBulldozer"
|
||||
plot '1-THREAD' smooth bezier, '2-THREADS' smooth bezier, '4-THREADS' smooth bezier;
|
||||
set output "print.png";
|
||||
show title;
|
||||
show plot;
|
||||
show output;
|
||||
|
||||
|
||||
202
benchmark/trmm.c
Normal file
202
benchmark/trmm.c
Normal file
@@ -0,0 +1,202 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
/* Pick the TRMM entry point for this build: COMPLEX selects the c/z
   variants, DOUBLE selects the d/z variants; the default is strmm. */
#undef TRMM

#if defined(COMPLEX) && defined(DOUBLE)
#define TRMM BLASFUNC(ztrmm)
#elif defined(COMPLEX)
#define TRMM BLASFUNC(ctrmm)
#elif defined(DOUBLE)
#define TRMM BLASFUNC(dtrmm)
#else
#define TRMM BLASFUNC(strmm)
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)

/* Microseconds between the FILETIME epoch (1601-01-01) and the Unix epoch. */
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for native Windows builds.
 *
 * tv : receives the current time as seconds/microseconds since the Unix
 *      epoch; may be NULL, in which case nothing is written.
 * tz : ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 ticks;

  (void)tz;

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Assemble the 64-bit count of 100 ns intervals from its two halves. */
  ticks = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

  ticks /= 10;                          /* 100 ns units -> microseconds */
  ticks -= DELTA_EPOCH_IN_MICROSECS;    /* rebase onto the Unix epoch   */

  tv->tv_sec  = (long)(ticks / 1000000UL);
  tv->tv_usec = (long)(ticks % 1000000UL);

  return 0;
}

#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * Allocate `size` bytes from a SysV shared-memory segment requested with
 * SHM_HUGETLB. The request is rounded with HUGE_PAGESIZE before shmget().
 * Prints a message and exits with status 1 if shmget() or shmat() fails.
 * The segment is marked for removal right after it has been attached.
 *
 * Note: the trailing "&& 0" in the guard keeps this whole section disabled.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *address;
  int shmid;

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT | 0600);

  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  if ((BLASLONG)address == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);

  return address;
}

/* Route the benchmark's malloc() calls through the huge-page allocator. */
#define malloc huge_malloc

#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char side ='L';
|
||||
char uplo ='U';
|
||||
char trans='N';
|
||||
char diag ='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c\n", from, to, step,side,uplo,trans,diag);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
TRMM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
219
benchmark/trsm.c
Normal file
219
benchmark/trsm.c
Normal file
@@ -0,0 +1,219 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
/* Pick the TRSM entry point for this build: COMPLEX selects the c/z
   variants, DOUBLE selects the d/z variants; the default is strsm. */
#undef TRSM

#if defined(COMPLEX) && defined(DOUBLE)
#define TRSM BLASFUNC(ztrsm)
#elif defined(COMPLEX)
#define TRSM BLASFUNC(ctrsm)
#elif defined(DOUBLE)
#define TRSM BLASFUNC(dtrsm)
#else
#define TRSM BLASFUNC(strsm)
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)

/* Microseconds between the FILETIME epoch (1601-01-01) and the Unix epoch. */
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for native Windows builds.
 *
 * tv : receives the current time as seconds/microseconds since the Unix
 *      epoch; may be NULL, in which case nothing is written.
 * tz : ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 ticks;

  (void)tz;

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Assemble the 64-bit count of 100 ns intervals from its two halves. */
  ticks = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

  ticks /= 10;                          /* 100 ns units -> microseconds */
  ticks -= DELTA_EPOCH_IN_MICROSECS;    /* rebase onto the Unix epoch   */

  tv->tv_sec  = (long)(ticks / 1000000UL);
  tv->tv_usec = (long)(ticks % 1000000UL);

  return 0;
}

#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * Allocate `size` bytes from a SysV shared-memory segment requested with
 * SHM_HUGETLB. The request is rounded with HUGE_PAGESIZE before shmget().
 * Prints a message and exits with status 1 if shmget() or shmat() fails.
 * The segment is marked for removal right after it has been attached.
 *
 * Note: the trailing "&& 0" in the guard keeps this whole section disabled.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *address;
  int shmid;

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT | 0600);

  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  if ((BLASLONG)address == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);

  return address;
}

/* Route the benchmark's malloc() calls through the huge-page allocator. */
#define malloc huge_malloc

#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char side ='L';
|
||||
char uplo ='U';
|
||||
char trans='N';
|
||||
char diag ='U';
|
||||
|
||||
|
||||
int l;
|
||||
int loops = 1;
|
||||
double timeg;
|
||||
|
||||
if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if ( p != NULL )
|
||||
loops = atoi(p);
|
||||
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c Loops = %d\n", from, to, step,side,uplo,trans,diag,loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0.0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
time1 = timeg/loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
196
benchmark/zdot-intel.c
Normal file
196
benchmark/zdot-intel.c
Normal file
@@ -0,0 +1,196 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#define RETURN_BY_STACK 1
|
||||
#include "common.h"
|
||||
|
||||
|
||||
/* Pick the complex dot-product entry point: zdotu when DOUBLE is defined,
   cdotu otherwise. */
#undef DOT

#ifndef DOUBLE
#define DOT BLASFUNC(cdotu)
#else
#define DOT BLASFUNC(zdotu)
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)

/* Microseconds between the FILETIME epoch (1601-01-01) and the Unix epoch. */
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for native Windows builds.
 *
 * tv : receives the current time as seconds/microseconds since the Unix
 *      epoch; may be NULL, in which case nothing is written.
 * tz : ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 ticks;

  (void)tz;

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Assemble the 64-bit count of 100 ns intervals from its two halves. */
  ticks = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

  ticks /= 10;                          /* 100 ns units -> microseconds */
  ticks -= DELTA_EPOCH_IN_MICROSECS;    /* rebase onto the Unix epoch   */

  tv->tv_sec  = (long)(ticks / 1000000UL);
  tv->tv_usec = (long)(ticks % 1000000UL);

  return 0;
}

#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * Allocate `size` bytes from a SysV shared-memory segment requested with
 * SHM_HUGETLB. The request is rounded with HUGE_PAGESIZE before shmget().
 * Prints a message and exits with status 1 if shmget() or shmat() fails.
 * The segment is marked for removal right after it has been attached.
 *
 * Note: the trailing "&& 0" in the guard keeps this whole section disabled.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *address;
  int shmid;

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT | 0600);

  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  if ((BLASLONG)address == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);

  return address;
}

/* Route the benchmark's malloc() calls through the huge-page allocator. */
#define malloc huge_malloc

#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT _Complex result;
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
DOT (&result, &m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
195
benchmark/zdot.c
Normal file
195
benchmark/zdot.c
Normal file
@@ -0,0 +1,195 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
/*
 * Bind DOT to the routine this benchmark times: zdotu when DOUBLE is
 * defined, cdotu otherwise.  Any earlier definition of DOT is discarded.
 */
#undef DOT

#if defined(DOUBLE)
#define DOT BLASFUNC(zdotu)
#else
#define DOT BLASFUNC(cdotu)
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate `size` bytes from a private System V shared-memory segment
 * created with SHM_HUGETLB.  The requested size is padded by one
 * HUGE_PAGESIZE and masked down to a multiple of it.
 *
 * On failure of shmget() or shmat() a message is printed to stdout and the
 * process exits with status 1, so the function never returns NULL.
 */
static void *huge_malloc(BLASLONG size){
  int segment;
  void *mapping;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT | 0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapping = shmat(segment, NULL, SHM_RND);

  if ((BLASLONG)mapping == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal right away; the attachment made above
   * is what the caller keeps using. */
  shmctl(segment, IPC_RMID, 0);

  return mapping;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT _Complex result;
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
result = DOT (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
48
c_check
48
c_check
@@ -3,6 +3,9 @@
|
||||
# Checking cross compile
|
||||
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
|
||||
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
|
||||
$hostarch = "x86_64" if ($hostarch eq "amd64");
|
||||
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
|
||||
$hostarch = "arm64" if ($hostarch eq "aarch64");
|
||||
|
||||
$binary = $ENV{"BINARY"};
|
||||
|
||||
@@ -33,6 +36,8 @@ if ($ARGV[0] =~ /(.*)(-[.\d]+)/) {
|
||||
}
|
||||
|
||||
$compiler = "";
|
||||
$compiler = LSB if ($data =~ /COMPILER_LSB/);
|
||||
$compiler = CLANG if ($data =~ /COMPILER_CLANG/);
|
||||
$compiler = PGI if ($data =~ /COMPILER_PGI/);
|
||||
$compiler = PATHSCALE if ($data =~ /COMPILER_PATHSCALE/);
|
||||
$compiler = INTEL if ($data =~ /COMPILER_INTEL/);
|
||||
@@ -52,6 +57,7 @@ $os = osf if ($data =~ /OS_OSF/);
|
||||
$os = WINNT if ($data =~ /OS_WINNT/);
|
||||
$os = CYGWIN_NT if ($data =~ /OS_CYGWIN_NT/);
|
||||
$os = Interix if ($data =~ /OS_INTERIX/);
|
||||
$os = Android if ($data =~ /OS_ANDROID/);
|
||||
|
||||
$architecture = x86 if ($data =~ /ARCH_X86/);
|
||||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
|
||||
@@ -61,6 +67,8 @@ $architecture = mips64 if ($data =~ /ARCH_MIPS64/);
|
||||
$architecture = alpha if ($data =~ /ARCH_ALPHA/);
|
||||
$architecture = sparc if ($data =~ /ARCH_SPARC/);
|
||||
$architecture = ia64 if ($data =~ /ARCH_IA64/);
|
||||
$architecture = arm if ($data =~ /ARCH_ARM/);
|
||||
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
||||
|
||||
$defined = 0;
|
||||
|
||||
@@ -76,6 +84,10 @@ if (($architecture eq "mips32") || ($architecture eq "mips64")) {
|
||||
$defined = 1;
|
||||
}
|
||||
|
||||
if (($architecture eq "arm") || ($architecture eq "arm64")) {
|
||||
$defined = 1;
|
||||
}
|
||||
|
||||
if ($architecture eq "alpha") {
|
||||
$defined = 1;
|
||||
$binary = 64;
|
||||
@@ -117,7 +129,11 @@ if ($compiler eq "OPEN64") {
|
||||
$openmp = "-mp";
|
||||
}
|
||||
|
||||
if ($compiler eq "GCC") {
|
||||
if ($compiler eq "CLANG") {
|
||||
$openmp = "-fopenmp";
|
||||
}
|
||||
|
||||
if ($compiler eq "GCC" || $compiler eq "LSB") {
|
||||
$openmp = "-fopenmp";
|
||||
}
|
||||
|
||||
@@ -143,6 +159,8 @@ $architecture = mips64 if ($data =~ /ARCH_MIPS64/);
|
||||
$architecture = alpha if ($data =~ /ARCH_ALPHA/);
|
||||
$architecture = sparc if ($data =~ /ARCH_SPARC/);
|
||||
$architecture = ia64 if ($data =~ /ARCH_IA64/);
|
||||
$architecture = arm if ($data =~ /ARCH_ARM/);
|
||||
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
||||
|
||||
$binformat = bin32;
|
||||
$binformat = bin64 if ($data =~ /BINARY_64/);
|
||||
@@ -170,9 +188,9 @@ $linker_a = "";
|
||||
|
||||
{
|
||||
$link = `$compiler_name -c ctest2.c -o ctest2.o 2>&1 && $compiler_name $openmp -v ctest2.o -o ctest2 2>&1 && rm -f ctest2.o ctest2 ctest2.exe`;
|
||||
|
||||
|
||||
$link =~ s/\-Y\sP\,/\-Y/g;
|
||||
|
||||
|
||||
@flags = split(/[\s\,\n]/, $link);
|
||||
# remove leading and trailing quotes from each flag.
|
||||
@flags = map {s/^['"]|['"]$//g; $_} @flags;
|
||||
@@ -183,15 +201,15 @@ $linker_a = "";
|
||||
&& ($flags !~ /^-LIST:/)
|
||||
&& ($flags !~ /^-LANG:/)
|
||||
) {
|
||||
$linker_L .= $flags . " "
|
||||
$linker_L .= $flags . " "
|
||||
}
|
||||
|
||||
if ($flags =~ /^\-Y/) {
|
||||
$linker_L .= "-Wl,". $flags . " "
|
||||
$linker_L .= "-Wl,". $flags . " "
|
||||
}
|
||||
|
||||
|
||||
if (
|
||||
($flags =~ /^\-l/)
|
||||
($flags =~ /^\-l/)
|
||||
&& ($flags !~ /gfortranbegin/)
|
||||
&& ($flags !~ /frtbegin/)
|
||||
&& ($flags !~ /pathfstart/)
|
||||
@@ -203,7 +221,7 @@ $linker_a = "";
|
||||
&& ($flags !~ /advapi32/)
|
||||
&& ($flags !~ /shell32/)
|
||||
) {
|
||||
$linker_l .= $flags . " "
|
||||
$linker_l .= $flags . " "
|
||||
}
|
||||
|
||||
$linker_a .= $flags . " " if $flags =~ /\.a$/;
|
||||
@@ -240,14 +258,14 @@ print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64;
|
||||
print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
|
||||
|
||||
if ($os eq "LINUX") {
|
||||
|
||||
@pthread = split(/\s+/, `nm /lib/libpthread.so* | grep _pthread_create`);
|
||||
|
||||
if ($pthread[2] ne "") {
|
||||
print CONFFILE "#define PTHREAD_CREATE_FUNC $pthread[2]\n";
|
||||
} else {
|
||||
|
||||
# @pthread = split(/\s+/, `nm /lib/libpthread.so* | grep _pthread_create`);
|
||||
|
||||
# if ($pthread[2] ne "") {
|
||||
# print CONFFILE "#define PTHREAD_CREATE_FUNC $pthread[2]\n";
|
||||
# } else {
|
||||
print CONFFILE "#define PTHREAD_CREATE_FUNC pthread_create\n";
|
||||
}
|
||||
# }
|
||||
} else {
|
||||
print CONFFILE "#define PTHREAD_CREATE_FUNC pthread_create\n";
|
||||
}
|
||||
|
||||
519
cblas.h
519
cblas.h
@@ -1,291 +1,364 @@
|
||||
#ifndef CBLAS_H
|
||||
#define CBLAS_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include "common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
/* Assume C declarations for C++ */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#include <stddef.h>
|
||||
#include "common.h"
|
||||
|
||||
/* Set the number of threads at runtime. */
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
/*Get the number of threads on runtime.*/
|
||||
int openblas_get_num_threads(void);
|
||||
|
||||
/*Get the number of physical processors (cores).*/
|
||||
int openblas_get_num_procs(void);
|
||||
|
||||
/* Get the build configuration at runtime. */
|
||||
char* openblas_get_config(void);
|
||||
|
||||
/*Get the CPU corename on runtime.*/
|
||||
char* openblas_get_corename(void);
|
||||
|
||||
/* Get the parallelization type which is used by OpenBLAS */
|
||||
int openblas_get_parallel(void);
|
||||
/* OpenBLAS is compiled for sequential use */
|
||||
#define OPENBLAS_SEQUENTIAL 0
|
||||
/* OpenBLAS is compiled using normal threading model */
|
||||
#define OPENBLAS_THREAD 1
|
||||
/* OpenBLAS is compiled using OpenMP threading model */
|
||||
#define OPENBLAS_OPENMP 2
|
||||
|
||||
|
||||
/*
|
||||
* Since all of GotoBlas was written without const,
|
||||
* we disable it at build time.
|
||||
*/
|
||||
#ifndef OPENBLAS_CONST
|
||||
# define OPENBLAS_CONST const
|
||||
#endif
|
||||
|
||||
|
||||
#define CBLAS_INDEX size_t
|
||||
|
||||
enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102};
|
||||
enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114};
|
||||
enum CBLAS_UPLO {CblasUpper=121, CblasLower=122};
|
||||
enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132};
|
||||
enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
|
||||
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
|
||||
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;
|
||||
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
|
||||
typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
|
||||
typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
|
||||
|
||||
float cblas_sdsdot(blasint n, float, float *x, blasint incx, float *y, blasint incy);
|
||||
double cblas_dsdot (blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
float cblas_sdot(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
double cblas_ddot(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
float cblas_sdsdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
double cblas_dsdot (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
float cblas_sdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
double cblas_ddot(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
openblas_complex_float cblas_cdotu(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
openblas_complex_float cblas_cdotc(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
openblas_complex_double cblas_zdotu(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
openblas_complex_double cblas_zdotc(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
openblas_complex_float cblas_cdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_float cblas_cdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_double cblas_zdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_double cblas_zdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_cdotu_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret);
|
||||
void cblas_cdotc_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret);
|
||||
void cblas_zdotu_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
|
||||
void cblas_zdotc_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
|
||||
void cblas_cdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy, openblas_complex_float *ret);
|
||||
void cblas_cdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy, openblas_complex_float *ret);
|
||||
void cblas_zdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy, openblas_complex_double *ret);
|
||||
void cblas_zdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy, openblas_complex_double *ret);
|
||||
|
||||
float cblas_sasum (blasint n, float *x, blasint incx);
|
||||
double cblas_dasum (blasint n, double *x, blasint incx);
|
||||
float cblas_scasum(blasint n, float *x, blasint incx);
|
||||
double cblas_dzasum(blasint n, double *x, blasint incx);
|
||||
float cblas_sasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
float cblas_scasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dzasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
float cblas_snrm2 (blasint N, float *X, blasint incX);
|
||||
double cblas_dnrm2 (blasint N, double *X, blasint incX);
|
||||
float cblas_scnrm2(blasint N, float *X, blasint incX);
|
||||
double cblas_dznrm2(blasint N, double *X, blasint incX);
|
||||
float cblas_snrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX);
|
||||
double cblas_dnrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX);
|
||||
float cblas_scnrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX);
|
||||
double cblas_dznrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
CBLAS_INDEX cblas_isamax(blasint n, float *x, blasint incx);
|
||||
CBLAS_INDEX cblas_idamax(blasint n, double *x, blasint incx);
|
||||
CBLAS_INDEX cblas_icamax(blasint n, float *x, blasint incx);
|
||||
CBLAS_INDEX cblas_izamax(blasint n, double *x, blasint incx);
|
||||
CBLAS_INDEX cblas_isamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_idamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_icamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_izamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
void cblas_saxpy(blasint n, float, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_daxpy(blasint n, double, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_caxpy(blasint n, float *, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zaxpy(blasint n, double *, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_saxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_daxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zaxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_scopy(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_dcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_ccopy(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_scopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_dcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_ccopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_sswap(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_dswap(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_cswap(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zswap(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_sswap(OPENBLAS_CONST blasint n, float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_dswap(OPENBLAS_CONST blasint n, double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_cswap(OPENBLAS_CONST blasint n, float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zswap(OPENBLAS_CONST blasint n, double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_srot(blasint N, float *X, blasint incX, float *Y, blasint incY, float c, float s);
|
||||
void cblas_drot(blasint N, double *X, blasint incX, double *Y, blasint incY, double c, double s);
|
||||
void cblas_srot(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float c, OPENBLAS_CONST float s);
|
||||
void cblas_drot(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double c, OPENBLAS_CONST double s);
|
||||
|
||||
void cblas_srotg(float *a, float *b, float *c, float *s);
|
||||
void cblas_drotg(double *a, double *b, double *c, double *s);
|
||||
|
||||
void cblas_srotm(blasint N, float *X, blasint incX, float *Y, blasint incY, float *P);
|
||||
void cblas_drotm(blasint N, double *X, blasint incX, double *Y, blasint incY, double *P);
|
||||
void cblas_srotm(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float *P);
|
||||
void cblas_drotm(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double *P);
|
||||
|
||||
void cblas_srotmg(float *d1, float *d2, float *b1, float b2, float *P);
|
||||
void cblas_drotmg(double *d1, double *d2, double *b1, double b2, double *P);
|
||||
void cblas_srotmg(float *d1, float *d2, float *b1, OPENBLAS_CONST float b2, float *P);
|
||||
void cblas_drotmg(double *d1, double *d2, double *b1, OPENBLAS_CONST double b2, double *P);
|
||||
|
||||
void cblas_sscal(blasint N, float alpha, float *X, blasint incX);
|
||||
void cblas_dscal(blasint N, double alpha, double *X, blasint incX);
|
||||
void cblas_cscal(blasint N, float *alpha, float *X, blasint incX);
|
||||
void cblas_zscal(blasint N, double *alpha, double *X, blasint incX);
|
||||
void cblas_csscal(blasint N, float alpha, float *X, blasint incX);
|
||||
void cblas_zdscal(blasint N, double alpha, double *X, blasint incX);
|
||||
void cblas_sscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_cscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_zscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_csscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_zdscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_sgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
float alpha, float *a, blasint lda, float *x, blasint incx, float beta, float *y, blasint incy);
|
||||
void cblas_dgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
double alpha, double *a, blasint lda, double *x, blasint incx, double beta, double *y, blasint incy);
|
||||
void cblas_cgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
float *alpha, float *a, blasint lda, float *x, blasint incx, float *beta, float *y, blasint incy);
|
||||
void cblas_zgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
double *alpha, double *a, blasint lda, double *x, blasint incx, double *beta, double *y, blasint incy);
|
||||
void cblas_sgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_dgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_cgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_sger (enum CBLAS_ORDER order, blasint M, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_dger (enum CBLAS_ORDER order, blasint M, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_cgeru(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_cgerc(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_zgeru(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_zgerc(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_sger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_dger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
|
||||
void cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_strsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_strmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_strmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
|
||||
void cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
|
||||
void cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
|
||||
void cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
|
||||
void cblas_ssyr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_dsyr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *A, OPENBLAS_CONST blasint lda);
|
||||
|
||||
void cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,blasint N, float alpha, float *X,
|
||||
blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,
|
||||
blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX,
|
||||
float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX,
|
||||
double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_ssyr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo,OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X,
|
||||
OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_dsyr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X,
|
||||
OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX,
|
||||
OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX,
|
||||
OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
|
||||
void cblas_sgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, float alpha, float *A, blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, double alpha, double *A, blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_cgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
void cblas_sgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_cgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, float alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dsbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, double alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_ssbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dsbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
|
||||
void cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_stbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_stbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_stbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_stpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_stpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_ssymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dsymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
void cblas_ssymv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dsymv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_chemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
|
||||
void cblas_sspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *Ap,
|
||||
float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *Ap,
|
||||
double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_sspmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *Ap,
|
||||
OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dspmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *Ap,
|
||||
OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_sspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Ap);
|
||||
void cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Ap);
|
||||
void cblas_sspr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *Ap);
|
||||
void cblas_dspr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *Ap);
|
||||
|
||||
void cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A);
|
||||
void cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,blasint incX, double *A);
|
||||
void cblas_chpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A);
|
||||
void cblas_zhpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X,OPENBLAS_CONST blasint incX, double *A);
|
||||
|
||||
void cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A);
|
||||
void cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A);
|
||||
void cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *Ap);
|
||||
void cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *Ap);
|
||||
void cblas_sspr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A);
|
||||
void cblas_dspr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A);
|
||||
void cblas_chpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *Ap);
|
||||
void cblas_zhpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *Ap);
|
||||
|
||||
void cblas_chbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
void cblas_chbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_chpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
|
||||
float *alpha, float *Ap, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
|
||||
double *alpha, double *Ap, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
void cblas_chpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *Ap, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *Ap, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
void cblas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_cgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_cgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
|
||||
void cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
|
||||
void cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *beta, double *C, blasint ldc);
|
||||
void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_csymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
void cblas_ssyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dsyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_csyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zsyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_ssyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dsyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_csyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zsyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_strmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_dtrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ctrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ztrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
|
||||
void cblas_chemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zhemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
void cblas_strsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_dtrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ctrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ztrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
|
||||
void cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
|
||||
void cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
|
||||
void cblas_chemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zhemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_cherk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zherk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_cher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
/* Variadic entry point: takes a blasint p, two strings (rout, form) and
 * further arguments after form; returns nothing.
 * NOTE(review): presumably the CBLAS error handler, with p the position of the
 * offending argument, rout the routine name and form a printf-style format -
 * confirm against the implementation. */
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
||||
|
||||
/*** BLAS extensions ***/
|
||||
|
||||
/* ?axpby extension prototypes (s/d/c/z).
 * Every parameter except y is OPENBLAS_CONST-qualified, so y is the only
 * array that can be written through (assuming OPENBLAS_CONST expands to
 * const - its definition is not visible here). The s/d variants take alpha
 * and beta by value, the c/z variants by pointer.
 * NOTE(review): presumably computes y := alpha*x + beta*y - confirm. */
void cblas_saxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_daxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_caxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST float *beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_zaxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST double *beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
/* ?omatcopy extension prototypes (s/d/c/z).
 * Arguments: order and transpose selectors, crows/ccols, calpha (by value
 * for s/d, by pointer for c/z), an OPENBLAS_CONST-qualified input a with
 * clda, and a non-const b with cldb.
 * NOTE(review): presumably an out-of-place scaled copy/transpose of a into
 * b - confirm against the implementation. */
void cblas_somatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, OPENBLAS_CONST float *a,
|
||||
OPENBLAS_CONST blasint clda, float *b, OPENBLAS_CONST blasint cldb);
|
||||
void cblas_domatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, OPENBLAS_CONST double *a,
|
||||
OPENBLAS_CONST blasint clda, double *b, OPENBLAS_CONST blasint cldb);
|
||||
void cblas_comatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float* calpha, OPENBLAS_CONST float* a,
|
||||
OPENBLAS_CONST blasint clda, float*b, OPENBLAS_CONST blasint cldb);
|
||||
void cblas_zomatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double* calpha, OPENBLAS_CONST double* a,
|
||||
OPENBLAS_CONST blasint clda, double *b, OPENBLAS_CONST blasint cldb);
|
||||
|
||||
/* ?imatcopy extension prototypes (s/d/c/z).
 * Same leading arguments as the ?omatcopy prototypes, but there is a single
 * matrix pointer a, which is not const-qualified, together with both clda
 * and cldb.
 * NOTE(review): presumably an in-place scaled copy/transpose of a, with clda
 * the leading dimension before and cldb after the operation - confirm. */
void cblas_simatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, float *a,
|
||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
|
||||
void cblas_dimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, double *a,
|
||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
|
||||
void cblas_cimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float* calpha, float* a,
|
||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
|
||||
void cblas_zimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double* calpha, double* a,
|
||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
|
||||
|
||||
/* ?geadd extension prototypes (s/d/c/z).
 * Arguments: order selector, crows/ccols, calpha with matrix a and clda,
 * cbeta with matrix c and cldc. calpha/cbeta are passed by value for s/d
 * and by pointer for c/z.
 * NOTE(review): presumably computes c := calpha*a + cbeta*c - confirm.
 * NOTE(review): a is not OPENBLAS_CONST-qualified although it looks like a
 * pure input; check whether that omission is intentional. */
void cblas_sgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float cbeta,
|
||||
float *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_dgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double cbeta,
|
||||
double *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float *calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float *cbeta,
|
||||
float *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta,
|
||||
double *c, OPENBLAS_CONST blasint cldc);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
||||
|
||||
350
cblas_noconst.h
Normal file
350
cblas_noconst.h
Normal file
@@ -0,0 +1,350 @@
|
||||
#ifndef CBLAS_H
|
||||
#define CBLAS_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include "common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
/* Assume C declarations for C++ */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/* Set the number of threads at runtime.
 * Two entry points with identical signatures are declared.
 * NOTE(review): goto_set_num_threads is presumably the legacy GotoBLAS
 * spelling of the same call - confirm against the implementation. */
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
/* Get the number of threads at runtime. Takes no arguments; returns an int. */
|
||||
int openblas_get_num_threads(void);
|
||||
|
||||
/* Get the number of physical processors (cores). Takes no arguments;
 * returns an int. */
|
||||
int openblas_get_num_procs(void);
|
||||
|
||||
/* Get the build configuration at runtime, returned as a char pointer.
 * NOTE(review): ownership of the returned string is not visible here;
 * presumably it points to static storage that the caller must not free or
 * modify - confirm against the implementation. */
|
||||
char* openblas_get_config(void);
|
||||
|
||||
/* Get the parallelization type used by OpenBLAS.
 * NOTE(review): the returned int is presumably one of the three OPENBLAS_*
 * constants defined directly after this prototype - confirm. */
|
||||
int openblas_get_parallel(void);
|
||||
/* OpenBLAS is compiled for sequential use */
|
||||
#define OPENBLAS_SEQUENTIAL 0
|
||||
/* OpenBLAS is compiled using normal threading model */
|
||||
#define OPENBLAS_THREAD 1
|
||||
/* OpenBLAS is compiled using OpenMP threading model */
|
||||
#define OPENBLAS_OPENMP 2
|
||||
|
||||
|
||||
/* CBLAS_INDEX expands to size_t; it is the return type of the cblas_i?amax
 * prototypes declared in this header. */
#define CBLAS_INDEX size_t
|
||||
|
||||
/* Option enums used as selector arguments by the prototypes in this header.
 * Each enum is also typedef'd to its own tag name, so it can be spelled with
 * or without the `enum` keyword. All enumerator values are explicit
 * (101-102, 111-114, 121-122, 131-132, 141-142).
 * NOTE(review): the values are presumably fixed for binary compatibility
 * with the reference CBLAS header - do not renumber without confirming. */
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
|
||||
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;
|
||||
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
|
||||
typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
|
||||
typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
|
||||
|
||||
/* Real ?dot prototypes. Each takes n and two (pointer, increment) pairs and
 * returns a scalar by value. cblas_sdsdot additionally takes a float alpha;
 * cblas_dsdot takes float vectors but returns double. No pointer parameter
 * is const-qualified in this header.
 * NOTE(review): presumably dot products as in reference BLAS - confirm. */
float cblas_sdsdot(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy);
|
||||
double cblas_dsdot (blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
float cblas_sdot(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
double cblas_ddot(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
/* Complex dot prototypes returning the result by value as
 * openblas_complex_float / openblas_complex_double. The vectors are passed
 * as plain float* / double*.
 * NOTE(review): presumably x and y hold interleaved (re, im) pairs and the
 * u/c suffix selects unconjugated/conjugated products - confirm. */
openblas_complex_float cblas_cdotu(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
openblas_complex_float cblas_cdotc(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
openblas_complex_double cblas_zdotu(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
openblas_complex_double cblas_zdotc(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
/* "_sub" forms of the complex dot prototypes: same vector arguments, but the
 * return type is void and an extra pointer `ret` to
 * openblas_complex_float / openblas_complex_double is taken.
 * NOTE(review): presumably the result is stored through ret - confirm. */
void cblas_cdotu_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret);
|
||||
void cblas_cdotc_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_double *ret);
|
||||
void cblas_zdotu_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
|
||||
void cblas_zdotc_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
|
||||
|
||||
/* ?asum prototypes. Each takes n, a vector pointer and its increment and
 * returns float or double; the sc/dz variants also return a real type.
 * NOTE(review): presumably sums of absolute values as in reference BLAS -
 * confirm. */
float cblas_sasum (blasint n, float *x, blasint incx);
|
||||
double cblas_dasum (blasint n, double *x, blasint incx);
|
||||
float cblas_scasum(blasint n, float *x, blasint incx);
|
||||
double cblas_dzasum(blasint n, double *x, blasint incx);
|
||||
|
||||
/* ?nrm2 prototypes. Each takes N, a vector pointer and its increment and
 * returns float or double; the sc/dz variants also return a real type.
 * NOTE(review): presumably Euclidean norms as in reference BLAS - confirm. */
float cblas_snrm2 (blasint N, float *X, blasint incX);
|
||||
double cblas_dnrm2 (blasint N, double *X, blasint incX);
|
||||
float cblas_scnrm2(blasint N, float *X, blasint incX);
|
||||
double cblas_dznrm2(blasint N, double *X, blasint incX);
|
||||
|
||||
/* i?amax prototypes. Each takes n, a vector pointer and its increment and
 * returns a CBLAS_INDEX.
 * NOTE(review): presumably the index of the element of largest magnitude;
 * whether the index is 0- or 1-based is not visible here - confirm. */
CBLAS_INDEX cblas_isamax(blasint n, float *x, blasint incx);
|
||||
CBLAS_INDEX cblas_idamax(blasint n, double *x, blasint incx);
|
||||
CBLAS_INDEX cblas_icamax(blasint n, float *x, blasint incx);
|
||||
CBLAS_INDEX cblas_izamax(blasint n, double *x, blasint incx);
|
||||
|
||||
/* ?axpy prototypes. alpha is passed by value for s/d and by pointer for
 * c/z; x and y are (pointer, increment) pairs with no const qualifier.
 * NOTE(review): presumably computes y := alpha*x + y - confirm. */
void cblas_saxpy(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_daxpy(blasint n, double alpha, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_caxpy(blasint n, float *alpha, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zaxpy(blasint n, double *alpha, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
/* ?copy prototypes. Each takes n and two (pointer, increment) pairs and
 * returns nothing.
 * NOTE(review): presumably copies x into y; the signature alone does not
 * show the direction because neither pointer is const - confirm. */
void cblas_scopy(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_dcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_ccopy(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
/* ?swap prototypes. Each takes n and two (pointer, increment) pairs and
 * returns nothing.
 * NOTE(review): presumably exchanges the contents of x and y - confirm. */
void cblas_sswap(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_dswap(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_cswap(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zswap(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
/* ?rot prototypes: N, two (pointer, increment) pairs, and scalars c and s
 * passed by value.
 * NOTE(review): presumably applies a plane rotation with cosine c and sine s
 * to X and Y - confirm. */
void cblas_srot(blasint N, float *X, blasint incX, float *Y, blasint incY, float c, float s);
|
||||
void cblas_drot(blasint N, double *X, blasint incX, double *Y, blasint incY, double c, double s);
|
||||
|
||||
/* ?rotg prototypes: all four arguments (a, b, c, s) are passed by pointer.
 * NOTE(review): presumably generates a plane rotation, reading a and b and
 * writing all four - confirm. */
void cblas_srotg(float *a, float *b, float *c, float *s);
|
||||
void cblas_drotg(double *a, double *b, double *c, double *s);
|
||||
|
||||
/* ?rotm prototypes: N, two (pointer, increment) pairs, and a parameter
 * array P passed by pointer.
 * NOTE(review): presumably applies a modified rotation described by P; the
 * length and layout of P are not visible here - confirm. */
void cblas_srotm(blasint N, float *X, blasint incX, float *Y, blasint incY, float *P);
|
||||
void cblas_drotm(blasint N, double *X, blasint incX, double *Y, blasint incY, double *P);
|
||||
|
||||
/* ?rotmg prototypes: d1, d2, b1 and P are passed by pointer, b2 by value.
 * NOTE(review): presumably generates the modified-rotation parameters into
 * P; the length and layout of P are not visible here - confirm. */
void cblas_srotmg(float *d1, float *d2, float *b1, float b2, float *P);
|
||||
void cblas_drotmg(double *d1, double *d2, double *b1, double b2, double *P);
|
||||
|
||||
/* ?scal prototypes. alpha is passed by value for s/d and by pointer for
 * c/z; cblas_csscal and cblas_zdscal take a real alpha by value together
 * with a float* / double* vector.
 * NOTE(review): presumably scales X in place by alpha - confirm. */
void cblas_sscal(blasint N, float alpha, float *X, blasint incX);
|
||||
void cblas_dscal(blasint N, double alpha, double *X, blasint incX);
|
||||
void cblas_cscal(blasint N, float *alpha, float *X, blasint incX);
|
||||
void cblas_zscal(blasint N, double *alpha, double *X, blasint incX);
|
||||
void cblas_csscal(blasint N, float alpha, float *X, blasint incX);
|
||||
void cblas_zdscal(blasint N, double alpha, double *X, blasint incX);
|
||||
|
||||
/* ?gemv prototypes: order and transpose selectors, m and n, alpha, matrix a
 * with lda, vector x with incx, beta, vector y with incy. alpha and beta
 * are passed by value for s/d and by pointer for c/z.
 * NOTE(review): presumably y := alpha*op(a)*x + beta*y - confirm. */
void cblas_sgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
float alpha, float *a, blasint lda, float *x, blasint incx, float beta, float *y, blasint incy);
|
||||
void cblas_dgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
double alpha, double *a, blasint lda, double *x, blasint incx, double beta, double *y, blasint incy);
|
||||
void cblas_cgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
float *alpha, float *a, blasint lda, float *x, blasint incx, float *beta, float *y, blasint incy);
|
||||
void cblas_zgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
double *alpha, double *a, blasint lda, double *x, blasint incx, double *beta, double *y, blasint incy);
|
||||
|
||||
/* ?ger / ?geru / ?gerc prototypes: order selector, M and N, alpha (by value
 * for s/d, by pointer for c/z), vectors X and Y with increments, and matrix
 * A with lda.
 * NOTE(review): presumably rank-1 updates of A, with the u/c suffix
 * selecting unconjugated/conjugated Y - confirm. */
void cblas_sger (enum CBLAS_ORDER order, blasint M, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_dger (enum CBLAS_ORDER order, blasint M, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_cgeru(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_cgerc(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_zgeru(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_zgerc(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
|
||||
/* ?trsv prototypes: order, uplo, transpose and diag selectors, N, matrix A
 * with lda, and vector X with incX.
 * NOTE(review): presumably triangular solves that overwrite X - confirm. */
void cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
/* ?trmv prototypes: order, uplo, transpose and diag selectors, N, matrix A
 * with lda, and vector X with incX.
 * NOTE(review): presumably triangular matrix-vector products that overwrite
 * X - confirm. */
void cblas_strmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
/* ?syr / ?her prototypes: order and uplo selectors, N, alpha by value,
 * vector X with incX, and matrix A with lda. The her variants also take a
 * real alpha (float / double) by value.
 * NOTE(review): presumably symmetric / Hermitian rank-1 updates of A -
 * confirm. */
void cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
|
||||
void cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
|
||||
void cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
|
||||
void cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
|
||||
|
||||
/* ?syr2 / ?her2 prototypes: order and uplo selectors, N, alpha, vectors X
 * and Y with increments, and matrix A with lda. alpha is passed by value
 * for the syr2 variants and by pointer for the her2 variants.
 * NOTE(review): presumably symmetric / Hermitian rank-2 updates of A -
 * confirm. */
void cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,blasint N, float alpha, float *X,
|
||||
blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,
|
||||
blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX,
|
||||
float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX,
|
||||
double *Y, blasint incY, double *A, blasint lda);
|
||||
|
||||
/* ?gbmv prototypes: order and transpose selectors, M and N, KL and KU,
 * alpha, matrix A with lda, vector X with incX, beta, vector Y with incY.
 * alpha and beta are passed by value for s/d and by pointer for c/z.
 * NOTE(review): presumably banded matrix-vector products with KL sub- and
 * KU super-diagonals - confirm. */
void cblas_sgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, float alpha, float *A, blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, double alpha, double *A, blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_cgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
/* ?sbmv prototypes (s/d only in this group): order and uplo selectors, N
 * and K, alpha by value, matrix A with lda, vector X with incX, beta by
 * value, vector Y with incY.
 * NOTE(review): presumably symmetric banded matrix-vector products -
 * confirm. */
void cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, float alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dsbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, double alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
|
||||
|
||||
/* ?tbmv prototypes: order, uplo, transpose and diag selectors, N and K,
 * matrix A with lda, and vector X with incX.
 * NOTE(review): presumably triangular banded matrix-vector products that
 * overwrite X - confirm. */
void cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
/* ?tbsv prototypes: order, uplo, transpose and diag selectors, N and K,
 * matrix A with lda, and vector X with incX.
 * NOTE(review): presumably triangular banded solves that overwrite X -
 * confirm. */
void cblas_stbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
/* ?tpmv prototypes: order, uplo, transpose and diag selectors, N, matrix
 * pointer Ap (no leading-dimension argument), and vector X with incX.
 * NOTE(review): presumably triangular packed-storage matrix-vector products
 * that overwrite X - confirm. */
void cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
|
||||
/* ?tpsv prototypes: order, uplo, transpose and diag selectors, N, matrix
 * pointer Ap (no leading-dimension argument), and vector X with incX.
 * NOTE(review): presumably triangular packed-storage solves that overwrite
 * X - confirm. */
void cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
|
||||
void cblas_ssymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dsymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
|
||||
void cblas_sspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *Ap,
|
||||
float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *Ap,
|
||||
double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_sspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Ap);
|
||||
void cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Ap);
|
||||
|
||||
void cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A);
|
||||
void cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,blasint incX, double *A);
|
||||
|
||||
void cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A);
|
||||
void cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A);
|
||||
void cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *Ap);
|
||||
void cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *Ap);
|
||||
|
||||
void cblas_chbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_chpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
|
||||
float *alpha, float *Ap, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
|
||||
double *alpha, double *Ap, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_cgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
void cblas_zgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
|
||||
void cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
|
||||
void cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
|
||||
void cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
|
||||
void cblas_chemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zhemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
|
||||
void cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
||||
|
||||
/*** BLAS extensions ***/
|
||||
|
||||
void cblas_saxpby(blasint n, float alpha, float *x, blasint incx,float beta, float *y, blasint incy);
|
||||
|
||||
void cblas_daxpby(blasint n, double alpha, double *x, blasint incx,double beta, double *y, blasint incy);
|
||||
|
||||
void cblas_caxpby(blasint n, float *alpha, float *x, blasint incx,float *beta, float *y, blasint incy);
|
||||
|
||||
void cblas_zaxpby(blasint n, double *alpha, double *x, blasint incx,double *beta, double *y, blasint incy);
|
||||
|
||||
void cblas_somatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float calpha, float *a,
|
||||
blasint clda, float *b, blasint cldb);
|
||||
void cblas_domatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double calpha, double *a,
|
||||
blasint clda, double *b, blasint cldb);
|
||||
void cblas_comatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, void* calpha, void* a,
|
||||
blasint clda, void *b, blasint cldb);
|
||||
void cblas_zomatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, void* calpha, void* a,
|
||||
blasint clda, void *b, blasint cldb);
|
||||
|
||||
void cblas_simatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float calpha, float *a,
|
||||
blasint clda, blasint cldb);
|
||||
void cblas_dimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double calpha, double *a,
|
||||
blasint clda, blasint cldb);
|
||||
void cblas_cimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float* calpha, float* a,
|
||||
blasint clda, blasint cldb);
|
||||
void cblas_zimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double* calpha, double* a,
|
||||
blasint clda, blasint cldb);
|
||||
|
||||
void cblas_sgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float calpha, float *a, blasint clda, float cbeta,
|
||||
float *c, blasint cldc);
|
||||
void cblas_dgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double calpha, double *a, blasint clda, double cbeta,
|
||||
double *c, blasint cldc);
|
||||
void cblas_cgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float *calpha, float *a, blasint clda, float *cbeta,
|
||||
float *c, blasint cldc);
|
||||
void cblas_zgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double *calpha, double *a, blasint clda, double *cbeta,
|
||||
double *c, blasint cldc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
||||
116
common.h
116
common.h
@@ -93,6 +93,10 @@ extern "C" {
|
||||
#include <sched.h>
|
||||
#endif
|
||||
|
||||
#ifdef OS_ANDROID
|
||||
#define NO_SYSV_IPC
|
||||
#endif
|
||||
|
||||
#ifdef OS_WINDOWS
|
||||
#ifdef ATOM
|
||||
#define GOTO_ATOM ATOM
|
||||
@@ -106,7 +110,9 @@ extern "C" {
|
||||
#endif
|
||||
#else
|
||||
#include <sys/mman.h>
|
||||
#ifndef NO_SYSV_IPC
|
||||
#include <sys/shm.h>
|
||||
#endif
|
||||
#include <sys/time.h>
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
@@ -270,6 +276,11 @@ typedef int blasint;
|
||||
#define SIZE 8
|
||||
#define BASE_SHIFT 3
|
||||
#define ZBASE_SHIFT 4
|
||||
#elif defined(INTEGER) //extend for integer matrix
|
||||
#define FLOAT int
|
||||
#define SIZE 4
|
||||
#define BASE_SHIFT 2
|
||||
#define ZBASE_SHIFT 3
|
||||
#else
|
||||
#define FLOAT float
|
||||
#define SIZE 4
|
||||
@@ -310,10 +321,52 @@ typedef int blasint;
|
||||
#define YIELDING SwitchToThread()
|
||||
#endif
|
||||
|
||||
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
|
||||
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
|
||||
#endif
|
||||
|
||||
#ifdef BULLDOZER
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef PILEDRIVER
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
#ifdef STEAMROLLER
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
#endif
|
||||
*/
|
||||
|
||||
#ifndef YIELDING
|
||||
#define YIELDING sched_yield()
|
||||
#endif
|
||||
|
||||
/***
|
||||
To allocate job_t on the heap or on the stack.
|
||||
Please see https://github.com/xianyi/OpenBLAS/issues/246
|
||||
***/
|
||||
#if defined(OS_WINDOWS)
|
||||
#define GETRF_MEM_ALLOC_THRESHOLD 32
|
||||
#define BLAS3_MEM_ALLOC_THRESHOLD 32
|
||||
#endif
|
||||
|
||||
#ifndef GETRF_MEM_ALLOC_THRESHOLD
|
||||
#define GETRF_MEM_ALLOC_THRESHOLD 80
|
||||
#endif
|
||||
|
||||
#ifndef BLAS3_MEM_ALLOC_THRESHOLD
|
||||
#define BLAS3_MEM_ALLOC_THRESHOLD 160
|
||||
#endif
|
||||
|
||||
#ifdef QUAD_PRECISION
|
||||
#include "common_quad.h"
|
||||
#endif
|
||||
@@ -346,12 +399,35 @@ typedef int blasint;
|
||||
#include "common_mips64.h"
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_ARM
|
||||
#include "common_arm.h"
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_ARM64
|
||||
#include "common_arm64.h"
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
#ifdef OS_WINDOWS
|
||||
typedef char env_var_t[MAX_PATH];
|
||||
#define readenv(p, n) GetEnvironmentVariable((n), (p), sizeof(p))
|
||||
#else
|
||||
typedef char* env_var_t;
|
||||
#define readenv(p, n) ((p)=getenv(n))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#include "common_linux.h"
|
||||
#endif
|
||||
|
||||
#define MMAP_ACCESS (PROT_READ | PROT_WRITE)
|
||||
|
||||
#ifdef __NetBSD__
|
||||
#define MMAP_POLICY (MAP_PRIVATE | MAP_ANON)
|
||||
#else
|
||||
#define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
|
||||
#endif
|
||||
|
||||
#include "param.h"
|
||||
#include "common_param.h"
|
||||
@@ -385,14 +461,17 @@ typedef int blasint;
|
||||
/* C99 supports complex floating numbers natively, which GCC also offers as an
|
||||
extension since version 3.0. If neither are available, use a compatible
|
||||
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
||||
#if defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || __GNUC__ >= 3
|
||||
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
|
||||
(__GNUC__ >= 3 && !defined(__cplusplus)))
|
||||
#define OPENBLAS_COMPLEX_C99
|
||||
typedef float _Complex openblas_complex_float;
|
||||
typedef double _Complex openblas_complex_double;
|
||||
typedef xdouble _Complex openblas_complex_xdouble;
|
||||
#else
|
||||
#define OPENBLAS_COMPLEX_STRUCT
|
||||
typedef struct { float real, imag; } openblas_complex_float;
|
||||
typedef struct { double real, imag; } openblas_complex_double;
|
||||
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
|
||||
#endif
|
||||
#endif // ASSEMBLER
|
||||
|
||||
@@ -431,6 +510,8 @@ void blas_set_parameter(void);
|
||||
int blas_get_cpu_number(void);
|
||||
void *blas_memory_alloc (int);
|
||||
void blas_memory_free (void *);
|
||||
void *blas_memory_alloc_nolock (int); //use malloc without blas_lock
|
||||
void blas_memory_free_nolock (void *);
|
||||
|
||||
int get_num_procs (void);
|
||||
|
||||
@@ -464,14 +545,21 @@ static __inline void blas_unlock(volatile BLASULONG *address){
|
||||
*address = 0;
|
||||
}
|
||||
|
||||
static __inline int readenv(char *env) {
|
||||
|
||||
char *p;
|
||||
|
||||
p = getenv(env);
|
||||
|
||||
if (p == NULL) return 0; else return atoi(p);
|
||||
#ifdef OS_WINDOWS
|
||||
static __inline int readenv_atoi(char *env) {
|
||||
env_var_t p;
|
||||
return readenv(p,env) ? 0 : atoi(p);
|
||||
}
|
||||
#else
|
||||
/*
 * Read an environment variable and parse it as a decimal integer.
 *
 * env: name of the environment variable. Declared const so that string
 *      literals (as passed by the PRINT_DEBUG_* macros) can be used
 *      without discarding qualifiers.
 * Returns atoi() of the value, or 0 when the variable is not set.
 */
static __inline int readenv_atoi(const char *env) {
  const char *value = getenv(env);

  return value ? atoi(value) : 0;
}
|
||||
#endif
|
||||
|
||||
|
||||
#if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
|
||||
@@ -480,7 +568,7 @@ static __inline void compinv(FLOAT *b, FLOAT ar, FLOAT ai){
|
||||
|
||||
#ifndef UNIT
|
||||
FLOAT ratio, den;
|
||||
|
||||
|
||||
if (
|
||||
#ifdef XDOUBLE
|
||||
(fabsl(ar)) >= (fabsl(ai))
|
||||
@@ -506,7 +594,7 @@ static __inline void compinv(FLOAT *b, FLOAT ar, FLOAT ai){
|
||||
b[0] = ONE;
|
||||
b[1] = ZERO;
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -549,8 +637,10 @@ typedef struct {
|
||||
#include "common_level2.h"
|
||||
#include "common_level3.h"
|
||||
#include "common_lapack.h"
|
||||
|
||||
#ifdef CBLAS
|
||||
#include "cblas.h"
|
||||
# define OPENBLAS_CONST /* see comment in cblas.h */
|
||||
# include "cblas.h"
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
@@ -634,13 +724,13 @@ extern int gotoblas_profile;
|
||||
#define PRINT_DEBUG_CNAME
|
||||
#define PRINT_DEBUG_NAME
|
||||
#else
|
||||
#define PRINT_DEBUG_CNAME if (readenv("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_CNAME)
|
||||
#define PRINT_DEBUG_NAME if (readenv("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
|
||||
#define PRINT_DEBUG_CNAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_CNAME)
|
||||
#define PRINT_DEBUG_NAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
||||
|
||||
@@ -150,9 +150,17 @@ REALNAME:
|
||||
#define PROFCODE .prologue 0
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
#define GNUSTACK .section .note.GNU-stack,"",@progbits
|
||||
#else
|
||||
#define GNUSTACK
|
||||
#endif
|
||||
|
||||
#define EPILOGUE \
|
||||
.end REALNAME; \
|
||||
.ident VERSION
|
||||
.ident VERSION; \
|
||||
GNUSTACK
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef DOUBLE
|
||||
|
||||
143
common_arm.h
Normal file
143
common_arm.h
Normal file
@@ -0,0 +1,143 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011-2015, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************************/
|
||||
|
||||
#ifndef COMMON_ARM
|
||||
#define COMMON_ARM
|
||||
|
||||
#if defined(ARMV5) || defined(ARMV6)
|
||||
|
||||
#define MB
|
||||
#define WMB
|
||||
|
||||
#else
|
||||
|
||||
#define MB __asm__ __volatile__ ("dmb ish" : : : "memory")
|
||||
#define WMB __asm__ __volatile__ ("dmb ishst" : : : "memory")
|
||||
|
||||
#endif
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
#define RETURN_BY_COMPLEX
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
/*
 * Spin on the lock word at `address` (ARM, LDREX/STREX based).
 *
 * The outer loop first waits, executing YIELDING, for as long as *address
 * reads nonzero, then runs an exclusive load/store sequence on the word.
 *
 * NOTE(review): the value written by the exclusive store is 0 (r2 is
 * overwritten with #0 right after the ldrex), so this sequence does not
 * appear to mark the word as taken -- confirm whether a nonzero value was
 * intended.
 */
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
int register ret;
|
||||
|
||||
do {
|
||||
/* Wait until the lock word reads as zero. */
while (*address) {YIELDING;};
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: \n\t"
|
||||
/* Exclusive load of the lock word; the loaded value is discarded below. */
"ldrex r2, [%1] \n\t"
|
||||
"mov r2, #0 \n\t"
|
||||
/* Exclusive store of r2; r3 receives the status (0 means the store succeeded). */
"strex r3, r2, [%1] \n\t"
|
||||
/* Retry from label 1 while the exclusive store reports failure. */
"cmp r3, #0 \n\t"
|
||||
"bne 1b \n\t"
|
||||
/* ret = final store status, which is 0 once the retry loop has been left. */
"mov %0 , r3 \n\t"
|
||||
: "=r"(ret), "=r"(address)
|
||||
: "1"(address)
|
||||
: "memory", "r2" , "r3"
|
||||
|
||||
|
||||
);
|
||||
|
||||
} while (ret);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*
 * Timestamp helper: reads the wall clock with gettimeofday() and returns it
 * in milliseconds (seconds plus microseconds, scaled by 1000 and truncated).
 */
static inline unsigned long long rpcc(void){
  struct timeval tv;
  double seconds;

  gettimeofday(&tv, NULL);
  seconds = (double) tv.tv_sec + (double) tv.tv_usec * 1e-6;

  /* Plain double literal: the former "1000.0d" relied on a GNU-only
     suffix that is not standard C and is rejected by other compilers. */
  return (unsigned long long) (seconds * 1000.0);
}
|
||||
|
||||
/*
 * Integer division helper: no fast-division trick on this target, just the
 * ordinary quotient x / y converted to int.
 */
static inline int blas_quickdivide(blasint x, blasint y){
  blasint quotient = x / y;

  return (int) quotient;
}
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
|
||||
#else
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
|
||||
#endif
|
||||
|
||||
#define GET_IMAGE_CANCEL
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef F_INTERFACE
|
||||
#define REALNAME ASMNAME
|
||||
#else
|
||||
#define REALNAME ASMFNAME
|
||||
#endif
|
||||
|
||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||
|
||||
#define PROLOGUE \
|
||||
.arm ;\
|
||||
.global REALNAME ;\
|
||||
.func REALNAME ;\
|
||||
REALNAME:
|
||||
|
||||
#define EPILOGUE
|
||||
|
||||
#define PROFCODE
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define SEEK_ADDRESS
|
||||
|
||||
#ifndef PAGESIZE
|
||||
#define PAGESIZE ( 4 << 10)
|
||||
#endif
|
||||
#define HUGE_PAGESIZE ( 4 << 20)
|
||||
|
||||
#define BUFFER_SIZE (16 << 20)
|
||||
|
||||
|
||||
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
|
||||
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
#endif
|
||||
136
common_arm64.h
Normal file
136
common_arm64.h
Normal file
@@ -0,0 +1,136 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011-2015, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************************/
|
||||
|
||||
#ifndef COMMON_ARM64
|
||||
#define COMMON_ARM64
|
||||
|
||||
#define MB __asm__ __volatile__ ("dmb ish" : : : "memory")
|
||||
#define WMB __asm__ __volatile__ ("dmb ishst" : : : "memory")
|
||||
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
#define RETURN_BY_COMPLEX
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
/*
 * Spin on the lock word at `address` (AArch64, LDAXR/STLXR based).
 *
 * The outer loop first waits, executing YIELDING, for as long as *address
 * reads nonzero, then runs an exclusive load/store sequence on the word.
 *
 * NOTE(review): the value written by the exclusive store is 0 (%2 is
 * overwritten with #0 right after the ldaxr), so this sequence does not
 * appear to mark the word as taken -- confirm whether a nonzero value was
 * intended.
 */
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
int register ret;
|
||||
int register tmp;
|
||||
|
||||
do {
|
||||
/* Wait until the lock word reads as zero. */
while (*address) {YIELDING;};
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: \n\t"
|
||||
/* Exclusive load of the lock word into tmp; the value is discarded below. */
"ldaxr %2, [%1] \n\t"
|
||||
"mov %2, #0 \n\t"
|
||||
/* Exclusive store of tmp; the status lands in ret (0 means the store succeeded). */
"stlxr %w0, %2, [%1] \n\t"
|
||||
/* Retry from label 1 while the exclusive store reports failure. */
"cbnz %w0, 1b \n\t"
|
||||
/* ret is forced to 0, so the enclosing do/while exits after this pass. */
"mov %0 , #0 \n\t"
|
||||
: "=r"(ret), "=r"(address), "=r"(tmp)
|
||||
: "1"(address)
|
||||
/* NOTE(review): "%w0" names an output operand rather than a register in the clobber list -- confirm this is intended. */
: "memory", "%w0"
|
||||
//, "%r2" , "%r3"
|
||||
|
||||
|
||||
);
|
||||
|
||||
} while (ret);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*
 * Timestamp helper: reads the wall clock with gettimeofday() and returns it
 * in milliseconds (seconds plus microseconds, scaled by 1000 and truncated).
 */
static inline unsigned long long rpcc(void){
  struct timeval tv;
  double seconds;

  gettimeofday(&tv, NULL);
  seconds = (double) tv.tv_sec + (double) tv.tv_usec * 1e-6;

  /* Plain double literal: the former "1000.0d" relied on a GNU-only
     suffix that is not standard C and is rejected by other compilers. */
  return (unsigned long long) (seconds * 1000.0);
}
|
||||
|
||||
/*
 * Integer division helper: no fast-division trick on this target, just the
 * ordinary quotient x / y converted to int.
 */
static inline int blas_quickdivide(blasint x, blasint y){
  blasint quotient = x / y;

  return (int) quotient;
}
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("str d1, %0" : "=m"(res) : : "memory")
|
||||
#else
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("str s1, %0" : "=m"(res) : : "memory")
|
||||
#endif
|
||||
|
||||
#define GET_IMAGE_CANCEL
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef F_INTERFACE
|
||||
#define REALNAME ASMNAME
|
||||
#else
|
||||
#define REALNAME ASMFNAME
|
||||
#endif
|
||||
|
||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||
|
||||
#define PROLOGUE \
|
||||
.global REALNAME ;\
|
||||
.func REALNAME ;\
|
||||
REALNAME:
|
||||
|
||||
#define EPILOGUE
|
||||
|
||||
#define PROFCODE
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define SEEK_ADDRESS
|
||||
|
||||
#ifndef PAGESIZE
|
||||
#define PAGESIZE ( 4 << 10)
|
||||
#endif
|
||||
#define HUGE_PAGESIZE ( 4 << 20)
|
||||
|
||||
#define BUFFER_SIZE (16 << 20)
|
||||
|
||||
|
||||
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
|
||||
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
25
common_c.h
25
common_c.h
@@ -209,6 +209,19 @@
|
||||
#define CNEG_TCOPY cneg_tcopy
|
||||
#define CLASWP_NCOPY claswp_ncopy
|
||||
|
||||
#define CAXPBY_K caxpby_k
|
||||
|
||||
#define COMATCOPY_K_CN comatcopy_k_cn
|
||||
#define COMATCOPY_K_RN comatcopy_k_rn
|
||||
#define COMATCOPY_K_CT comatcopy_k_ct
|
||||
#define COMATCOPY_K_RT comatcopy_k_rt
|
||||
#define COMATCOPY_K_CNC comatcopy_k_cnc
|
||||
#define COMATCOPY_K_RNC comatcopy_k_rnc
|
||||
#define COMATCOPY_K_CTC comatcopy_k_ctc
|
||||
#define COMATCOPY_K_RTC comatcopy_k_rtc
|
||||
|
||||
#define CGEADD_K cgeadd_k
|
||||
|
||||
#else
|
||||
|
||||
#define CAMAX_K gotoblas -> camax_k
|
||||
@@ -380,6 +393,18 @@
|
||||
#define CNEG_TCOPY gotoblas -> cneg_tcopy
|
||||
#define CLASWP_NCOPY gotoblas -> claswp_ncopy
|
||||
|
||||
#define CAXPBY_K gotoblas -> caxpby_k
|
||||
|
||||
#define COMATCOPY_K_CN gotoblas -> comatcopy_k_cn
|
||||
#define COMATCOPY_K_RN gotoblas -> comatcopy_k_rn
|
||||
#define COMATCOPY_K_CT gotoblas -> comatcopy_k_ct
|
||||
#define COMATCOPY_K_RT gotoblas -> comatcopy_k_rt
|
||||
#define COMATCOPY_K_CNC gotoblas -> comatcopy_k_cnc
|
||||
#define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc
|
||||
#define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc
|
||||
#define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc
|
||||
#define CGEADD_K gotoblas -> cgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
#define CGEMM_NN cgemm_nn
|
||||
|
||||
15
common_d.h
15
common_d.h
@@ -144,6 +144,13 @@
|
||||
#define DNEG_TCOPY dneg_tcopy
|
||||
#define DLASWP_NCOPY dlaswp_ncopy
|
||||
|
||||
#define DAXPBY_K daxpby_k
|
||||
#define DOMATCOPY_K_CN domatcopy_k_cn
|
||||
#define DOMATCOPY_K_RN domatcopy_k_rn
|
||||
#define DOMATCOPY_K_CT domatcopy_k_ct
|
||||
#define DOMATCOPY_K_RT domatcopy_k_rt
|
||||
#define DGEADD_K dgeadd_k
|
||||
|
||||
#else
|
||||
|
||||
#define DAMAX_K gotoblas -> damax_k
|
||||
@@ -255,6 +262,14 @@
|
||||
#define DNEG_TCOPY gotoblas -> dneg_tcopy
|
||||
#define DLASWP_NCOPY gotoblas -> dlaswp_ncopy
|
||||
|
||||
#define DAXPBY_K gotoblas -> daxpby_k
|
||||
#define DOMATCOPY_K_CN gotoblas -> domatcopy_k_cn
|
||||
#define DOMATCOPY_K_RN gotoblas -> domatcopy_k_rn
|
||||
#define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct
|
||||
#define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt
|
||||
|
||||
#define DGEADD_K gotoblas -> dgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
#define DGEMM_NN dgemm_nn
|
||||
|
||||
9
common_i.h
Normal file
9
common_i.h
Normal file
@@ -0,0 +1,9 @@
|
||||
#ifndef COMMON_I_H
|
||||
#define COMMON_I_H
|
||||
|
||||
#ifndef DYNAMIC_ARCH
|
||||
#define IAXPYU_K iaxpy_k
|
||||
#else
|
||||
#error
|
||||
#endif
|
||||
#endif
|
||||
@@ -58,10 +58,10 @@
|
||||
static __inline void blas_lock(volatile unsigned long *address){
|
||||
|
||||
unsigned long ret;
|
||||
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
|
||||
__asm__ __volatile__ ("mov ar.ccv=r0\n;;\n"
|
||||
"cmpxchg4.acq %0=[%2],%1,ar.ccv\n"
|
||||
: "=r"(ret) : "r"(1), "r"(address)
|
||||
@@ -379,8 +379,15 @@ REALNAME:
|
||||
#define PROFCODE
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
#define GNUSTACK .section .note.GNU-stack,"",@progbits
|
||||
#else
|
||||
#define GNUSTACK
|
||||
#endif
|
||||
|
||||
#define EPILOGUE \
|
||||
.endp REALNAME
|
||||
.endp REALNAME ; \
|
||||
GNUSTACK
|
||||
|
||||
#define START_ADDRESS 0x20000fc800000000UL
|
||||
|
||||
|
||||
@@ -45,7 +45,7 @@ extern "C" {
|
||||
|
||||
int BLASFUNC(xerbla)(char *, blasint *info, blasint);
|
||||
|
||||
void BLASFUNC(openblas_set_num_threads)(int *);
|
||||
void openblas_set_num_threads_(int *);
|
||||
|
||||
FLOATRET BLASFUNC(sdot) (blasint *, float *, blasint *, float *, blasint *);
|
||||
FLOATRET BLASFUNC(sdsdot)(blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
@@ -76,23 +76,24 @@ myxcomplex_t BLASFUNC(xdotu) (blasint *, xdouble *, blasint *, xdouble *,
|
||||
myxcomplex_t BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
|
||||
#elif defined RETURN_BY_STACK
|
||||
void BLASFUNC(cdotu) (float _Complex *, blasint *, float * , blasint *, float *, blasint *);
|
||||
void BLASFUNC(cdotc) (float _Complex *, blasint *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(zdotu) (double _Complex *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(zdotc) (double _Complex *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(xdotu) (xdouble _Complex *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
void BLASFUNC(xdotc) (xdouble _Complex *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
void BLASFUNC(cdotu) (openblas_complex_float *, blasint *, float * , blasint *, float *, blasint *);
|
||||
void BLASFUNC(cdotc) (openblas_complex_float *, blasint *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(zdotu) (openblas_complex_double *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(zdotc) (openblas_complex_double *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(xdotu) (openblas_complex_xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
void BLASFUNC(xdotc) (openblas_complex_xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
#else
|
||||
float _Complex BLASFUNC(cdotu) (blasint *, float *, blasint *, float *, blasint *);
|
||||
float _Complex BLASFUNC(cdotc) (blasint *, float *, blasint *, float *, blasint *);
|
||||
double _Complex BLASFUNC(zdotu) (blasint *, double *, blasint *, double *, blasint *);
|
||||
double _Complex BLASFUNC(zdotc) (blasint *, double *, blasint *, double *, blasint *);
|
||||
xdouble _Complex BLASFUNC(xdotu) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
xdouble _Complex BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
openblas_complex_float BLASFUNC(cdotu) (blasint *, float *, blasint *, float *, blasint *);
|
||||
openblas_complex_float BLASFUNC(cdotc) (blasint *, float *, blasint *, float *, blasint *);
|
||||
openblas_complex_double BLASFUNC(zdotu) (blasint *, double *, blasint *, double *, blasint *);
|
||||
openblas_complex_double BLASFUNC(zdotc) (blasint *, double *, blasint *, double *, blasint *);
|
||||
openblas_complex_xdouble BLASFUNC(xdotu) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
openblas_complex_xdouble BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
#endif
|
||||
|
||||
void BLASFUNC(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(iaxpy) (blasint *, int *, int *, blasint *, int *, blasint *);
|
||||
void BLASFUNC(qaxpy) (blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
void BLASFUNC(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
|
||||
@@ -238,17 +239,17 @@ void BLASFUNC(xgeru)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(xgerc)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(dgemv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(dgemv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(qgemv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(qgemv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
void BLASFUNC(cgemv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(cgemv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(zgemv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(zgemv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(xgemv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(xgemv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(strsv) (char *, char *, char *, blasint *, float *, blasint *,
|
||||
@@ -257,24 +258,24 @@ void BLASFUNC(dtrsv) (char *, char *, char *, blasint *, double *, blasint *,
|
||||
double *, blasint *);
|
||||
void BLASFUNC(qtrsv) (char *, char *, char *, blasint *, xdouble *, blasint *,
|
||||
xdouble *, blasint *);
|
||||
void BLASFUNC(ctrsv) (char *, char *, char *, blasint *, float *, blasint *,
|
||||
void BLASFUNC(ctrsv) (char *, char *, char *, blasint *, float *, blasint *,
|
||||
float *, blasint *);
|
||||
void BLASFUNC(ztrsv) (char *, char *, char *, blasint *, double *, blasint *,
|
||||
void BLASFUNC(ztrsv) (char *, char *, char *, blasint *, double *, blasint *,
|
||||
double *, blasint *);
|
||||
void BLASFUNC(xtrsv) (char *, char *, char *, blasint *, xdouble *, blasint *,
|
||||
void BLASFUNC(xtrsv) (char *, char *, char *, blasint *, xdouble *, blasint *,
|
||||
xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(strmv) (char *, char *, char *, blasint *, float *, blasint *,
|
||||
float *, blasint *);
|
||||
void BLASFUNC(dtrmv) (char *, char *, char *, blasint *, double *, blasint *,
|
||||
void BLASFUNC(dtrmv) (char *, char *, char *, blasint *, double *, blasint *,
|
||||
double *, blasint *);
|
||||
void BLASFUNC(qtrmv) (char *, char *, char *, blasint *, xdouble *, blasint *,
|
||||
void BLASFUNC(qtrmv) (char *, char *, char *, blasint *, xdouble *, blasint *,
|
||||
xdouble *, blasint *);
|
||||
void BLASFUNC(ctrmv) (char *, char *, char *, blasint *, float *, blasint *,
|
||||
void BLASFUNC(ctrmv) (char *, char *, char *, blasint *, float *, blasint *,
|
||||
float *, blasint *);
|
||||
void BLASFUNC(ztrmv) (char *, char *, char *, blasint *, double *, blasint *,
|
||||
void BLASFUNC(ztrmv) (char *, char *, char *, blasint *, double *, blasint *,
|
||||
double *, blasint *);
|
||||
void BLASFUNC(xtrmv) (char *, char *, char *, blasint *, xdouble *, blasint *,
|
||||
void BLASFUNC(xtrmv) (char *, char *, char *, blasint *, xdouble *, blasint *,
|
||||
xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(stpsv) (char *, char *, char *, blasint *, float *, float *, blasint *);
|
||||
@@ -305,24 +306,24 @@ void BLASFUNC(ctbsv) (char *, char *, char *, blasint *, blasint *, float *, bl
|
||||
void BLASFUNC(ztbsv) (char *, char *, char *, blasint *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(xtbsv) (char *, char *, char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(ssymv) (char *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(ssymv) (char *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(dsymv) (char *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(dsymv) (char *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(qsymv) (char *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(qsymv) (char *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
void BLASFUNC(csymv) (char *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(csymv) (char *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(zsymv) (char *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(zsymv) (char *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(xsymv) (char *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(xsymv) (char *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(sspmv) (char *, blasint *, float *, float *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(dspmv) (char *, blasint *, double *, double *,
|
||||
void BLASFUNC(dspmv) (char *, blasint *, double *, double *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(qspmv) (char *, blasint *, xdouble *, xdouble *,
|
||||
void BLASFUNC(qspmv) (char *, blasint *, xdouble *, xdouble *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
void BLASFUNC(cspmv) (char *, blasint *, float *, float *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
@@ -344,17 +345,17 @@ void BLASFUNC(zsyr) (char *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(xsyr) (char *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(ssyr2) (char *, blasint *, float *,
|
||||
void BLASFUNC(ssyr2) (char *, blasint *, float *,
|
||||
float *, blasint *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(dsyr2) (char *, blasint *, double *,
|
||||
void BLASFUNC(dsyr2) (char *, blasint *, double *,
|
||||
double *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(qsyr2) (char *, blasint *, xdouble *,
|
||||
void BLASFUNC(qsyr2) (char *, blasint *, xdouble *,
|
||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
void BLASFUNC(csyr2) (char *, blasint *, float *,
|
||||
void BLASFUNC(csyr2) (char *, blasint *, float *,
|
||||
float *, blasint *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(zsyr2) (char *, blasint *, double *,
|
||||
void BLASFUNC(zsyr2) (char *, blasint *, double *,
|
||||
double *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(xsyr2) (char *, blasint *, xdouble *,
|
||||
void BLASFUNC(xsyr2) (char *, blasint *, xdouble *,
|
||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(sspr) (char *, blasint *, float *, float *, blasint *,
|
||||
@@ -370,17 +371,17 @@ void BLASFUNC(zspr) (char *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(xspr) (char *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *);
|
||||
|
||||
void BLASFUNC(sspr2) (char *, blasint *, float *,
|
||||
void BLASFUNC(sspr2) (char *, blasint *, float *,
|
||||
float *, blasint *, float *, blasint *, float *);
|
||||
void BLASFUNC(dspr2) (char *, blasint *, double *,
|
||||
void BLASFUNC(dspr2) (char *, blasint *, double *,
|
||||
double *, blasint *, double *, blasint *, double *);
|
||||
void BLASFUNC(qspr2) (char *, blasint *, xdouble *,
|
||||
void BLASFUNC(qspr2) (char *, blasint *, xdouble *,
|
||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *);
|
||||
void BLASFUNC(cspr2) (char *, blasint *, float *,
|
||||
void BLASFUNC(cspr2) (char *, blasint *, float *,
|
||||
float *, blasint *, float *, blasint *, float *);
|
||||
void BLASFUNC(zspr2) (char *, blasint *, double *,
|
||||
void BLASFUNC(zspr2) (char *, blasint *, double *,
|
||||
double *, blasint *, double *, blasint *, double *);
|
||||
void BLASFUNC(xspr2) (char *, blasint *, xdouble *,
|
||||
void BLASFUNC(xspr2) (char *, blasint *, xdouble *,
|
||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *);
|
||||
|
||||
void BLASFUNC(cher) (char *, blasint *, float *, float *, blasint *,
|
||||
@@ -394,25 +395,25 @@ void BLASFUNC(chpr) (char *, blasint *, float *, float *, blasint *, float *
|
||||
void BLASFUNC(zhpr) (char *, blasint *, double *, double *, blasint *, double *);
|
||||
void BLASFUNC(xhpr) (char *, blasint *, xdouble *, xdouble *, blasint *, xdouble *);
|
||||
|
||||
void BLASFUNC(cher2) (char *, blasint *, float *,
|
||||
void BLASFUNC(cher2) (char *, blasint *, float *,
|
||||
float *, blasint *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(zher2) (char *, blasint *, double *,
|
||||
void BLASFUNC(zher2) (char *, blasint *, double *,
|
||||
double *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(xher2) (char *, blasint *, xdouble *,
|
||||
void BLASFUNC(xher2) (char *, blasint *, xdouble *,
|
||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(chpr2) (char *, blasint *, float *,
|
||||
void BLASFUNC(chpr2) (char *, blasint *, float *,
|
||||
float *, blasint *, float *, blasint *, float *);
|
||||
void BLASFUNC(zhpr2) (char *, blasint *, double *,
|
||||
void BLASFUNC(zhpr2) (char *, blasint *, double *,
|
||||
double *, blasint *, double *, blasint *, double *);
|
||||
void BLASFUNC(xhpr2) (char *, blasint *, xdouble *,
|
||||
void BLASFUNC(xhpr2) (char *, blasint *, xdouble *,
|
||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *);
|
||||
|
||||
void BLASFUNC(chemv) (char *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(chemv) (char *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(zhemv) (char *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(zhemv) (char *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(xhemv) (char *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(xhemv) (char *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(chpmv) (char *, blasint *, float *, float *,
|
||||
@@ -427,37 +428,37 @@ int BLASFUNC(dnorm)(char *, blasint *, blasint *, double *, blasint *);
|
||||
int BLASFUNC(cnorm)(char *, blasint *, blasint *, float *, blasint *);
|
||||
int BLASFUNC(znorm)(char *, blasint *, blasint *, double *, blasint *);
|
||||
|
||||
void BLASFUNC(sgbmv)(char *, blasint *, blasint *, blasint *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(sgbmv)(char *, blasint *, blasint *, blasint *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(dgbmv)(char *, blasint *, blasint *, blasint *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(dgbmv)(char *, blasint *, blasint *, blasint *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(qgbmv)(char *, blasint *, blasint *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(qgbmv)(char *, blasint *, blasint *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
void BLASFUNC(cgbmv)(char *, blasint *, blasint *, blasint *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(cgbmv)(char *, blasint *, blasint *, blasint *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(zgbmv)(char *, blasint *, blasint *, blasint *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(zgbmv)(char *, blasint *, blasint *, blasint *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(xgbmv)(char *, blasint *, blasint *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(xgbmv)(char *, blasint *, blasint *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(ssbmv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(ssbmv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(dsbmv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(dsbmv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(qsbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(qsbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
void BLASFUNC(csbmv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(csbmv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(zsbmv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(zsbmv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(xsbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(xsbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(chbmv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
void BLASFUNC(chbmv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(zhbmv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
void BLASFUNC(zhbmv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(xhbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(xhbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
|
||||
/* Level 3 routines */
|
||||
@@ -606,18 +607,18 @@ int BLASFUNC(sgemt)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *);
|
||||
int BLASFUNC(dgemt)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *);
|
||||
int BLASFUNC(cgemt)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
int BLASFUNC(cgemt)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
float *, blasint *);
|
||||
int BLASFUNC(zgemt)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||
double *, blasint *);
|
||||
|
||||
int BLASFUNC(sgema)(char *, char *, blasint *, blasint *, float *,
|
||||
int BLASFUNC(sgema)(char *, char *, blasint *, blasint *, float *,
|
||||
float *, blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
int BLASFUNC(dgema)(char *, char *, blasint *, blasint *, double *,
|
||||
double *, blasint *, double*, double *, blasint *, double*, blasint *);
|
||||
int BLASFUNC(cgema)(char *, char *, blasint *, blasint *, float *,
|
||||
float *, blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
int BLASFUNC(zgema)(char *, char *, blasint *, blasint *, double *,
|
||||
int BLASFUNC(zgema)(char *, char *, blasint *, blasint *, double *,
|
||||
double *, blasint *, double*, double *, blasint *, double*, blasint *);
|
||||
|
||||
int BLASFUNC(sgems)(char *, char *, blasint *, blasint *, float *,
|
||||
@@ -642,6 +643,8 @@ int BLASFUNC(zgemc)(char *, char *, blasint *, blasint *, blasint *, double *,
|
||||
int BLASFUNC(xgemc)(char *, char *, blasint *, blasint *, blasint *, xdouble *,
|
||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
|
||||
/* Lapack routines */
|
||||
|
||||
int BLASFUNC(sgetf2)(blasint *, blasint *, float *, blasint *, blasint *, blasint *);
|
||||
int BLASFUNC(dgetf2)(blasint *, blasint *, double *, blasint *, blasint *, blasint *);
|
||||
int BLASFUNC(qgetf2)(blasint *, blasint *, xdouble *, blasint *, blasint *, blasint *);
|
||||
@@ -691,6 +694,13 @@ int BLASFUNC(cpotrf)(char *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(zpotrf)(char *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(xpotrf)(char *, blasint *, xdouble *, blasint *, blasint *);
|
||||
|
||||
int BLASFUNC(spotrs)(char *, blasint *, blasint *, float *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(dpotrs)(char *, blasint *, blasint *, double *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(qpotrs)(char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, blasint *, blasint *);
|
||||
int BLASFUNC(cpotrs)(char *, blasint *, blasint *, float *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(zpotrs)(char *, blasint *, blasint *, double *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(xpotrs)(char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, blasint *, blasint *);
|
||||
|
||||
int BLASFUNC(slauu2)(char *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(dlauu2)(char *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(qlauu2)(char *, blasint *, xdouble *, blasint *, blasint *);
|
||||
@@ -719,19 +729,6 @@ int BLASFUNC(ctrtri)(char *, char *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(ztrtri)(char *, char *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(xtrtri)(char *, char *, blasint *, xdouble *, blasint *, blasint *);
|
||||
|
||||
int BLASFUNC(spotri)(char *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(dpotri)(char *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(qpotri)(char *, blasint *, xdouble *, blasint *, blasint *);
|
||||
int BLASFUNC(cpotri)(char *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(zpotri)(char *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(xpotri)(char *, blasint *, xdouble *, blasint *, blasint *);
|
||||
|
||||
int BLASFUNC(slarf)(char *, blasint *, blasint *, float *, blasint *, float *, float *, blasint *, float *);
|
||||
int BLASFUNC(dlarf)(char *, blasint *, blasint *, double *, blasint *, double *, double *, blasint *, double *);
|
||||
int BLASFUNC(qlarf)(char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *, xdouble *);
|
||||
int BLASFUNC(clarf)(char *, blasint *, blasint *, float *, blasint *, float *, float *, blasint *, float *);
|
||||
int BLASFUNC(zlarf)(char *, blasint *, blasint *, double *, blasint *, double *, double *, blasint *, double *);
|
||||
int BLASFUNC(xlarf)(char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *, xdouble *);
|
||||
|
||||
FLOATRET BLASFUNC(slamch)(char *);
|
||||
double BLASFUNC(dlamch)(char *);
|
||||
@@ -741,9 +738,32 @@ FLOATRET BLASFUNC(slamc3)(float *, float *);
|
||||
double BLASFUNC(dlamc3)(double *, double *);
|
||||
xdouble BLASFUNC(qlamc3)(xdouble *, xdouble *);
|
||||
|
||||
/* BLAS extensions */
|
||||
|
||||
void BLASFUNC(saxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(daxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(caxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(zaxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *);
|
||||
|
||||
void BLASFUNC(somatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(domatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(comatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(zomatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *);
|
||||
|
||||
void BLASFUNC(simatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *);
|
||||
void BLASFUNC(dimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *);
|
||||
void BLASFUNC(cimatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *);
|
||||
void BLASFUNC(zimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *);
|
||||
|
||||
void BLASFUNC(sgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*);
|
||||
void BLASFUNC(dgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*);
|
||||
void BLASFUNC(cgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*);
|
||||
void BLASFUNC(zgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user