Compare commits
1443 Commits
v0.3.5
...
small_matr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
77460ac255 | ||
|
|
88e6806e3f | ||
|
|
4130d1732e | ||
|
|
255b6dd0fa | ||
|
|
741d6c5cb8 | ||
|
|
514a3d7d63 | ||
|
|
085aae8bdb | ||
|
|
712ca43069 | ||
|
|
5c6c2cd4f6 | ||
|
|
e54be4ba1c | ||
|
|
48a1364e10 | ||
|
|
0c1c903f1e | ||
|
|
a073fa870e | ||
|
|
b2053239fc | ||
|
|
b11bb6e728 | ||
|
|
1840bc5b52 | ||
|
|
7c0977c267 | ||
|
|
fb3d80c42a | ||
|
|
9ee21a0a39 | ||
|
|
bd3207b4b4 | ||
|
|
b8ebfc9335 | ||
|
|
7c1986640b | ||
|
|
71d33c952d | ||
|
|
6a3c074786 | ||
|
|
430f741b30 | ||
|
|
6f4dc7445d | ||
|
|
81fbe8d088 | ||
|
|
bb9cf766f5 | ||
|
|
75eeb265d7 | ||
|
|
2c72972570 | ||
|
|
6b731d917f | ||
|
|
5dcf47cd97 | ||
|
|
aa286e301b | ||
|
|
9f0ef9cdfc | ||
|
|
6bfc66663c | ||
|
|
a8c6fb9e1c | ||
|
|
5ec8f716cf | ||
|
|
82f8a0aeba | ||
|
|
d57d503c15 | ||
|
|
37ac23e8a3 | ||
|
|
6a93e3b2ba | ||
|
|
47ce1dd08f | ||
|
|
f5fcc5baec | ||
|
|
e740c4873d | ||
|
|
efdd237a91 | ||
|
|
4573cb2f43 | ||
|
|
2a4bb797db | ||
|
|
cbbe38bb88 | ||
|
|
619343278d | ||
|
|
fee361ae64 | ||
|
|
62f4c84f27 | ||
|
|
e115c97e05 | ||
|
|
07c334e7be | ||
|
|
e2828e30aa | ||
|
|
7219c9cb87 | ||
|
|
c9d32674ea | ||
|
|
64259d521a | ||
|
|
6f5ca44c1a | ||
|
|
d28b3f2776 | ||
|
|
ba3f7b3acf | ||
|
|
475b5c95b9 | ||
|
|
cd60080d4a | ||
|
|
4847bfdddd | ||
|
|
81dcfdcf39 | ||
|
|
0ef4b3f1f2 | ||
|
|
aa53a8a5cb | ||
|
|
aa3a1e7d8c | ||
|
|
aaf1a17168 | ||
|
|
53add6a80d | ||
|
|
9eb897cc01 | ||
|
|
7cead56258 | ||
|
|
6794ac3415 | ||
|
|
ecf4b9e0fc | ||
|
|
dfe5d09641 | ||
|
|
60cd5e55fc | ||
|
|
da9e2a7ada | ||
|
|
c88cbc5e0d | ||
|
|
589c74aed3 | ||
|
|
104aa678b0 | ||
|
|
c6b48e0394 | ||
|
|
4927251298 | ||
|
|
f77b6a83f4 | ||
|
|
39724e8128 | ||
|
|
525db5401c | ||
|
|
cb097beba2 | ||
|
|
7c02f4b1f7 | ||
|
|
383262035d | ||
|
|
5fa581c87e | ||
|
|
12918358aa | ||
|
|
200f5c44cc | ||
|
|
64e2e4aaf3 | ||
|
|
921ec4e9e2 | ||
|
|
d557584b71 | ||
|
|
a4ceb1ade9 | ||
|
|
4e1be0e481 | ||
|
|
49b83e00b7 | ||
|
|
769ed9ffad | ||
|
|
f194ad59e1 | ||
|
|
4fda217f99 | ||
|
|
9be2688c78 | ||
|
|
6a2a60038c | ||
|
|
251a09ec90 | ||
|
|
95d37e1575 | ||
|
|
3523bb778e | ||
|
|
a50d0e29c8 | ||
|
|
bf1f0734ff | ||
|
|
ca3561cab9 | ||
|
|
21072e502a | ||
|
|
7c6e56b5df | ||
|
|
661c6bfa5a | ||
|
|
9796e552ea | ||
|
|
d6b6e5ccd7 | ||
|
|
349b722d8d | ||
|
|
6c33764ca4 | ||
|
|
d1b9613fd4 | ||
|
|
3cfc74b1a0 | ||
|
|
9ae154ba89 | ||
|
|
9e21a100e3 | ||
|
|
31d30312dc | ||
|
|
fcfb7ffafb | ||
|
|
bbe119ee3b | ||
|
|
f4f74941bd | ||
|
|
a36eb19ae0 | ||
|
|
ce45af8151 | ||
|
|
6f38de06d2 | ||
|
|
09eb9d2584 | ||
|
|
791e046744 | ||
|
|
94bab9d1f9 | ||
|
|
97d6eb97b1 | ||
|
|
4afd11dae5 | ||
|
|
72ec6280c7 | ||
|
|
26b7f24d16 | ||
|
|
0db4218fed | ||
|
|
9d000ecaa2 | ||
|
|
a847d00366 | ||
|
|
0033f8be0d | ||
|
|
f308e741b2 | ||
|
|
4f5d26bb02 | ||
|
|
417c4e8af8 | ||
|
|
da17abec87 | ||
|
|
f8c2697701 | ||
|
|
b144423f0f | ||
|
|
bd2498c886 | ||
|
|
d8e2edfc20 | ||
|
|
419b8686d1 | ||
|
|
3ab15ff34c | ||
|
|
8916c4ae2c | ||
|
|
4fa283de66 | ||
|
|
5865c7d4d6 | ||
|
|
ae3a90f78f | ||
|
|
009864edde | ||
|
|
3de80b3f5a | ||
|
|
af1e140e35 | ||
|
|
d4a0299e16 | ||
|
|
f766024749 | ||
|
|
c502760bef | ||
|
|
29b5887d5f | ||
|
|
60188a8c82 | ||
|
|
1d63631afe | ||
|
|
e82bb953a7 | ||
|
|
ed7e155c35 | ||
|
|
45d819ca82 | ||
|
|
8751a69271 | ||
|
|
10a2923f64 | ||
|
|
5ff83a4261 | ||
|
|
5bc9680a86 | ||
|
|
4ab3651591 | ||
|
|
a83680b40b | ||
|
|
c3aa036e99 | ||
|
|
634e1305f9 | ||
|
|
c467516132 | ||
|
|
83f4746825 | ||
|
|
584ef8d4ae | ||
|
|
8dfda02e89 | ||
|
|
28d69e0097 | ||
|
|
c2467c9619 | ||
|
|
f86e749df4 | ||
|
|
d199c2787d | ||
|
|
e30ad0e521 | ||
|
|
d23419accc | ||
|
|
2f9c10810c | ||
|
|
f37e941d52 | ||
|
|
2a91452bdd | ||
|
|
c854ef5471 | ||
|
|
c0afc11742 | ||
|
|
c592f0f80a | ||
|
|
3f613b1301 | ||
|
|
72a0ec8e75 | ||
|
|
3446e58daf | ||
|
|
4ca8becc4b | ||
|
|
dfe819f3bd | ||
|
|
4369e52555 | ||
|
|
bb2f52844b | ||
|
|
571eadb880 | ||
|
|
df4ade070f | ||
|
|
e6b9275034 | ||
|
|
53ea5bfece | ||
|
|
93592d1260 | ||
|
|
6eaeb01263 | ||
|
|
45d542c9d1 | ||
|
|
086d87a302 | ||
|
|
af501eb753 | ||
|
|
0eb6c4dded | ||
|
|
de838c38ef | ||
|
|
478898b37a | ||
|
|
cde4690721 | ||
|
|
2389291766 | ||
|
|
a2d13ea611 | ||
|
|
1bd3cd66c2 | ||
|
|
1c53e1366d | ||
|
|
95dbeff66d | ||
|
|
3b673a24b7 | ||
|
|
1eb1979050 | ||
|
|
efc53b6e7e | ||
|
|
72888497e2 | ||
|
|
7e3e006af6 | ||
|
|
d906d14402 | ||
|
|
3785c0e82b | ||
|
|
f2d8879af6 | ||
|
|
6876221cf3 | ||
|
|
79cdcde717 | ||
|
|
18a11137f1 | ||
|
|
1dd712131e | ||
|
|
0ed2adf0b2 | ||
|
|
abf670757b | ||
|
|
41fc6f3cd2 | ||
|
|
007d9f97d7 | ||
|
|
63d26090f5 | ||
|
|
9fe930f205 | ||
|
|
3a1b58d54a | ||
|
|
f7659be4a0 | ||
|
|
bc6fd20a40 | ||
|
|
3ce469a34f | ||
|
|
ba2c5b404d | ||
|
|
f07a80354b | ||
|
|
fdd1b50263 | ||
|
|
b98923f33a | ||
|
|
4cb1db0e3b | ||
|
|
430e8b45fe | ||
|
|
88fe85f4e0 | ||
|
|
89091e6b64 | ||
|
|
522aaf53bf | ||
|
|
c3574ffe53 | ||
|
|
4e28dc6353 | ||
|
|
13c28889a2 | ||
|
|
0e3ac4a06b | ||
|
|
28915eed72 | ||
|
|
7f60fb6b91 | ||
|
|
0464e662ad | ||
|
|
0f9a935a5a | ||
|
|
79cd69fea4 | ||
|
|
bb12c2c854 | ||
|
|
32c1c1e125 | ||
|
|
f1953b8b81 | ||
|
|
6e97df7b47 | ||
|
|
729303e5ed | ||
|
|
547965530f | ||
|
|
9b7877ccf1 | ||
|
|
f82fa802d1 | ||
|
|
3eda3d34c3 | ||
|
|
a8f42ae85c | ||
|
|
e6e2e531bc | ||
|
|
456dc04441 | ||
|
|
89323458a9 | ||
|
|
e153bdeb70 | ||
|
|
c2001f7756 | ||
|
|
c2b3f0b3f6 | ||
|
|
f16e39554d | ||
|
|
b1ee81228a | ||
|
|
9f7358d7dc | ||
|
|
54fa90fb25 | ||
|
|
5a709b8340 | ||
|
|
b31a68b835 | ||
|
|
86552bf4c7 | ||
|
|
a349d48d89 | ||
|
|
4db00121dc | ||
|
|
909897f13b | ||
|
|
e79245acd9 | ||
|
|
76d2612e0c | ||
|
|
ced49466f0 | ||
|
|
6e270f91ec | ||
|
|
200296b0f4 | ||
|
|
dd7a650792 | ||
|
|
4a4c50a7ce | ||
|
|
d069780e63 | ||
|
|
33c8790603 | ||
|
|
06387ac0e6 | ||
|
|
f1a18d245b | ||
|
|
2a3aa91354 | ||
|
|
ea5bdc3f72 | ||
|
|
9df79ae9a3 | ||
|
|
a1fc6041cd | ||
|
|
edb423d772 | ||
|
|
0e6eb8c247 | ||
|
|
d475db29c6 | ||
|
|
729ac6bd4a | ||
|
|
89fe17f20e | ||
|
|
bdd795ed03 | ||
|
|
e1038ea836 | ||
|
|
6baa9a778d | ||
|
|
cf46c9f84e | ||
|
|
55602fce56 | ||
|
|
3d5e159e7a | ||
|
|
2931feb575 | ||
|
|
20245ded5f | ||
|
|
2840432e49 | ||
|
|
ea78106c71 | ||
|
|
cb9dc36dd5 | ||
|
|
1b0b4349a1 | ||
|
|
71b6eaf459 | ||
|
|
43c0d4f312 | ||
|
|
d7c1677c20 | ||
|
|
0dbe61a612 | ||
|
|
62cf391cbb | ||
|
|
8c338616f9 | ||
|
|
f94c53ec0a | ||
|
|
8efba9b7c0 | ||
|
|
4fffa556d8 | ||
|
|
ce90e2bd3f | ||
|
|
948b6712ba | ||
|
|
2271c3506b | ||
|
|
db00b21445 | ||
|
|
58d26b4448 | ||
|
|
8e47d14053 | ||
|
|
cd10b35fe9 | ||
|
|
9472dd99cd | ||
|
|
7181665452 | ||
|
|
bd9ff820bc | ||
|
|
63e45def70 | ||
|
|
ec0f228632 | ||
|
|
90e2941c61 | ||
|
|
10d5f3c87b | ||
|
|
8353cb245a | ||
|
|
ec2dd7b875 | ||
|
|
4e82eb9f8a | ||
|
|
61300bb735 | ||
|
|
33e9b12464 | ||
|
|
90dba9f716 | ||
|
|
424d551e01 | ||
|
|
596f5df9e8 | ||
|
|
5dd14e3d48 | ||
|
|
a54e35e780 | ||
|
|
564b0d39ef | ||
|
|
5d58b11101 | ||
|
|
d394d4e677 | ||
|
|
9d3a317abc | ||
|
|
92372c70fc | ||
|
|
43bef4aaac | ||
|
|
aae6af94bb | ||
|
|
f4248af26e | ||
|
|
2d89603e9d | ||
|
|
26bc15258a | ||
|
|
141998dce2 | ||
|
|
3bd56846bb | ||
|
|
e7bbdfdf84 | ||
|
|
b6795db731 | ||
|
|
5e0dbf8dfe | ||
|
|
955d73127f | ||
|
|
a8c1bea7ae | ||
|
|
e43b49e064 | ||
|
|
3e28db7f38 | ||
|
|
4b69ee31af | ||
|
|
03ff213c51 | ||
|
|
299d1c8de0 | ||
|
|
70869d571f | ||
|
|
cba87222b2 | ||
|
|
f80dd2151e | ||
|
|
4412ee1754 | ||
|
|
f6104b68c1 | ||
|
|
84f2c71e93 | ||
|
|
06208c8d01 | ||
|
|
c90b28dee6 | ||
|
|
6275b43918 | ||
|
|
2db5178e2d | ||
|
|
57549f5c92 | ||
|
|
f5c4c28b98 | ||
|
|
239282d5e2 | ||
|
|
568674477c | ||
|
|
fa42588e1f | ||
|
|
8a6d26458b | ||
|
|
db86f516b9 | ||
|
|
aec353b5a7 | ||
|
|
c62fbefad4 | ||
|
|
04706e760d | ||
|
|
e1e543b145 | ||
|
|
e55ec82bb9 | ||
|
|
7353ea5afc | ||
|
|
6a04efb122 | ||
|
|
5afb66812f | ||
|
|
0d18f231fc | ||
|
|
2f4a8e5bc4 | ||
|
|
4f70512b97 | ||
|
|
8792fc4d5f | ||
|
|
577c5d9f8f | ||
|
|
6721f2750e | ||
|
|
b0b02a080d | ||
|
|
a1fc98dc57 | ||
|
|
d0737b0142 | ||
|
|
7dbb59b256 | ||
|
|
00172d440b | ||
|
|
d712ea724c | ||
|
|
61bbae3ac1 | ||
|
|
1c1ca2bc0a | ||
|
|
c7d668c248 | ||
|
|
a83a59b038 | ||
|
|
0a19bd813c | ||
|
|
e7afe8a969 | ||
|
|
f361de30a3 | ||
|
|
9f6d6f6cb6 | ||
|
|
22bb50fb81 | ||
|
|
236a3d8ce6 | ||
|
|
6b7ef6543a | ||
|
|
67cc4b9e16 | ||
|
|
250e6f8039 | ||
|
|
7a6d0016b0 | ||
|
|
e8e8a6e608 | ||
|
|
579811fb6a | ||
|
|
a87793e03c | ||
|
|
ac6a22ae78 | ||
|
|
ff010f496e | ||
|
|
7eb55504b1 | ||
|
|
84a9614345 | ||
|
|
b969533703 | ||
|
|
0f08f3efa6 | ||
|
|
c861b2a7bd | ||
|
|
cf62adffbb | ||
|
|
3eec7d382c | ||
|
|
5b0093b5fe | ||
|
|
f41600e66f | ||
|
|
f5efecb7ca | ||
|
|
a52bdd9d7b | ||
|
|
db3226a646 | ||
|
|
69b6e258d8 | ||
|
|
3d4db4d002 | ||
|
|
99dde1d2c9 | ||
|
|
ee6b3df02c | ||
|
|
25e879fe92 | ||
|
|
d237dc1360 | ||
|
|
8692456226 | ||
|
|
d1d69e1b9a | ||
|
|
20d0cb2f65 | ||
|
|
e7f0da9295 | ||
|
|
e9bfa2291a | ||
|
|
2a28448a96 | ||
|
|
a33d177430 | ||
|
|
f73391c9c9 | ||
|
|
7905383cb5 | ||
|
|
a8cbd451bf | ||
|
|
eecd8c3204 | ||
|
|
ea85eb2e02 | ||
|
|
66f89c0aaf | ||
|
|
8d07cf9b67 | ||
|
|
7b4773b24d | ||
|
|
69f277f8ee | ||
|
|
3a6d51c2fd | ||
|
|
1c7771df96 | ||
|
|
a56c9ec52a | ||
|
|
4ae6d1a01b | ||
|
|
7972beb375 | ||
|
|
41e802443a | ||
|
|
7bd8624b79 | ||
|
|
806f89166e | ||
|
|
f059e614eb | ||
|
|
e13b6773ee | ||
|
|
a05243d0f2 | ||
|
|
c6af9bbb32 | ||
|
|
144be81ca1 | ||
|
|
07cdd5d05c | ||
|
|
567d2760e6 | ||
|
|
018bb3e433 | ||
|
|
79fd006c58 | ||
|
|
8229c163b7 | ||
|
|
a986d42ea6 | ||
|
|
b6a948fbee | ||
|
|
0cc352417e | ||
|
|
fe47dc8673 | ||
|
|
9f67d03d3b | ||
|
|
50f4fb2fbd | ||
|
|
6a14b34c20 | ||
|
|
8c7c1395da | ||
|
|
5f6f6a2c7d | ||
|
|
71cf2acdef | ||
|
|
1d9773b800 | ||
|
|
a46a8c4956 | ||
|
|
7ae737e04c | ||
|
|
64daad4365 | ||
|
|
b8307768e2 | ||
|
|
6d54c94760 | ||
|
|
c0da205412 | ||
|
|
a06d78556d | ||
|
|
af8a619e1f | ||
|
|
62b9608986 | ||
|
|
717c604aeb | ||
|
|
ce33da4cab | ||
|
|
a1b181cea2 | ||
|
|
cdc0e9011e | ||
|
|
fa049d49c2 | ||
|
|
d45c53ecf1 | ||
|
|
c2840997db | ||
|
|
c775458299 | ||
|
|
c0649aa694 | ||
|
|
2428dc9fd3 | ||
|
|
0fe0914437 | ||
|
|
4cfd8a3f57 | ||
|
|
ee2e758278 | ||
|
|
2d8781b0dc | ||
|
|
c436e8af7b | ||
|
|
a0a3bf7c81 | ||
|
|
ae3f2c2e49 | ||
|
|
83ecf9fea7 | ||
|
|
649733ff15 | ||
|
|
3e8f1c6cc5 | ||
|
|
2f4c5bb3a9 | ||
|
|
4e1c4e67d4 | ||
|
|
b9a2a3c540 | ||
|
|
047dfb216d | ||
|
|
cd8871f1a1 | ||
|
|
b25ae1fc60 | ||
|
|
3f7f7ab7e2 | ||
|
|
9c22170f52 | ||
|
|
08e1d8cbae | ||
|
|
ff40a4e726 | ||
|
|
51019feae1 | ||
|
|
bec7923a0d | ||
|
|
c5bdd21352 | ||
|
|
d2d16d091e | ||
|
|
b6a6ccbbea | ||
|
|
8b720f7365 | ||
|
|
14df234edb | ||
|
|
bbeda55b7b | ||
|
|
efcf89aec7 | ||
|
|
37d456f7e0 | ||
|
|
0b9e96922b | ||
|
|
0c8162eba6 | ||
|
|
9a94a30132 | ||
|
|
09c7a191bd | ||
|
|
8a8df530e2 | ||
|
|
37f46f2fa0 | ||
|
|
9afc561be4 | ||
|
|
dca3e0cf20 | ||
|
|
c9f8db979b | ||
|
|
18099de976 | ||
|
|
97c36ca58c | ||
|
|
9f5a74f3c7 | ||
|
|
2afb10975d | ||
|
|
dbef479227 | ||
|
|
208c7e7ca5 | ||
|
|
32c847df45 | ||
|
|
e0df9485d4 | ||
|
|
0f1a2b12f9 | ||
|
|
233838b4bc | ||
|
|
13f9afbd99 | ||
|
|
de74e11641 | ||
|
|
ad9e53154d | ||
|
|
6e70621b0d | ||
|
|
e6edb7431f | ||
|
|
114dbec947 | ||
|
|
d68e4ba59b | ||
|
|
635c9e4e09 | ||
|
|
2afc074803 | ||
|
|
5d6c688a7e | ||
|
|
87baf9cfe6 | ||
|
|
c0ca7d6258 | ||
|
|
f682d19ed4 | ||
|
|
790d50fbba | ||
|
|
59243d49ab | ||
|
|
d41f83e128 | ||
|
|
ee4ca7ca6b | ||
|
|
e326c89ae8 | ||
|
|
21f6c4b5a9 | ||
|
|
7ca4ffdbdd | ||
|
|
0f65c05cd1 | ||
|
|
917d243580 | ||
|
|
43c2e845ab | ||
|
|
33f76a6c37 | ||
|
|
960dec234f | ||
|
|
6b92979f35 | ||
|
|
014fc13995 | ||
|
|
f1e05676a0 | ||
|
|
d221c50f27 | ||
|
|
f14013da7f | ||
|
|
4f371b0fbf | ||
|
|
02d60c1563 | ||
|
|
2e6963259b | ||
|
|
e94590e400 | ||
|
|
69d4687142 | ||
|
|
a731a9bbb9 | ||
|
|
1aa5907a2c | ||
|
|
ea8eec5d17 | ||
|
|
97ce6bbce2 | ||
|
|
a9aeb6745c | ||
|
|
0af9991cc9 | ||
|
|
19f3a4091c | ||
|
|
c623a965f9 | ||
|
|
e1062400c4 | ||
|
|
a66f4d80c8 | ||
|
|
dd22eb7621 | ||
|
|
2352331e60 | ||
|
|
430ee31e66 | ||
|
|
8164fd1328 | ||
|
|
531c6b96d6 | ||
|
|
1b980001dd | ||
|
|
2515e1152f | ||
|
|
ddcbed6690 | ||
|
|
f8ec538c82 | ||
|
|
ca4f7dceff | ||
|
|
1ddf9f1067 | ||
|
|
4c5fac5a2b | ||
|
|
320e2648cd | ||
|
|
9b732696c6 | ||
|
|
c9dcb3d4a4 | ||
|
|
3bb7f0138e | ||
|
|
c93ae92579 | ||
|
|
87ac1ceb0b | ||
|
|
9e40c080f2 | ||
|
|
903854c168 | ||
|
|
a2ff577a30 | ||
|
|
97a32cb0a5 | ||
|
|
f1746e7284 | ||
|
|
f6fcbd7906 | ||
|
|
1e8410f18c | ||
|
|
07454bf4d5 | ||
|
|
4046985913 | ||
|
|
75577f95a7 | ||
|
|
33d92c7a37 | ||
|
|
e57b11acca | ||
|
|
71e5669c3e | ||
|
|
e8d82c01d4 | ||
|
|
0b39cf95b0 | ||
|
|
76b2cec6ce | ||
|
|
276c1791ea | ||
|
|
c5bbfd8fee | ||
|
|
130c1741e5 | ||
|
|
8f782f0673 | ||
|
|
6a517dcb6a | ||
|
|
9f39f0a2c3 | ||
|
|
1a88c4ab26 | ||
|
|
0b44802164 | ||
|
|
2c242b4cef | ||
|
|
0bfb7336d2 | ||
|
|
403cde104e | ||
|
|
634f2bddda | ||
|
|
aeea14ee40 | ||
|
|
18bcc36a69 | ||
|
|
0e7f43c898 | ||
|
|
79e201fbba | ||
|
|
4326dcb460 | ||
|
|
e32f3b1447 | ||
|
|
d483e9270a | ||
|
|
01834aee33 | ||
|
|
b0558c11b9 | ||
|
|
f566787e6e | ||
|
|
e3368cbf18 | ||
|
|
d92bd5be24 | ||
|
|
46e4b12946 | ||
|
|
5e94aa4877 | ||
|
|
93f3e27574 | ||
|
|
785c389b0e | ||
|
|
c222b25b81 | ||
|
|
221da8bf05 | ||
|
|
eb285b4d20 | ||
|
|
cafdd999b8 | ||
|
|
92ca92a46c | ||
|
|
486c35c5dc | ||
|
|
0e05ea9bac | ||
|
|
5ba3699f41 | ||
|
|
8eefa530cd | ||
|
|
de40d47edf | ||
|
|
7c162b8a21 | ||
|
|
0544cbc806 | ||
|
|
120d20731f | ||
|
|
dc345d84df | ||
|
|
616921fd91 | ||
|
|
8a9e9a82a1 | ||
|
|
7ea5e07d1c | ||
|
|
cb6ef49857 | ||
|
|
63994e1cdb | ||
|
|
496e3019bc | ||
|
|
169be3f097 | ||
|
|
6ccbb089c2 | ||
|
|
59ebe3636a | ||
|
|
5a6bba3061 | ||
|
|
dff173e50e | ||
|
|
7e5cbb6f35 | ||
|
|
303bdb673b | ||
|
|
754433f420 | ||
|
|
7f0d523b42 | ||
|
|
c353d8b106 | ||
|
|
579be3aa9d | ||
|
|
449e8ea443 | ||
|
|
3bec250cf9 | ||
|
|
f03dd23e90 | ||
|
|
fb5eb47558 | ||
|
|
fa93d63365 | ||
|
|
90e6c66a57 | ||
|
|
32d97330b3 | ||
|
|
29eaf4b6d7 | ||
|
|
47c1bf7f4d | ||
|
|
2b55f0ad30 | ||
|
|
a5b32ab06c | ||
|
|
50545b19d0 | ||
|
|
b3cbd60d7a | ||
|
|
70199d1905 | ||
|
|
cfe63d8cc2 | ||
|
|
d55b10830f | ||
|
|
c1c10cbb21 | ||
|
|
5989841524 | ||
|
|
68a43db358 | ||
|
|
9694037b23 | ||
|
|
71faa1c1a7 | ||
|
|
3447d04eaf | ||
|
|
8b5cdcc64c | ||
|
|
4e00d96a78 | ||
|
|
ce9ea8f826 | ||
|
|
0b909203cb | ||
|
|
096da2f51a | ||
|
|
2f96a2c55b | ||
|
|
833bd0f8ff | ||
|
|
77b8f49556 | ||
|
|
1c3e20ce48 | ||
|
|
83b6be7976 | ||
|
|
081b188529 | ||
|
|
f3f969f681 | ||
|
|
8019e70211 | ||
|
|
8d2a796f49 | ||
|
|
8dc9fd4dfe | ||
|
|
abc67bdd74 | ||
|
|
1f62a82789 | ||
|
|
e9fb8f62b1 | ||
|
|
fbf4f48f4a | ||
|
|
b9ad450295 | ||
|
|
e011ad820a | ||
|
|
ff42e68652 | ||
|
|
23f322f997 | ||
|
|
093d37de8d | ||
|
|
d65e9a2bbd | ||
|
|
78100b8093 | ||
|
|
70f45749b9 | ||
|
|
e5dcdeb550 | ||
|
|
952cc2ba38 | ||
|
|
feaafbedd3 | ||
|
|
1c67567008 | ||
|
|
4e979bf75b | ||
|
|
daa4310db5 | ||
|
|
b8f3605132 | ||
|
|
b36018be6d | ||
|
|
3a100b2797 | ||
|
|
38742d5547 | ||
|
|
bd4c032f52 | ||
|
|
9dc9b7b95e | ||
|
|
9f5cdc49d4 | ||
|
|
b7b408a120 | ||
|
|
92b10212de | ||
|
|
b73bf01378 | ||
|
|
eb3c9f1db9 | ||
|
|
fd2ff2714f | ||
|
|
2ea2bd99c7 | ||
|
|
fbb894948c | ||
|
|
e711659c90 | ||
|
|
893e6e57c4 | ||
|
|
456ee2e1f0 | ||
|
|
9998f8ed8b | ||
|
|
80db5f11e1 | ||
|
|
52de4cc8fd | ||
|
|
44028581cc | ||
|
|
86ab939936 | ||
|
|
375b1875c8 | ||
|
|
6c85cb1869 | ||
|
|
995768bbc5 | ||
|
|
96ad579428 | ||
|
|
8d84403205 | ||
|
|
8729db117c | ||
|
|
0833a4846a | ||
|
|
50f7fc1401 | ||
|
|
d1b53806be | ||
|
|
a0f0a802fc | ||
|
|
700fe5b5ee | ||
|
|
bb2729c855 | ||
|
|
aae44d040d | ||
|
|
6362c34ee6 | ||
|
|
f60840c420 | ||
|
|
109e18cd96 | ||
|
|
ae1579be13 | ||
|
|
3ccf8885ac | ||
|
|
454847588e | ||
|
|
0257f26488 | ||
|
|
c45b7aef14 | ||
|
|
312060d0d6 | ||
|
|
cd765f094b | ||
|
|
64639f440f | ||
|
|
3a66c8cac1 | ||
|
|
4c35b8dbaa | ||
|
|
ed9af2f7da | ||
|
|
5fd1edead9 | ||
|
|
26478eb0d0 | ||
|
|
eeecd623d8 | ||
|
|
3ce6bcdb5f | ||
|
|
6fbe51072b | ||
|
|
611445c7f8 | ||
|
|
2cd9306bb5 | ||
|
|
c418c81224 | ||
|
|
025741f16a | ||
|
|
0ae49d2990 | ||
|
|
105e26e12a | ||
|
|
f41d52665d | ||
|
|
d573d24de7 | ||
|
|
31d6c2eb7d | ||
|
|
b7cc69ee62 | ||
|
|
aeef942c4f | ||
|
|
445ca2f418 | ||
|
|
13226e3101 | ||
|
|
1a6ea8ee6d | ||
|
|
c6ecb195e6 | ||
|
|
b28db31429 | ||
|
|
6baa9b07d7 | ||
|
|
a4896b5538 | ||
|
|
3938e59569 | ||
|
|
9d5079008f | ||
|
|
3518617f5b | ||
|
|
715f4650d9 | ||
|
|
10705183ce | ||
|
|
235599f17a | ||
|
|
b863b32ac5 | ||
|
|
dd04143d4a | ||
|
|
f3a6164bff | ||
|
|
dedd822d1a | ||
|
|
2181fb7047 | ||
|
|
a9b62c03f8 | ||
|
|
97762234f9 | ||
|
|
948d11fc51 | ||
|
|
c815b8fb85 | ||
|
|
e20709e976 | ||
|
|
934e601e93 | ||
|
|
a4c3668f99 | ||
|
|
867232c6a4 | ||
|
|
5aaf70ef95 | ||
|
|
ae2a0995cc | ||
|
|
83dae28ae2 | ||
|
|
da986d2e83 | ||
|
|
6bc487de35 | ||
|
|
cf2a8e410c | ||
|
|
eb1e9c8c92 | ||
|
|
f95989cbc1 | ||
|
|
f3065a0eed | ||
|
|
04226f1e97 | ||
|
|
0925ef70db | ||
|
|
371e6f73d4 | ||
|
|
d117dfd505 | ||
|
|
883c39773a | ||
|
|
b09b5be0a4 | ||
|
|
bfb5fbdb4d | ||
|
|
3da6d66da9 | ||
|
|
08fa83aba2 | ||
|
|
63d3ee8dfc | ||
|
|
1191db1a49 | ||
|
|
1f6071590d | ||
|
|
0caf1434c9 | ||
|
|
73128f3883 | ||
|
|
cad0d150db | ||
|
|
eba0aeb7cd | ||
|
|
0c07c356c1 | ||
|
|
82b75f97e5 | ||
|
|
7887c45077 | ||
|
|
3e67017ac8 | ||
|
|
b3ac6ee222 | ||
|
|
6082e556cd | ||
|
|
92315173d5 | ||
|
|
351d12b94e | ||
|
|
bf73aa141b | ||
|
|
71e96163db | ||
|
|
819e852ae7 | ||
|
|
4e466d739c | ||
|
|
4c6a457358 | ||
|
|
836c414e22 | ||
|
|
d403eb3c2f | ||
|
|
3cd97f1a80 | ||
|
|
9955f0996f | ||
|
|
430c11e135 | ||
|
|
fbacd2605d | ||
|
|
6fa89b06a1 | ||
|
|
68597002ea | ||
|
|
d2a6285549 | ||
|
|
d999688d1a | ||
|
|
928fe1b28e | ||
|
|
ccc28c6d60 | ||
|
|
ae43b75a6a | ||
|
|
54fc06fd70 | ||
|
|
1df9a2013d | ||
|
|
274ff5cdb8 | ||
|
|
eb2eddf241 | ||
|
|
8691825944 | ||
|
|
7dc8a76f60 | ||
|
|
df857551c0 | ||
|
|
85ccdce8c4 | ||
|
|
aeabe0a83f | ||
|
|
1b90989662 | ||
|
|
e3e8b5cdca | ||
|
|
69b16a894d | ||
|
|
6782e5767d | ||
|
|
48f5a89f92 | ||
|
|
4ae1610f37 | ||
|
|
911c3e2f4b | ||
|
|
fab49e49e5 | ||
|
|
b687fba5bc | ||
|
|
46a8c2519a | ||
|
|
e9437eebd2 | ||
|
|
3a39062cfc | ||
|
|
eaa0be1313 | ||
|
|
6ff013bae0 | ||
|
|
0d669e04bb | ||
|
|
17cdd9f9e1 | ||
|
|
6bcb06fcb1 | ||
|
|
b7315f8401 | ||
|
|
9b19e9e1b0 | ||
|
|
6bd67ddbab | ||
|
|
5da9484d93 | ||
|
|
844629af57 | ||
|
|
2beaa82c05 | ||
|
|
e8a2aed2b9 | ||
|
|
f262031685 | ||
|
|
5f6206fa2d | ||
|
|
f2cde2ccfb | ||
|
|
ba7838d2e1 | ||
|
|
a448884a63 | ||
|
|
17609f88f1 | ||
|
|
3a2df19db6 | ||
|
|
d2093a40d3 | ||
|
|
aa04b0925e | ||
|
|
258ac56e0a | ||
|
|
56837e9d92 | ||
|
|
bb5413863f | ||
|
|
32f5907fef | ||
|
|
ac10236cc8 | ||
|
|
8617d75548 | ||
|
|
c07d78b9e9 | ||
|
|
6355c25dde | ||
|
|
5e244d80f2 | ||
|
|
ede5efebab | ||
|
|
84908d60d2 | ||
|
|
596a22325a | ||
|
|
7f58f3ad0e | ||
|
|
c0d570a357 | ||
|
|
6b83079368 | ||
|
|
673e5a0495 | ||
|
|
bfa2cc7d64 | ||
|
|
e7c4d6705a | ||
|
|
2a1911cc14 | ||
|
|
9f7a9a32e3 | ||
|
|
2463938879 | ||
|
|
5d6525c87c | ||
|
|
6cb47ea3f0 | ||
|
|
459bb9291d | ||
|
|
3f1077ce6f | ||
|
|
eb45eb6942 | ||
|
|
f2becb777a | ||
|
|
5997b6b491 | ||
|
|
4b21b646ea | ||
|
|
7ec7b999a5 | ||
|
|
af9ac0898a | ||
|
|
c7b5a459b6 | ||
|
|
9b2f0323d6 | ||
|
|
9f6984fe4b | ||
|
|
42203dafdc | ||
|
|
a4f17a9297 | ||
|
|
733d97b2df | ||
|
|
ea747cf933 | ||
|
|
4de545aa7d | ||
|
|
6e9a93ec19 | ||
|
|
fde8a8e6a0 | ||
|
|
256fc15f5f | ||
|
|
ee498525e0 | ||
|
|
1fec0570f6 | ||
|
|
b5af7b9c78 | ||
|
|
f3c314550c | ||
|
|
847c20c9b7 | ||
|
|
4c22828812 | ||
|
|
e79712d969 | ||
|
|
be09551cdf | ||
|
|
ec1ef6aa9e | ||
|
|
11c59acfb1 | ||
|
|
bf0d92a310 | ||
|
|
db066151ee | ||
|
|
3a55dca2dc | ||
|
|
7d380f7d79 | ||
|
|
300f158d3b | ||
|
|
3635fdbf2b | ||
|
|
b6552b11eb | ||
|
|
3dc6b26eff | ||
|
|
5fdf9ad24f | ||
|
|
2fe967c542 | ||
|
|
6d8595351c | ||
|
|
f40200f559 | ||
|
|
a95a5e52b8 | ||
|
|
e3d846ab57 | ||
|
|
8506386d82 | ||
|
|
9ef96b32a6 | ||
|
|
b48c025974 | ||
|
|
a1fce67743 | ||
|
|
103b32fdb7 | ||
|
|
aef9804089 | ||
|
|
303869f572 | ||
|
|
02d9203981 | ||
|
|
7b6808b69c | ||
|
|
5f36f18148 | ||
|
|
d47fe78b0e | ||
|
|
ebe2f47a0f | ||
|
|
20d417762f | ||
|
|
321288597c | ||
|
|
be147a9f28 | ||
|
|
c275290ea6 | ||
|
|
b7bbb02447 | ||
|
|
bf1430f7d7 | ||
|
|
dccff2e785 | ||
|
|
5c3458a6e7 | ||
|
|
1776ad82c0 | ||
|
|
4e2f81cfa1 | ||
|
|
acf6002ab2 | ||
|
|
96a794e9fd | ||
|
|
3d36c45116 | ||
|
|
648491e1aa | ||
|
|
2dfb804cb9 | ||
|
|
4c153ec9da | ||
|
|
7eecd8e39c | ||
|
|
f0406a7708 | ||
|
|
561f3fd995 | ||
|
|
30efed14d1 | ||
|
|
af2e7f28fc | ||
|
|
4250e6ed64 | ||
|
|
7b0b7c11d2 | ||
|
|
d14cf1ccf4 | ||
|
|
3f6ab1582a | ||
|
|
28e96458e5 | ||
|
|
95fb98f556 | ||
|
|
4801c6d36b | ||
|
|
9440fa607d | ||
|
|
94db259e5b | ||
|
|
f49f8047ac | ||
|
|
825777faab | ||
|
|
9c89757562 | ||
|
|
b0b7600bef | ||
|
|
9b04baeaee | ||
|
|
8a074b3965 | ||
|
|
211ab03b14 | ||
|
|
1733f927e6 | ||
|
|
182b06d6ad | ||
|
|
7a9050d681 | ||
|
|
0ba29fd262 | ||
|
|
bafa021ed6 | ||
|
|
b89d9762a2 | ||
|
|
08dedf4c5e | ||
|
|
b89c781637 | ||
|
|
dd7ff77f4b | ||
|
|
8fb76134bc | ||
|
|
04d671aae2 | ||
|
|
f69a0be712 | ||
|
|
ae9e8b131e | ||
|
|
9086543f50 | ||
|
|
abea977ded | ||
|
|
6b6c9b1441 | ||
|
|
a97b301aaa | ||
|
|
2f13f04224 | ||
|
|
7c7505a778 | ||
|
|
5a4f1a2118 | ||
|
|
3b761892df | ||
|
|
eebfeba768 | ||
|
|
7684c4f8f8 | ||
|
|
7faf42b7bb | ||
|
|
a575f1e4c7 | ||
|
|
cdbfb891da | ||
|
|
280552b988 | ||
|
|
bbd4bb0154 | ||
|
|
6d3efb2b58 | ||
|
|
d9ff2cd90d | ||
|
|
2a43062de7 | ||
|
|
4ea794a522 | ||
|
|
ece0bfb881 | ||
|
|
1f4b6a5d5d | ||
|
|
be8f70d269 | ||
|
|
e674e1c735 | ||
|
|
6ca898b63b | ||
|
|
26411acd56 | ||
|
|
0ab4076dd8 | ||
|
|
a0caa762b3 | ||
|
|
900d5a3205 | ||
|
|
a17cf36225 | ||
|
|
148c4cc5fd | ||
|
|
d0c3543c3f | ||
|
|
909ad04aef | ||
|
|
417efd41c6 | ||
|
|
9cdc828afa | ||
|
|
7a9a4dbc4f | ||
|
|
a469b32cf4 | ||
|
|
27649b9543 | ||
|
|
16f3df5d35 | ||
|
|
1aded69821 | ||
|
|
c00289ba54 | ||
|
|
8fe794f059 | ||
|
|
74c10b57c6 | ||
|
|
c5495d2056 | ||
|
|
c70496b108 | ||
|
|
ca8d8835f5 | ||
|
|
d76b20b4d2 | ||
|
|
85af04da3c | ||
|
|
11e0dcbffb | ||
|
|
79366ff7a9 | ||
|
|
21d05a4835 | ||
|
|
940f38f6dd | ||
|
|
1778fd4219 | ||
|
|
969dd6175e | ||
|
|
d8d5682481 | ||
|
|
f66c11fc22 | ||
|
|
5ecffc28f2 | ||
|
|
86dda5c2fa | ||
|
|
1e52572be3 | ||
|
|
d2cb610272 | ||
|
|
a211bc9b6a | ||
|
|
9208ab8603 | ||
|
|
b43deb4ad6 | ||
|
|
b911525c81 | ||
|
|
7ff44e0016 | ||
|
|
e3cb8ad2d6 | ||
|
|
7aa6faad5f | ||
|
|
3d94ab660f | ||
|
|
cd99dfe034 | ||
|
|
dadafcdcd8 | ||
|
|
d40c109eb0 | ||
|
|
608cd69b66 | ||
|
|
231472c4c6 | ||
|
|
612c2d78e0 | ||
|
|
dc110e179d | ||
|
|
9184590c33 | ||
|
|
a0aaf308ed | ||
|
|
15f925fe9a | ||
|
|
21acf03e9a | ||
|
|
ff807473bb | ||
|
|
58829c0988 | ||
|
|
d86f0b9e74 | ||
|
|
63554d5dec | ||
|
|
43068288e9 | ||
|
|
999a04f101 | ||
|
|
3cb1c8d210 | ||
|
|
ff1bfe7b16 | ||
|
|
9ea30f3788 | ||
|
|
a3d4c65d62 | ||
|
|
e1fc02095c | ||
|
|
0cd6d8508f | ||
|
|
c2f152c470 | ||
|
|
4efbac28ed | ||
|
|
406c7242f4 | ||
|
|
53703585aa | ||
|
|
ad20ceaa68 | ||
|
|
dd77a3f0e2 | ||
|
|
a598ab1d32 | ||
|
|
16fd8e3dbe | ||
|
|
aa4c41bad2 | ||
|
|
5cf434167a | ||
|
|
3a49e8c05a | ||
|
|
95e2cf32e1 | ||
|
|
70cea0b96b | ||
|
|
ae0dec77ec | ||
|
|
e47b63466b | ||
|
|
7d1b468d9d | ||
|
|
575a84398a | ||
|
|
5cabda79d0 | ||
|
|
c516209581 | ||
|
|
a6a8cc2b7f | ||
|
|
3d7debbb28 | ||
|
|
5a9cce2bf6 | ||
|
|
6a8b4269b5 | ||
|
|
b1561ecc68 | ||
|
|
7ed8431527 | ||
|
|
a387a23518 | ||
|
|
b46875b76b | ||
|
|
858e609e1f | ||
|
|
3f427c0cf9 | ||
|
|
c95317158f | ||
|
|
47f892198c | ||
|
|
b43c8382c8 | ||
|
|
daf2fec12d | ||
|
|
4f8143b098 | ||
|
|
bfeb9c16b0 | ||
|
|
15cb124012 | ||
|
|
97d5034ed3 | ||
|
|
9763f872fc | ||
|
|
628b335e83 | ||
|
|
0f105dd8a5 | ||
|
|
9c4edd38f2 | ||
|
|
1036299da0 | ||
|
|
5b0398186e | ||
|
|
452859f4e1 | ||
|
|
2cd463eabd | ||
|
|
11530b76f7 | ||
|
|
91943b7325 | ||
|
|
268c28db7d | ||
|
|
2aad88d5b9 | ||
|
|
0bd956fd21 | ||
|
|
bbd9d98664 | ||
|
|
798c448b0c | ||
|
|
9a19616a28 | ||
|
|
6b41eb9c0c | ||
|
|
ccfb7ead15 | ||
|
|
40e53e52d6 | ||
|
|
744779d335 | ||
|
|
bcdf1d4917 | ||
|
|
e06b8438b4 | ||
|
|
9229d6859b | ||
|
|
21d146a8de | ||
|
|
7f4e36d219 | ||
|
|
c04a729081 | ||
|
|
100d94f94e | ||
|
|
d17da6c6a4 | ||
|
|
1679de5e59 | ||
|
|
246ca29679 | ||
|
|
9d717cb5ee | ||
|
|
e3bc83f2a8 | ||
|
|
70f2a4e0d7 | ||
|
|
706dfe263b | ||
|
|
688fa9201c | ||
|
|
cdbe0f0235 | ||
|
|
f8b82bc6dc | ||
|
|
3e3ccb9011 | ||
|
|
94ab4e6fb2 | ||
|
|
c3cfc6986b | ||
|
|
b9f4943a14 | ||
|
|
79cfc24a62 | ||
|
|
5c42287c4f | ||
|
|
32c7063cb0 | ||
|
|
c19a449096 | ||
|
|
3d1e36d4cb | ||
|
|
4f9d3e4b28 | ||
|
|
4dec151d0b | ||
|
|
7c51cc8527 | ||
|
|
853a18bc17 | ||
|
|
3ae122e2c7 | ||
|
|
b043a5962e | ||
|
|
8502030e5e | ||
|
|
8ba9e2a61a | ||
|
|
4ad694eda1 | ||
|
|
dff4a197a5 | ||
|
|
a5425575b1 | ||
|
|
1006ff8a7b | ||
|
|
e608d4f7fe | ||
|
|
4fc17d0d75 | ||
|
|
c3e30b2bc2 | ||
|
|
03d7110900 | ||
|
|
3ce28fb81a | ||
|
|
04f2226ea6 | ||
|
|
b1393c7a97 | ||
|
|
7e3eb9b25d | ||
|
|
f074d7d146 | ||
|
|
f18ab6c17b | ||
|
|
946ec6c3b8 | ||
|
|
5b95534afc | ||
|
|
f7a06463d9 | ||
|
|
b0c714ef60 | ||
|
|
8d3d29e4d7 | ||
|
|
b7f59da42d | ||
|
|
db3dc9e282 | ||
|
|
4290afdae2 | ||
|
|
4741ce803b | ||
|
|
11cfd0bd75 | ||
|
|
651ab01d2b | ||
|
|
d7b2c53c0b | ||
|
|
e4864a8933 | ||
|
|
10d841d8b9 | ||
|
|
12f2b76748 | ||
|
|
6c83b878f6 | ||
|
|
fb4dae7124 | ||
|
|
760842dda1 | ||
|
|
53f482ee72 | ||
|
|
783ba8058f | ||
|
|
af480b02a4 | ||
|
|
e4a79be6bb | ||
|
|
e5c316c6b9 | ||
|
|
25427926bc | ||
|
|
edb8143141 | ||
|
|
c4868d11c0 | ||
|
|
4c321ae571 | ||
|
|
2ffb727187 | ||
|
|
d66214c946 | ||
|
|
fd34820b99 | ||
|
|
918a0cc4d1 | ||
|
|
0db9c03e7e | ||
|
|
6eee1beac5 | ||
|
|
e5df5958cc | ||
|
|
343b301d14 | ||
|
|
45333d5793 | ||
|
|
e29b0cfcc4 | ||
|
|
78d9910236 | ||
|
|
e12cdf58ef | ||
|
|
1860c9456d | ||
|
|
aec905498f | ||
|
|
56089991e2 | ||
|
|
f9bb76d29a | ||
|
|
8242b1fe3f | ||
|
|
efb9038f72 | ||
|
|
e976557d29 | ||
|
|
9d8be15789 | ||
|
|
d752799a0f | ||
|
|
f209fc7fa9 | ||
|
|
c26c0b77a7 | ||
|
|
1c6da2d03c | ||
|
|
4255a58cd2 | ||
|
|
d3e4725548 | ||
|
|
adb419ed67 | ||
|
|
46e415b140 | ||
|
|
cd5a59b9cf | ||
|
|
69a97ca7b9 | ||
|
|
b55c586fac | ||
|
|
056917d616 | ||
|
|
718efcec6f | ||
|
|
f9d67bb5e8 | ||
|
|
76bb74fcd4 | ||
|
|
0a54c98b9d | ||
|
|
bec54ae366 | ||
|
|
63d7bad8a5 | ||
|
|
ab1630f9fa | ||
|
|
b824fa70eb | ||
|
|
91481a3e4e | ||
|
|
dc6ac9eab0 | ||
|
|
f583674109 | ||
|
|
77fe70019f | ||
|
|
03a2bf2602 | ||
|
|
69edc5bbe7 | ||
|
|
7039770165 | ||
|
|
641767f846 | ||
|
|
af6e2253a2 | ||
|
|
5952e586ce | ||
|
|
f10408aae8 | ||
|
|
d70ae3ab43 | ||
|
|
1391fc46d2 | ||
|
|
11a43e8116 | ||
|
|
817fe9865c | ||
|
|
f4b82d7bc4 | ||
|
|
61526480f9 | ||
|
|
81daf6bc38 | ||
|
|
a38aa56e76 | ||
|
|
729e925174 | ||
|
|
498ac98581 | ||
|
|
cd9ea45463 | ||
|
|
f9c5023e04 | ||
|
|
4abc375a91 | ||
|
|
874df65491 | ||
|
|
1f4b61f572 | ||
|
|
282230c303 | ||
|
|
cce574c3e0 | ||
|
|
877023e1e1 | ||
|
|
265142edd5 | ||
|
|
885a3c4350 | ||
|
|
4b512f84dd | ||
|
|
72d3e7c9b4 | ||
|
|
bdc73a49e0 | ||
|
|
1249ee1fd0 | ||
|
|
42df9efa0c | ||
|
|
82124729af | ||
|
|
29416cb5a3 | ||
|
|
48b9b94f7f | ||
|
|
86a824c97f | ||
|
|
808410c2c7 | ||
|
|
eaf20f0e7a | ||
|
|
fcd814a8d2 | ||
|
|
dc4d3bccd5 | ||
|
|
c7143c1019 | ||
|
|
04873bb174 | ||
|
|
c8ef9fb220 | ||
|
|
5be61f4b47 | ||
|
|
3d155cff83 | ||
|
|
7d47f0a82d | ||
|
|
a529c71a74 | ||
|
|
ea1716ce2a | ||
|
|
0f24b39ebf | ||
|
|
89b60dab8a | ||
|
|
58dd7e4501 | ||
|
|
36b844af88 | ||
|
|
e882b239aa | ||
|
|
3f7bb87a2a | ||
|
|
e908ac2a51 | ||
|
|
8533aca964 | ||
|
|
16494cb7c4 | ||
|
|
b56b34a75c | ||
|
|
21eda8b577 | ||
|
|
24288803b3 | ||
|
|
f0d834b824 | ||
|
|
63bbd7b0d7 | ||
|
|
b111829226 | ||
|
|
010d59bfee | ||
|
|
83b5c6b92d | ||
|
|
bbfdd6c0fe | ||
|
|
cda81cfae0 | ||
|
|
32b0f1168e | ||
|
|
b495e54310 | ||
|
|
d5e6940253 | ||
|
|
24e697eadb | ||
|
|
3e9fd6359d | ||
|
|
43a4572038 | ||
|
|
256eb588bb | ||
|
|
a034e65512 | ||
|
|
8c3386be87 | ||
|
|
3e601bd419 | ||
|
|
478d3c4569 | ||
|
|
3afceb6c2a | ||
|
|
7af8b21dbb | ||
|
|
1e3ada6db4 | ||
|
|
2777a7f506 | ||
|
|
b70fd23836 | ||
|
|
def0385caa | ||
|
|
29dc72889f | ||
|
|
b815a04c87 | ||
|
|
dbc9a060ef | ||
|
|
00401489c2 | ||
|
|
1a7925b3a3 | ||
|
|
406f835f00 | ||
|
|
621dedb37b | ||
|
|
b731e8246f | ||
|
|
ecc31b743f | ||
|
|
5d89d6b143 | ||
|
|
67432b23c2 | ||
|
|
21c0f2af7b | ||
|
|
ad2c386d6a | ||
|
|
be66f5d5c2 | ||
|
|
c2ffef8156 | ||
|
|
e7455f500c | ||
|
|
3eafcfa650 | ||
|
|
8d99dba86b | ||
|
|
1650311246 | ||
|
|
cf5d48e833 | ||
|
|
191677b902 | ||
|
|
31ed19e8b9 | ||
|
|
e1574fa2b4 | ||
|
|
68eb3146ce | ||
|
|
0afaae4b23 | ||
|
|
ae1d1f74f7 | ||
|
|
94cd946b96 | ||
|
|
ed01f4932a | ||
|
|
1aa840a0a2 | ||
|
|
802f0dbde1 | ||
|
|
20d1aad13f | ||
|
|
d11554c88f | ||
|
|
ed704185ab | ||
|
|
2940798ea7 | ||
|
|
e6c0e39492 | ||
|
|
453bfa7e71 | ||
|
|
23229011db |
192
.drone.yml
Normal file
192
.drone.yml
Normal file
@@ -0,0 +1,192 @@
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm64_gcc_make
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: gcc
|
||||
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV8 NUM_THREADS=32'
|
||||
commands:
|
||||
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC gfortran perl
|
||||
- $CC --version
|
||||
- make QUIET_MAKE=1 $COMMON_FLAGS
|
||||
- make -C test $COMMON_FLAGS
|
||||
- make -C ctest $COMMON_FLAGS
|
||||
- make -C utest $COMMON_FLAGS
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm32_gcc_make
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: gcc
|
||||
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV6 NUM_THREADS=32'
|
||||
commands:
|
||||
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC gfortran perl
|
||||
- $CC --version
|
||||
- make QUIET_MAKE=1 $COMMON_FLAGS
|
||||
- make -C test $COMMON_FLAGS
|
||||
- make -C ctest $COMMON_FLAGS
|
||||
- make -C utest $COMMON_FLAGS
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm64_clang_make
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: clang
|
||||
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV8 NUM_THREADS=32'
|
||||
commands:
|
||||
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC gfortran perl
|
||||
- $CC --version
|
||||
- make QUIET_MAKE=1 $COMMON_FLAGS
|
||||
- make -C test $COMMON_FLAGS
|
||||
- make -C ctest $COMMON_FLAGS
|
||||
- make -C utest $COMMON_FLAGS
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm32_clang_cmake
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: clang
|
||||
CMAKE_FLAGS: '-DDYNAMIC_ARCH=1 -DTARGET=ARMV6 -DNUM_THREADS=32 -DNOFORTRAN=ON -DBUILD_WITHOUT_LAPACK=ON'
|
||||
commands:
|
||||
- echo "CMAKE_FLAGS:= $CMAKE_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC g++ perl cmake
|
||||
- $CC --version
|
||||
- mkdir build && cd build
|
||||
- cmake $CMAKE_FLAGS ..
|
||||
- make -j
|
||||
- ctest -V
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm64_gcc_cmake
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: gcc
|
||||
CMAKE_FLAGS: '-DDYNAMIC_ARCH=1 -DTARGET=ARMV8 -DNUM_THREADS=32 -DNOFORTRAN=ON -DBUILD_WITHOUT_LAPACK=ON'
|
||||
commands:
|
||||
- echo "CMAKE_FLAGS:= $CMAKE_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC g++ perl cmake
|
||||
- $CC --version
|
||||
- mkdir build && cd build
|
||||
- cmake $CMAKE_FLAGS ..
|
||||
- make -j
|
||||
- ctest -V
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm64_clang_cmake
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: clang
|
||||
CMAKE_FLAGS: '-DDYNAMIC_ARCH=1 -DTARGET=ARMV8 -DNUM_THREADS=32 -DNOFORTRAN=ON -DBUILD_WITHOUT_LAPACK=ON'
|
||||
commands:
|
||||
- echo "CMAKE_FLAGS:= $CMAKE_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC g++ perl cmake
|
||||
- $CC --version
|
||||
- mkdir build && cd build
|
||||
- cmake $CMAKE_FLAGS ..
|
||||
- make -j
|
||||
- ctest -V
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm64_native_test
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: gcc
|
||||
COMMON_FLAGS: 'USE_OPENMP=1'
|
||||
commands:
|
||||
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC gfortran perl python g++
|
||||
- $CC --version
|
||||
- make QUIET_MAKE=1 $COMMON_FLAGS
|
||||
- make -C test $COMMON_FLAGS
|
||||
- make -C ctest $COMMON_FLAGS
|
||||
- make -C utest $COMMON_FLAGS
|
||||
- make -C cpp_thread_test dgemm_tester
|
||||
---
|
||||
kind: pipeline
|
||||
name: epyc_native_test
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: amd64
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: gcc
|
||||
COMMON_FLAGS: 'USE_OPENMP=1'
|
||||
commands:
|
||||
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC gfortran perl python g++
|
||||
- $CC --version
|
||||
- make QUIET_MAKE=1 $COMMON_FLAGS
|
||||
- make -C test $COMMON_FLAGS
|
||||
- make -C ctest $COMMON_FLAGS
|
||||
- make -C utest $COMMON_FLAGS
|
||||
- make -C cpp_thread_test dgemm_tester
|
||||
103
.github/workflows/dynamic_arch.yml
vendored
Normal file
103
.github/workflows/dynamic_arch.yml
vendored
Normal file
@@ -0,0 +1,103 @@
|
||||
name: continuous build
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest]
|
||||
fortran: [gfortran, flang]
|
||||
build: [cmake, make]
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: ~/.ccache
|
||||
# We include the commit sha in the cache key, as new cache entries are
|
||||
# only created if there is no existing entry for the key yet.
|
||||
key: ${{ runner.os }}-ccache-${{ github.sha }}
|
||||
# Restore any ccache cache entry, if none for
|
||||
# ${{ runner.os }}-ccache-${{ github.sha }} exists
|
||||
restore-keys: |
|
||||
${{ runner.os }}-ccache-
|
||||
|
||||
- name: Print system information
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
cat /proc/cpuinfo
|
||||
elif [ "$RUNNER_OS" == "macOS" ]; then
|
||||
sysctl -a | grep machdep.cpu
|
||||
else
|
||||
echo "$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
sudo apt-get install -y gfortran cmake ccache
|
||||
elif [ "$RUNNER_OS" == "macOS" ]; then
|
||||
brew install coreutils cmake ccache
|
||||
else
|
||||
echo "$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
ccache -M 300M # Limit the ccache size; Github's overall cache limit is 5GB
|
||||
|
||||
- name: gfortran build
|
||||
if: matrix.build == 'make' && matrix.fortran == 'gfortran'
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
export PATH="/usr/lib/ccache:${PATH}"
|
||||
elif [ "$RUNNER_OS" == "macOS" ]; then
|
||||
export PATH="$(brew --prefix)/opt/ccache/libexec:${PATH}"
|
||||
else
|
||||
echo "$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0
|
||||
|
||||
- name: flang build
|
||||
if: matrix.build == 'make' && matrix.fortran == 'flang'
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
export PATH="/usr/lib/ccache:${PATH}"
|
||||
elif [ "$RUNNER_OS" == "macOS" ]; then
|
||||
exit 0
|
||||
else
|
||||
echo "$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
cd /usr/
|
||||
sudo wget -nv https://github.com/flang-compiler/flang/releases/download/flang_20190329/flang-20190329-x86-70.tgz
|
||||
sudo tar xf flang-20190329-x86-70.tgz
|
||||
sudo rm flang-20190329-x86-70.tgz
|
||||
cd -
|
||||
|
||||
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC=flang
|
||||
|
||||
|
||||
- name: CMake gfortran build
|
||||
if: matrix.build == 'cmake' && matrix.fortran == 'gfortran'
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
export PATH="/usr/lib/ccache:${PATH}"
|
||||
elif [ "$RUNNER_OS" == "macOS" ]; then
|
||||
export PATH="$(brew --prefix)/opt/ccache/libexec:${PATH}"
|
||||
else
|
||||
echo "$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DDYNAMIC_ARCH=1 -DNOFORTRAN=0 -DBUILD_WITHOUT_LAPACK=0 -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_BUILD_TYPE=Release ..
|
||||
make -j$(nproc)
|
||||
79
.github/workflows/nightly-Homebrew-build.yml
vendored
Normal file
79
.github/workflows/nightly-Homebrew-build.yml
vendored
Normal file
@@ -0,0 +1,79 @@
|
||||
# Only the "head" branch of the OpenBLAS package is tested
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- '**/nightly-Homebrew-build.yml'
|
||||
pull_request:
|
||||
branches:
|
||||
- develop
|
||||
paths:
|
||||
- '**/nightly-Homebrew-build.yml'
|
||||
schedule:
|
||||
- cron: 45 7 * * *
|
||||
# This is 7:45 AM UTC daily, late at night in the USA
|
||||
|
||||
# Since push and pull_request will still always be building and testing the `develop` branch,
|
||||
# it only makes sense to test if this file has been changed
|
||||
|
||||
name: Nightly-Homebrew-Build
|
||||
jobs:
|
||||
build-OpenBLAS-with-Homebrew:
|
||||
runs-on: macos-latest
|
||||
env:
|
||||
DEVELOPER_DIR: /Applications/Xcode_11.4.1.app/Contents/Developer
|
||||
HOMEBREW_DEVELOPER: "ON"
|
||||
HOMEBREW_DISPLAY_INSTALL_TIMES: "ON"
|
||||
HOMEBREW_NO_ANALYTICS: "ON"
|
||||
HOMEBREW_NO_AUTO_UPDATE: "ON"
|
||||
HOMEBREW_NO_BOTTLE_SOURCE_FALLBACK: "ON"
|
||||
HOMEBREW_NO_INSTALL_CLEANUP: "ON"
|
||||
|
||||
steps:
|
||||
- name: Random delay for cron job
|
||||
run: |
|
||||
delay=$(( RANDOM % 600 ))
|
||||
printf 'Delaying for %s seconds on event %s' ${delay} "${{ github.event_name }}"
|
||||
sleep ${delay}
|
||||
if: github.event_name == 'schedule'
|
||||
|
||||
- uses: actions/checkout@v2
|
||||
# This isn't even needed, technically. Homebrew will get `develop` via git
|
||||
|
||||
- name: Update Homebrew
|
||||
if: github.event_name != 'pull_request'
|
||||
run: brew update || true
|
||||
|
||||
- name: Install prerequisites
|
||||
run: brew install --fetch-HEAD --HEAD --only-dependencies --keep-tmp openblas
|
||||
|
||||
- name: Install and bottle OpenBLAS
|
||||
run: brew install --fetch-HEAD --HEAD --build-bottle --keep-tmp openblas
|
||||
# the HEAD flags tell Homebrew to build the develop branch fetch via git
|
||||
|
||||
- name: Create bottle
|
||||
run: |
|
||||
brew bottle -v openblas
|
||||
mkdir bottles
|
||||
mv *.bottle.tar.gz bottles
|
||||
|
||||
- name: Upload bottle
|
||||
uses: actions/upload-artifact@v1
|
||||
with:
|
||||
name: openblas--HEAD.catalina.bottle.tar.gz
|
||||
path: bottles
|
||||
|
||||
- name: Show linkage
|
||||
run: brew linkage -v openblas
|
||||
|
||||
- name: Test openblas
|
||||
run: brew test --HEAD --verbose openblas
|
||||
|
||||
- name: Audit openblas formula
|
||||
run: |
|
||||
brew audit --strict openblas
|
||||
brew cat openblas
|
||||
|
||||
- name: Post logs on failure
|
||||
if: failure()
|
||||
run: brew gist-logs --with-hostname -v openblas
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
@@ -70,6 +70,7 @@ test/SBLAT2.SUMM
|
||||
test/SBLAT3.SUMM
|
||||
test/ZBLAT2.SUMM
|
||||
test/ZBLAT3.SUMM
|
||||
test/SHBLAT3.SUMM
|
||||
test/cblat1
|
||||
test/cblat2
|
||||
test/cblat3
|
||||
@@ -79,6 +80,7 @@ test/dblat3
|
||||
test/sblat1
|
||||
test/sblat2
|
||||
test/sblat3
|
||||
test/test_shgemm
|
||||
test/zblat1
|
||||
test/zblat2
|
||||
test/zblat3
|
||||
@@ -87,4 +89,5 @@ build.*
|
||||
*.swp
|
||||
benchmark/*.goto
|
||||
benchmark/smallscaling
|
||||
|
||||
CMakeCache.txt
|
||||
CMakeFiles/*
|
||||
|
||||
99
.travis.yml
99
.travis.yml
@@ -16,8 +16,7 @@ matrix:
|
||||
before_script: &common-before
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
|
||||
script:
|
||||
- set -e
|
||||
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
- make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
- make -C test $COMMON_FLAGS $BTYPE
|
||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- make -C utest $COMMON_FLAGS $BTYPE
|
||||
@@ -25,6 +24,25 @@ matrix:
|
||||
- TARGET_BOX=LINUX64
|
||||
- BTYPE="BINARY=64"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
os: linux-ppc64le
|
||||
before_script:
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32"
|
||||
env:
|
||||
# for matrix annotation only
|
||||
- TARGET_BOX=PPC64LE_LINUX
|
||||
- BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
os: linux
|
||||
arch: s390x
|
||||
before_script:
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32"
|
||||
env:
|
||||
# for matrix annotation only
|
||||
- TARGET_BOX=IBMZ_LINUX
|
||||
- BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
env:
|
||||
- TARGET_BOX=LINUX64
|
||||
@@ -57,6 +75,23 @@ matrix:
|
||||
- TARGET_BOX=LINUX32
|
||||
- BTYPE="BINARY=32"
|
||||
|
||||
- os: linux
|
||||
arch: ppc64le
|
||||
dist: bionic
|
||||
compiler: gcc
|
||||
before_script:
|
||||
- sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y
|
||||
- sudo apt-get update
|
||||
- sudo apt-get install gcc-9 gfortran-9 -y
|
||||
script:
|
||||
- make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
|
||||
- make -C test $COMMON_FLAGS $BTYPE
|
||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- make -C utest $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
# for matrix annotation only
|
||||
- TARGET_BOX=PPC64LE_LINUX_P9
|
||||
|
||||
- os: linux
|
||||
compiler: gcc
|
||||
addons:
|
||||
@@ -89,7 +124,6 @@ matrix:
|
||||
- sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers'
|
||||
before_script: *common-before
|
||||
script:
|
||||
- set -e
|
||||
# XXX: Disable some warnings for now to avoid exceeding Travis limit for log size.
|
||||
- alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types"
|
||||
@@ -132,7 +166,6 @@ matrix:
|
||||
before_script:
|
||||
- COMMON_ARGS="-DTARGET=NEHALEM -DNUM_THREADS=32"
|
||||
script:
|
||||
- set -e
|
||||
- mkdir build
|
||||
- CONFIG=Release
|
||||
- cmake -Bbuild -H. $CMAKE_ARGS $COMMON_ARGS -DCMAKE_BUILD_TYPE=$CONFIG
|
||||
@@ -149,56 +182,34 @@ matrix:
|
||||
|
||||
- &test-macos
|
||||
os: osx
|
||||
osx_image: xcode8
|
||||
osx_image: xcode10.1
|
||||
before_script:
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
||||
- brew update
|
||||
- brew install gcc # for gfortran
|
||||
- brew install gcc@8 # for gfortran
|
||||
script:
|
||||
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
- BTYPE="BINARY=64 INTERFACE64=1"
|
||||
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-8"
|
||||
|
||||
- <<: *test-macos
|
||||
osx_image: xcode10.0
|
||||
env:
|
||||
- BTYPE="BINARY=32"
|
||||
- BTYPE="TARGET=NEHALEM BINARY=32 NOFORTRAN=1"
|
||||
|
||||
- &emulated-arm
|
||||
dist: trusty
|
||||
sudo: required
|
||||
services: docker
|
||||
env: IMAGE_ARCH=arm32 TARGET_ARCH=ARMV6 COMPILER=gcc
|
||||
name: "Emulated Build for ARMV6 with gcc"
|
||||
before_install: sudo docker run --rm --privileged multiarch/qemu-user-static:register --reset
|
||||
script: |
|
||||
echo "FROM openblas/alpine:${IMAGE_ARCH}
|
||||
COPY . /tmp/openblas
|
||||
RUN mkdir /tmp/openblas/build && \
|
||||
cd /tmp/openblas/build && \
|
||||
CC=${COMPILER} cmake -D DYNAMIC_ARCH=OFF \
|
||||
-D TARGET=${TARGET_ARCH} \
|
||||
-D BUILD_SHARED_LIBS=ON \
|
||||
-D BUILD_WITHOUT_LAPACK=ON \
|
||||
-D BUILD_WITHOUT_CBLAS=ON \
|
||||
-D CMAKE_BUILD_TYPE=Release ../ && \
|
||||
cmake --build ." > Dockerfile
|
||||
docker build .
|
||||
- <<: *emulated-arm
|
||||
env: IMAGE_ARCH=arm32 TARGET_ARCH=ARMV6 COMPILER=clang
|
||||
name: "Emulated Build for ARMV6 with clang"
|
||||
- <<: *emulated-arm
|
||||
env: IMAGE_ARCH=arm64 TARGET_ARCH=ARMV8 COMPILER=gcc
|
||||
name: "Emulated Build for ARMV8 with gcc"
|
||||
- <<: *emulated-arm
|
||||
env: IMAGE_ARCH=arm64 TARGET_ARCH=ARMV8 COMPILER=clang
|
||||
name: "Emulated Build for ARMV8 with clang"
|
||||
|
||||
allow_failures:
|
||||
- env: IMAGE_ARCH=arm32 TARGET_ARCH=ARMV6 COMPILER=gcc
|
||||
- env: IMAGE_ARCH=arm32 TARGET_ARCH=ARMV6 COMPILER=clang
|
||||
- env: IMAGE_ARCH=arm64 TARGET_ARCH=ARMV8 COMPILER=gcc
|
||||
- env: IMAGE_ARCH=arm64 TARGET_ARCH=ARMV8 COMPILER=clang
|
||||
- <<: *test-macos
|
||||
osx_image: xcode10.1
|
||||
env:
|
||||
- CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
|
||||
- CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
|
||||
- BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1"
|
||||
|
||||
- <<: *test-macos
|
||||
osx_image: xcode10.1
|
||||
env:
|
||||
- CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
|
||||
- CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
|
||||
- BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1"
|
||||
# whitelist
|
||||
branches:
|
||||
only:
|
||||
|
||||
@@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
|
||||
project(OpenBLAS C ASM)
|
||||
set(OpenBLAS_MAJOR_VERSION 0)
|
||||
set(OpenBLAS_MINOR_VERSION 3)
|
||||
set(OpenBLAS_PATCH_VERSION 5)
|
||||
set(OpenBLAS_PATCH_VERSION 10.dev)
|
||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||
|
||||
# Adhere to GNU filesystem layout conventions
|
||||
@@ -20,9 +20,15 @@ if(MSVC)
|
||||
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
|
||||
endif()
|
||||
option(BUILD_WITHOUT_CBLAS "Do not build the C interface (CBLAS) to the BLAS functions" OFF)
|
||||
option(DYNAMIC_ARCH "Include support for multiple CPU targets, with automatic selection at runtime (x86/x86_64 only)" OFF)
|
||||
option(DYNAMIC_OLDER "Include specific support for older cpu models (Penryn,Dunnington,Atom,Nano,Opteron) with DYNAMIC_ARCH" OFF)
|
||||
option(DYNAMIC_ARCH "Include support for multiple CPU targets, with automatic selection at runtime (x86/x86_64, aarch64 or ppc only)" OFF)
|
||||
option(DYNAMIC_OLDER "Include specific support for older x86 cpu models (Penryn,Dunnington,Atom,Nano,Opteron) with DYNAMIC_ARCH" OFF)
|
||||
option(BUILD_RELAPACK "Build with ReLAPACK (recursive implementation of several LAPACK functions on top of standard LAPACK)" OFF)
|
||||
option(USE_LOCKING "Use locks even in single-threaded builds to make them callable from multiple threads" OFF)
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
|
||||
option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding processes from e.g. R or numpy/scipy to a single core" ON)
|
||||
else()
|
||||
set(NO_AFFINITY 1)
|
||||
endif()
|
||||
|
||||
# Add a prefix or suffix to all exported symbol names in the shared library.
|
||||
# Avoids conflicts with other BLAS libraries, especially when using
|
||||
@@ -42,6 +48,19 @@ endif()
|
||||
|
||||
#######
|
||||
|
||||
if(MSVC AND MSVC_STATIC_CRT)
|
||||
set(CompilerFlags
|
||||
CMAKE_CXX_FLAGS
|
||||
CMAKE_CXX_FLAGS_DEBUG
|
||||
CMAKE_CXX_FLAGS_RELEASE
|
||||
CMAKE_C_FLAGS
|
||||
CMAKE_C_FLAGS_DEBUG
|
||||
CMAKE_C_FLAGS_RELEASE
|
||||
)
|
||||
foreach(CompilerFlag ${CompilerFlags})
|
||||
string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
message(WARNING "CMake support is experimental. It does not yet support all build options and may not produce the same Makefiles that OpenBLAS ships with.")
|
||||
|
||||
@@ -62,15 +81,19 @@ endif ()
|
||||
|
||||
set(SUBDIRS ${BLASDIRS})
|
||||
if (NOT NO_LAPACK)
|
||||
list(APPEND SUBDIRS lapack)
|
||||
if(BUILD_RELAPACK)
|
||||
list(APPEND SUBDIRS relapack/src)
|
||||
endif()
|
||||
list(APPEND SUBDIRS lapack)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED BUILD_HALF)
|
||||
set (BUILD_HALF false)
|
||||
endif ()
|
||||
# set which float types we want to build for
|
||||
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
|
||||
# if none are defined, build for all
|
||||
# set(BUILD_HALF true)
|
||||
set(BUILD_SINGLE true)
|
||||
set(BUILD_DOUBLE true)
|
||||
set(BUILD_COMPLEX true)
|
||||
@@ -102,6 +125,11 @@ if (BUILD_COMPLEX16)
|
||||
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
|
||||
endif ()
|
||||
|
||||
if (BUILD_HALF)
|
||||
message(STATUS "Building Half Precision")
|
||||
list(APPEND FLOAT_TYPES "HALF") # defines nothing
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
|
||||
message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
|
||||
endif ()
|
||||
@@ -134,7 +162,7 @@ endif ()
|
||||
|
||||
# Only generate .def for dll on MSVC and always produce pdb files for debug and release
|
||||
if(MSVC)
|
||||
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 3.4)
|
||||
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 3.4)
|
||||
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
|
||||
endif()
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
|
||||
@@ -149,15 +177,9 @@ if (${DYNAMIC_ARCH})
|
||||
endforeach()
|
||||
endif ()
|
||||
|
||||
# Only build shared libs for MSVC
|
||||
if (MSVC)
|
||||
set(BUILD_SHARED_LIBS ON)
|
||||
endif()
|
||||
|
||||
|
||||
# add objects to the openblas lib
|
||||
add_library(${OpenBLAS_LIBNAME} ${LA_SOURCES} ${LAPACKE_SOURCES} ${RELA_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
|
||||
target_include_directories(${OpenBLAS_LIBNAME} INTERFACE $<INSTALL_INTERFACE:include>)
|
||||
target_include_directories(${OpenBLAS_LIBNAME} INTERFACE $<INSTALL_INTERFACE:include/openblas${SUFFIX64}>)
|
||||
|
||||
# Android needs to explicitly link against libm
|
||||
if(ANDROID)
|
||||
@@ -166,7 +188,7 @@ endif()
|
||||
|
||||
# Handle MSVC exports
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 3.4)
|
||||
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 3.4)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/export.cmake")
|
||||
else()
|
||||
# Creates verbose .def file (51KB vs 18KB)
|
||||
@@ -199,7 +221,8 @@ if (USE_THREAD)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME} ${CMAKE_THREAD_LIBS_INIT})
|
||||
endif()
|
||||
|
||||
if (MSVC OR NOT NOFORTRAN)
|
||||
#if (MSVC OR NOT NOFORTRAN)
|
||||
if (NOT NO_CBLAS)
|
||||
# Broken without fortran on unix
|
||||
add_subdirectory(utest)
|
||||
endif()
|
||||
@@ -210,6 +233,7 @@ if (NOT MSVC AND NOT NOFORTRAN)
|
||||
if(NOT NO_CBLAS)
|
||||
add_subdirectory(ctest)
|
||||
endif()
|
||||
add_subdirectory(lapack-netlib/TESTING)
|
||||
endif()
|
||||
|
||||
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
|
||||
@@ -217,7 +241,15 @@ set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
|
||||
SOVERSION ${OpenBLAS_MAJOR_VERSION}
|
||||
)
|
||||
|
||||
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFIX} STREQUAL "")
|
||||
if (BUILD_SHARED_LIBS AND BUILD_RELAPACK)
|
||||
if (NOT MSVC)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME} "-Wl,-allow-multiple-definition")
|
||||
else()
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /FORCE:MULTIPLE")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "")
|
||||
if (NOT DEFINED ARCH)
|
||||
set(ARCH_IN "x86_64")
|
||||
else()
|
||||
@@ -314,7 +346,7 @@ install (FILES ${OPENBLAS_CONFIG_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
if(NOT NOFORTRAN)
|
||||
message(STATUS "Generating f77blas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
|
||||
set(F77BLAS_H ${CMAKE_BINARY_DIR}/f77blas.h)
|
||||
set(F77BLAS_H ${CMAKE_BINARY_DIR}/generated/f77blas.h)
|
||||
file(WRITE ${F77BLAS_H} "#ifndef OPENBLAS_F77BLAS_H\n")
|
||||
file(APPEND ${F77BLAS_H} "#define OPENBLAS_F77BLAS_H\n")
|
||||
file(APPEND ${F77BLAS_H} "#include \"openblas_config.h\"\n")
|
||||
@@ -326,11 +358,23 @@ endif()
|
||||
|
||||
if(NOT NO_CBLAS)
|
||||
message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
|
||||
set(CBLAS_H ${CMAKE_BINARY_DIR}/generated/cblas.h)
|
||||
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h CBLAS_H_CONTENTS)
|
||||
string(REPLACE "common" "openblas_config" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
|
||||
file(WRITE ${CMAKE_BINARY_DIR}/cblas.tmp "${CBLAS_H_CONTENTS_NEW}")
|
||||
install (FILES ${CMAKE_BINARY_DIR}/cblas.tmp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} RENAME cblas.h)
|
||||
if (NOT ${SYMBOLPREFIX} STREQUAL "")
|
||||
string(REPLACE " cblas" " ${SYMBOLPREFIX}cblas" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
|
||||
string(REPLACE " openblas" " ${SYMBOLPREFIX}openblas" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
|
||||
string (REPLACE " ${SYMBOLPREFIX}openblas_complex" " openblas_complex" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
|
||||
string(REPLACE " goto" " ${SYMBOLPREFIX}goto" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
|
||||
endif()
|
||||
if (NOT ${SYMBOLSUFFIX} STREQUAL "")
|
||||
string(REGEX REPLACE "(cblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
|
||||
string(REGEX REPLACE "(openblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
|
||||
string(REGEX REPLACE "(openblas_complex[^ ]*)${SYMBOLSUFFIX}" "\\1" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
|
||||
string(REGEX REPLACE "(goto[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
|
||||
endif()
|
||||
file(WRITE ${CBLAS_H} "${CBLAS_H_CONTENTS_NEW}")
|
||||
install (FILES ${CBLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
endif()
|
||||
|
||||
if(NOT NO_LAPACKE)
|
||||
@@ -345,11 +389,9 @@ if(NOT NO_LAPACKE)
|
||||
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
|
||||
endif()
|
||||
|
||||
include(FindPkgConfig QUIET)
|
||||
if(PKG_CONFIG_FOUND)
|
||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY)
|
||||
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
|
||||
endif()
|
||||
# Install pkg-config files
|
||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY)
|
||||
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
|
||||
|
||||
|
||||
# GNUInstallDirs "DATADIR" wrong here; CMake search path wants "share".
|
||||
|
||||
@@ -167,4 +167,26 @@ In chronological order:
|
||||
* [2017-02-26] ztrmm kernel for IBM z13
|
||||
* [2017-03-13] strmm and ctrmm kernel for IBM z13
|
||||
* [2017-09-01] initial Blas Level-1,2 (double precision) for IBM z13
|
||||
* [2018-03-07] added missing Blas Level 1-2 (double precision) simd codes
|
||||
* [2019-02-01] added missing Blas Level-1,2 (single precision) simd codes
|
||||
* [2019-03-14] power9 dgemm/dtrmm kernel
|
||||
* [2019-04-29] power9 sgemm/strmm kernel
|
||||
|
||||
* Jiachen Wang <https://github.com/wjc404>
|
||||
* [2019-07-29] optimize AVX2 DGEMM
|
||||
* [2019-10-20] AVX512 DGEMM kernel (4x8)
|
||||
* [2019-11-06] optimize AVX512 SGEMM
|
||||
* [2019-11-12] AVX512 CGEMM & ZGEMM kernels
|
||||
* [2019-12-23] optimize AVX2 CGEMM and ZGEMM
|
||||
* [2019-12-30] AVX2 CGEMM3M & ZGEMM3M kernels
|
||||
* [2020-01-07] optimize AVX2 SGEMM and STRMM
|
||||
|
||||
* Rajalakshmi Srinivasaraghavan <https://github.com/RajalakshmiSR>
|
||||
* [2020-04-15] Half-precision GEMM for bfloat16
|
||||
|
||||
* Marius Hillenbrand <https://github.com/mhillenibm>
|
||||
* [2020-05-12] Revise dynamic architecture detection for IBM z
|
||||
* [2020-05-12] Add new sgemm and strmm kernel for IBM z14
|
||||
|
||||
* Danfeng Zhang <https://github.com/craft-zhang>
|
||||
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
|
||||
292
Changelog.txt
292
Changelog.txt
@@ -1,4 +1,296 @@
|
||||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.3.10
|
||||
14-Jun-2020
|
||||
|
||||
common:
|
||||
* Improved thread locking behaviour in blas_server and parallel getrf
|
||||
* Imported bugfix 394 from LAPACK (spurious reference to "XERBL"
|
||||
due to overlong lines)
|
||||
* Imported bugfix 403 from LAPACK (compile option "recursive" required
|
||||
for correctness with Intel and PGI)
|
||||
* Imported bugfix 408 from LAPACK (wrong scaling in ZHEEQUB)
|
||||
* Imported bugfix 411 from LAPACK (infinite loop in LARGV/LARTG/LARTGP)
|
||||
* Fixed mismatches between BUFFERSIZE and GEMM_UNROLL parameters that
|
||||
could lead to crashes at large matrix sizes
|
||||
* Restored internal soname in dynamic libraries on FreeBSD and Dragonfly
|
||||
* Added API (openblas_setaffinity) to set the thread affinity on Linux
|
||||
* Added initial infrastructure for half-precision floating point
|
||||
(bfloat16) support with a generic implementation of SHGEMM
|
||||
* Added CMAKE build system support for building the cblas_Xgemm3m
|
||||
functions
|
||||
* Fixed CMAKE support for building in a path with embedded spaces
|
||||
* Fixed CMAKE (non)handling of NO_EXPRECISION and MAX_STACK_ALLOC
|
||||
* Fixed GCC version detection in the Makefiles
|
||||
* Allowed overriding the names of AR, AS and LD in Makefile builds
|
||||
|
||||
POWER:
|
||||
* Fixed big-endian POWER8 ELFv2 builds on FreeBSD
|
||||
* Fixed GCC version checks and DYNAMIC_ARCH builds on POWER9
|
||||
* Fixed CMAKE build support for POWER9
|
||||
* fixed a potential race condition in the thread buffer allocation
|
||||
* Worked around LAPACK test failures on PPC G4
|
||||
|
||||
MIPS:
|
||||
* Fixed a potential race condition in the thread buffer allocation
|
||||
* Added support for MIPS 24K/24KE family based on P5600 kernels
|
||||
|
||||
MIPS64:
|
||||
* fixed a potential race condition in the thread buffer allocation
|
||||
* Added TARGET=GENERIC
|
||||
|
||||
ARMV7:
|
||||
* Fixed a race condition in the thread buffer allocation
|
||||
|
||||
ARMV8:
|
||||
* Fixed a race condition in the thread buffer allocation
|
||||
* Fixed zero initialisation in the assembly for SGEMM and DGEMM BETA
|
||||
* Improved performance of the ThunderX2 DAXPY kernel
|
||||
* Added an optimized SGEMM kernel for Cortex A53
|
||||
* Fixed Makefile support for INTERFACE64 (8-byte integer)
|
||||
|
||||
x86_64:
|
||||
* Fixed a syntax error in the CMAKE setup for SkylakeX
|
||||
* Improved performance of STRSM on Haswell, SkylakeX and Ryzen
|
||||
* Improved SGEMM performance on SGEMM for workloads with ldc a
|
||||
multiple of 1024
|
||||
* Improved DGEMM performance on Skylake X
|
||||
* Fixed unwanted AVX512-dependency of SGEMM in DYNAMIC_ARCH
|
||||
builds created on SkylakeX
|
||||
* Removed data alignment requirement in the SSE2 copy kernels
|
||||
that could cause spurious crashes
|
||||
* Added a workaround for an optimizer bug in AppleClang 11.0.3
|
||||
* Fixed LAPACK test failures due to wrong options for Intel Fortran
|
||||
* Fixed compilation and LAPACK test results with recent Flang
|
||||
and AMD AOCC
|
||||
* Fixed DYNAMIC_ARCH builds with CMAKE on OS X
|
||||
* Fixed missing exports of cblas_i?amin, cblas_i?min, cblas_i?max,
|
||||
cblas_?sum, cblas_?gemm3m in the shared library on OS
|
||||
* Fixed reporting of cpu name in DYNAMIC_ARCH builds (would sometimes
|
||||
show the name of an older generation chip supported by the same kernels)
|
||||
|
||||
IBM Z:
|
||||
* Improved performance of SGEMM/STRMM and DGEMM/DTRMM on Z14
|
||||
|
||||
====================================================================
|
||||
Version 0.3.9
|
||||
1-Mar-2020
|
||||
|
||||
common:
|
||||
* Fixed a miscompilation of the GETRF functions with CMAKE
|
||||
* Imported bugfix 390 from LAPACK (missing NaN propagation in xCOMBSSQ)
|
||||
* The size of the memory buffer used for splitting GEMM tasks across
|
||||
multiple threads can now be configured in the build system.
|
||||
|
||||
POWER:
|
||||
* Fixed several compilation problems related to endianness
|
||||
and ELF version on POWER8 and POWER9
|
||||
* Fixed use of the absolute value IAMIN/IAMAX instead of IMIN/IMAX
|
||||
* Fixed a race condition in the level3 blas code
|
||||
|
||||
MIPS64:
|
||||
* Fixed use of the absoltute value IAMIN/IAMAX instead of IMIN/IMAX
|
||||
|
||||
ARMV7:
|
||||
* Fixed a race condition in the level3 blas code
|
||||
* Fixed compilation on Android
|
||||
ARMV8:
|
||||
* Added support for Ampere EMAG8180
|
||||
* Added support for Neoverse N1
|
||||
* Improved performance of the blas_lock function
|
||||
* Fixed a race condition in the level3 blas code
|
||||
* Fixed a performance regression on TSV110-based servers
|
||||
|
||||
x86_64:
|
||||
* Fixed a long-standing error with undeclared register overwrites
|
||||
in the DSCAL microkernel for HASWELL,SKYLAKEX and ZEN
|
||||
* Fixed a long-standing bug in the SSE implementation of IAMAX
|
||||
* Fixed a CMAKE build failure with DYNAMIC_ARCH
|
||||
* Fixed cpu autodetection of Goldmont+, Cannon Lake and Ice Lake
|
||||
* Fixed a compilation failure on OSX with compiler name containing dash
|
||||
* Fixed compilation with MinGW on SkylakeX
|
||||
* Improved speed of the AVX512 GEMM3M kernel on SkylakeX
|
||||
* Added an AVX512 STRMM kernel for SkylakeX
|
||||
* Improved GEMM performance on Haswell and Zen
|
||||
|
||||
zarch:
|
||||
* fixed compilation of the DYNAMIC_ARCH code
|
||||
|
||||
====================================================================
|
||||
Version 0.3.8
|
||||
9-Feb-2020
|
||||
|
||||
common:
|
||||
` * LAPACK has been updated to 3.9.0 (plus patches up to
|
||||
January 2nd, 2020)
|
||||
* CMAKE support has been improved in several areas including
|
||||
cross-compilation
|
||||
* a thread race condition in the GEMM3M kernels was resolved
|
||||
* the "generic" (plain C) gemm beta kernel used by many targets
|
||||
has been sped up
|
||||
* an optimized version of the LAPACK trtrs functions has been added
|
||||
* an incompatibilty between the LAPACK tests and the OpenBLAS
|
||||
implementation of XERBLA was resolved, removing the numerous
|
||||
warnings about wrong error exits in the former
|
||||
* support for NetBSD has been added
|
||||
* support for compilation with g95 and non-GNU versions of ld
|
||||
has been improved
|
||||
* support for compilation with (upcoming) gcc 10 has been added
|
||||
|
||||
POWER:
|
||||
* worked around miscompilation of several POWER8 and POWER9
|
||||
kernels by older versions of gcc
|
||||
* added support for big-endian POWER8 and for compilation on AIX
|
||||
* corrected bugs in the big-endian support for PPC440 and PPC970
|
||||
* DYNAMIC_ARCH support is now available in CMAKE builds as well
|
||||
|
||||
ARMV8:
|
||||
* performance of DGEMM_BETA and SGEMM_NCOPY has been improved
|
||||
* compilation for 32bit works again
|
||||
* performance of the RPCC function has been improved
|
||||
* improved performance on small systems
|
||||
* DYNAMIC_ARCH support is now available in CMAKE builds as well
|
||||
* cross-compilation from OSX to IOS was simplified
|
||||
|
||||
x86_64:
|
||||
* a new AVX512 DGEMM kernel was added and the AVX512 SGEMM kernel
|
||||
was significantly improved
|
||||
* optimized AVX512 kernels for CGEMM and ZGEMM have been added
|
||||
* AVX2 kernels for STRMM, SGEMM, and CGEMM have been significantly
|
||||
sped up and optimized CGEMM3M and ZGEMM3M kernels have been added
|
||||
* added support for QEMU virtual cpus
|
||||
* a compilation problem with PGI and SUN compilers was fixed
|
||||
* Intel "Goldmont plus" is now autodetected
|
||||
* a potential crash on program exit on MS Windows has been fixed
|
||||
|
||||
x86:
|
||||
* an unwanted case sensitivity in the implementation of LSAME
|
||||
on older 32bit AMD cpus was fixed
|
||||
|
||||
zarch:
|
||||
* Z15 is now supported as Z14
|
||||
* DYNAMIC_ARCH is now available on ZARCH as well
|
||||
|
||||
====================================================================
|
||||
Version 0.3.7
|
||||
11-Aug 2019
|
||||
|
||||
common:
|
||||
* having the gmake special variables TARGET_ARCH or TARGET_MACH
|
||||
defined no longer causes build failures in ctest or utest
|
||||
* defining NO_AFFINITY or USE_TLS to 0 in gmake builds no longer
|
||||
has the same effect as setting them to 1
|
||||
* a new test program was added to allow checking the library for
|
||||
thread safety
|
||||
* a new option USE_LOCKING was added to ensure thread safety when
|
||||
OpenBLAS itself is built without multithreading but will be
|
||||
called from multiple threads.
|
||||
* a build failure on Linux with glibc versions earlier than 2.5
|
||||
was fixed
|
||||
* a runtime error with CPU enumeration (and NO_AFFINITY not set)
|
||||
on glibc 2.6 was fixed
|
||||
* NO_AFFINITY was added to the CMAKE options (and defaults to being
|
||||
active on Linux, as in the gmake builds)
|
||||
|
||||
x86_64:
|
||||
* the build-time logic for detection of AVX512 availability in
|
||||
the processor and compiler was fixed
|
||||
* gmake builds on OSX now set the internal name of the library to
|
||||
libopenblas.0.dylib (consistent with CMAKE)
|
||||
* the Haswell DGEMM kernel received a significant speedup through
|
||||
improved prefetch and load instructions
|
||||
* performance of DGEMM, DTRMM, DTRSM and ZDOT on Zen/Zen2 was markedly
|
||||
increased by avoiding vpermpd instructions
|
||||
* the SKYLAKEX (AVX512) DGEMM helper functions have now been disabled
|
||||
to fix remaining errors in DGEMM, DSYMM and DTRMM
|
||||
|
||||
POWER:
|
||||
* added support for building on FreeBSD/powerpc64 and FreeBSD/ppc970
|
||||
* added optimized kernels for POWER9 SGEMM and STRMM
|
||||
|
||||
ARMV7:
|
||||
* fixed the softfp implementations of xAMAX and IxAMAX
|
||||
* removed the predefined -march= flags on both ARMV5 and ARMV6 as
|
||||
they were appropriate for only a subset of platforms
|
||||
|
||||
====================================================================
|
||||
Version 0.3.6
|
||||
29-Apr-2019
|
||||
|
||||
common:
|
||||
* the build tools now check that a given cpu TARGET is actually valid
|
||||
* the build-time check of system features (c_check) has been made
|
||||
less dependent on particular perl features (this should mainly
|
||||
benefit building on Windows)
|
||||
* several problem with the ReLAPACK integration were fixed,
|
||||
including INTERFACE64 support and building a shared library
|
||||
* building with CMAKE on BSD systems was improved
|
||||
* a non-absolute SUM function was added based on the
|
||||
existing optimized code for ASUM
|
||||
* CBLAS interfaces to the IxMIN and IxMAX functions were added
|
||||
* a name clash between LAPACKE and BOOST headers was resolved
|
||||
* CMAKE builds with OpenMP failed to include the appropriate getrf_parallel
|
||||
kernels
|
||||
* a crash on thread (key) deletion with the USE_TLS=1 memory management
|
||||
option was fixed
|
||||
* restored several earlier fixes, in particular for OpenMP performance,
|
||||
building on BSD, and calling fork on CYGWIN, which had inadvertently
|
||||
been dropped in the 0.3.3 rewrite of the memory management code.
|
||||
|
||||
x86_64:
|
||||
* the AVX512 DGEMM kernel has been disabled again due to unsolved problems
|
||||
* building with old versions of MSVC was fixed
|
||||
* it is now possible to build a static library on Windows with CMAKE
|
||||
* accessing environment variables on CYGWIN at run time was fixed
|
||||
* the CMAKE build system now recognizes 32bit userspace on 64bit hardware
|
||||
* Intel "Denverton" atom and Hygon "Dhyana" zen CPUs are now autodetected
|
||||
* building for DYNAMIC_ARCH with a DYNAMIC_LIST of targets is now supported
|
||||
with CMAKE as well
|
||||
* building for DYNAMIC_ARCH with GENERIC as the default target is now supported
|
||||
* a buffer overflow in the SSE GEMM kernel for Intel Nano targets was fixed
|
||||
* assembly bugs involving undeclared modification of input operands were fixed
|
||||
in the AXPY, DOT, GEMV, GER, SCAL, SYMV and TRSM microkernels for Nehalem,
|
||||
Sandybridge, Haswell, Bulldozer and Piledriver. These would typically cause
|
||||
test failures or segfaults when compiled with recent versions of gcc from 8 onward.
|
||||
* a similar bug was fixed in the blas_quickdivide code used to split workloads
|
||||
in most functions
|
||||
* a bug in the IxMIN implementation for the GENERIC target made it return the result of IxMAX
|
||||
* fixed building on SkylakeX systems when either the compiler or the (emulated) operating
|
||||
environment does not support AVX512
|
||||
* improved GEMM performance on ZEN targets
|
||||
|
||||
x86:
|
||||
* build failures caused by the recently added checks for AVX512 were fixed
|
||||
* an inline assembly bug involving undeclared modification of an input argument was
|
||||
fixed in the blas_quickdivide code used to split workloads in most functions
|
||||
* a bug in the IMIN implementation for the GENERIC target made it return the result of IMAX
|
||||
|
||||
MIPS32:
|
||||
* a bug in the IMIN implementation made it return the result of IMAX
|
||||
|
||||
POWER:
|
||||
* single precision BLAS1/2 functions have received optimized POWER8 kernels
|
||||
* POWER9 is now a separate target, with an optimized DGEMM/DTRMM kernel
|
||||
* building on PPC970 systems under OSX Leopard or Tiger is now supported
|
||||
* out-of-bounds memory accesses in the gemm_beta microkernels were fixed
|
||||
* building a shared library on AIX is now supported for POWER6
|
||||
* DYNAMIC_ARCH support has been added for POWER6 and newer
|
||||
|
||||
ARMv7:
|
||||
* corrected xDOT behaviour with zero INC_X or INC_Y
|
||||
* a bug in the IMIN implementation made it return the result of IMAX
|
||||
|
||||
ARMv8:
|
||||
* added support for HiSilicon TSV110 cpus
|
||||
* the CMAKE build system now recognizes 32bit userspace on 64bit hardware
|
||||
* cross-compilation with CMAKE now works again
|
||||
* a bug in the IMIN implementation made it return the result of IMAX
|
||||
* ARMV8 builds with the BINARY=32 option are now automatically handled as ARMV7
|
||||
|
||||
IBM Z:
|
||||
* optimized microkernels for single precicion BLAS1/2 functions have been added
|
||||
for both Z13 and Z14
|
||||
|
||||
====================================================================
|
||||
Version 0.3.5
|
||||
31-Dec-2018
|
||||
|
||||
9
Jenkinsfile
vendored
Normal file
9
Jenkinsfile
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
node {
|
||||
stage('Checkout') {
|
||||
checkout
|
||||
}
|
||||
|
||||
stage('Build') {
|
||||
sh("make")
|
||||
}
|
||||
}
|
||||
69
Makefile
69
Makefile
@@ -34,7 +34,7 @@ endif
|
||||
|
||||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast,$(LAPACK_FFLAGS))
|
||||
|
||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
|
||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench cpp_thread_test
|
||||
|
||||
.PHONY : all libs netlib $(RELA) test ctest shared install
|
||||
.NOTPARALLEL : all libs $(RELA) prof lapack-test install blas-test
|
||||
@@ -56,10 +56,21 @@ ifneq ($(INTERFACE64), 0)
|
||||
@echo " Use 64 bits int (equivalent to \"-i8\" in Fortran) "
|
||||
endif
|
||||
endif
|
||||
|
||||
@echo " C compiler ... $(C_COMPILER) (command line : $(CC))"
|
||||
@$(CC) --version > /dev/null 2>&1;\
|
||||
if [ $$? -eq 0 ]; then \
|
||||
cverinfo=`$(CC) --version | sed -n '1p'`; \
|
||||
echo " C compiler ... $(C_COMPILER) (cmd & version : $${cverinfo})";\
|
||||
else \
|
||||
echo " C compiler ... $(C_COMPILER) (command line : $(CC))";\
|
||||
fi
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
@echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))"
|
||||
@$(FC) --version > /dev/null 2>&1;\
|
||||
if [ $$? -eq 0 ]; then \
|
||||
fverinfo=`$(FC) --version | sed -n '1p'`; \
|
||||
echo " Fortran compiler ... $(F_COMPILER) (cmd & version : $${fverinfo})";\
|
||||
else \
|
||||
echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))";\
|
||||
fi
|
||||
endif
|
||||
ifneq ($(OSNAME), AIX)
|
||||
@echo -n " Library Name ... $(LIBNAME)"
|
||||
@@ -68,9 +79,13 @@ else
|
||||
endif
|
||||
|
||||
ifndef SMP
|
||||
@echo " (Single threaded) "
|
||||
@echo " (Single-threading) "
|
||||
else
|
||||
@echo " (Multi threaded; Max num-threads is $(NUM_THREADS))"
|
||||
@echo " (Multi-threading; Max num-threads is $(NUM_THREADS))"
|
||||
endif
|
||||
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
@echo " Supporting multiple $(ARCH) cpu models with minimum requirement for the common code being $(CORE)"
|
||||
endif
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
@@ -96,19 +111,20 @@ endif
|
||||
@echo
|
||||
|
||||
shared :
|
||||
ifndef NO_SHARED
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
|
||||
ifneq ($(NO_SHARED), 1)
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly))
|
||||
@$(MAKE) -C exports so
|
||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),OpenBSD NetBSD))
|
||||
@$(MAKE) -C exports so
|
||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@$(MAKE) -C exports dyn
|
||||
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
@$(MAKE) -C exports dll
|
||||
@@ -123,10 +139,13 @@ ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
touch $(LIBNAME)
|
||||
ifndef NO_FBLAS
|
||||
$(MAKE) -C test all
|
||||
$(MAKE) -C utest all
|
||||
endif
|
||||
ifndef NO_CBLAS
|
||||
$(MAKE) -C utest all
|
||||
ifneq ($(NO_CBLAS), 1)
|
||||
$(MAKE) -C ctest all
|
||||
ifeq ($(CPP_THREAD_SAFETY_TEST), 1)
|
||||
$(MAKE) -C cpp_thread_test all
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -225,7 +244,7 @@ ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
endif
|
||||
ifndef NO_LAPACKE
|
||||
ifneq ($(NO_LAPACKE), 1)
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib
|
||||
endif
|
||||
endif
|
||||
@@ -243,21 +262,22 @@ prof_lapack : lapack_prebuild
|
||||
|
||||
lapack_prebuild :
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
-@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "FFLAGS_DRV = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "override ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "ARCHFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "ARFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKLIB = ../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "TMGLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKELIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKELIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
@@ -313,9 +333,9 @@ lapack-test :
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/EIG xeigtstc xeigtstd xeigtsts xeigtstz
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/LIN xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||
ifneq ($(CROSS), 1)
|
||||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; make all; ./testlsame; ./testslamch; ./testdlamch; \
|
||||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; $(MAKE) all; ./testlsame; ./testslamch; ./testdlamch; \
|
||||
./testsecond; ./testdsecnd; ./testieee; ./testversion )
|
||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING)
|
||||
endif
|
||||
|
||||
lapack-runtest:
|
||||
@@ -345,11 +365,12 @@ clean ::
|
||||
@$(MAKE) -C kernel clean
|
||||
#endif
|
||||
@$(MAKE) -C reference clean
|
||||
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h
|
||||
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h *.so.renamed *.a.renamed *.so.0
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@rm -rf getarch.dSYM getarch_2nd.dSYM
|
||||
endif
|
||||
@rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib
|
||||
@rm -f cblas.tmp cblas.tmp2
|
||||
@touch $(NETLIB_LAPACK_DIR)/make.inc
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) clean
|
||||
@rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h
|
||||
|
||||
13
Makefile.arm
13
Makefile.arm
@@ -1,7 +1,7 @@
|
||||
ifeq ($(CORE), $(filter $(CORE),ARMV7 CORTEXA9 CORTEXA15))
|
||||
ifeq ($(OSNAME), Android)
|
||||
CCOMMON_OPT += -mfpu=neon -march=armv7-a
|
||||
FCOMMON_OPT += -mfpu=neon -march=armv7-a
|
||||
CCOMMON_OPT += -mfpu=neon -march=armv7-a
|
||||
FCOMMON_OPT += -mfpu=neon -march=armv7-a
|
||||
else
|
||||
CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
||||
FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
||||
@@ -9,11 +9,6 @@ endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV6)
|
||||
CCOMMON_OPT += -mfpu=vfp -march=armv6
|
||||
FCOMMON_OPT += -mfpu=vfp -march=armv6
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV5)
|
||||
CCOMMON_OPT += -march=armv5
|
||||
FCOMMON_OPT += -march=armv5
|
||||
CCOMMON_OPT += -mfpu=vfp
|
||||
FCOMMON_OPT += -mfpu=vfp
|
||||
endif
|
||||
|
||||
@@ -24,6 +24,23 @@ CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
|
||||
endif
|
||||
|
||||
# Use a72 tunings because Neoverse-N1 is only available
|
||||
# in GCC>=9
|
||||
ifeq ($(CORE), NEOVERSEN1)
|
||||
ifeq ($(GCCVERSIONGTEQ7), 1)
|
||||
ifeq ($(GCCVERSIONGTEQ9), 1)
|
||||
CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), THUNDERX)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=thunderx
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=thunderx
|
||||
@@ -38,3 +55,21 @@ ifeq ($(CORE), THUNDERX2T99)
|
||||
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), THUNDERX3T110)
|
||||
ifeq ($(GCCVERSIONGTEQ10), 1)
|
||||
CCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
|
||||
FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(GCCVERSIONGTEQ9), 1)
|
||||
ifeq ($(CORE), TSV110)
|
||||
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
@@ -13,6 +13,14 @@ OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
|
||||
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
|
||||
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
|
||||
OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig
|
||||
PKG_EXTRALIB := $(EXTRALIB)
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifeq ($(C_COMPILER), PGI)
|
||||
PKG_EXTRALIB += -lomp
|
||||
else
|
||||
PKG_EXTRALIB += -lgomp
|
||||
endif
|
||||
endif
|
||||
|
||||
.PHONY : install
|
||||
.NOTPARALLEL : install
|
||||
@@ -45,12 +53,28 @@ install : lib.grd
|
||||
|
||||
ifndef NO_CBLAS
|
||||
@echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
|
||||
@cp cblas.h cblas.tmp
|
||||
ifdef SYMBOLPREFIX
|
||||
@sed 's/cblas[^( ]*/$(SYMBOLPREFIX)&/g' cblas.tmp > cblas.tmp2
|
||||
@sed 's/openblas[^( ]*/$(SYMBOLPREFIX)&/g' cblas.tmp2 > cblas.tmp
|
||||
#change back any openblas_complex_float and double that got hit
|
||||
@sed 's/$(SYMBOLPREFIX)openblas_complex_/openblas_complex_/g' cblas.tmp > cblas.tmp2
|
||||
@sed 's/goto[^( ]*/$(SYMBOLPREFIX)&/g' cblas.tmp2 > cblas.tmp
|
||||
endif
|
||||
ifdef SYMBOLSUFFIX
|
||||
@sed 's/cblas[^( ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp > cblas.tmp2
|
||||
@sed 's/openblas[^( ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp2 > cblas.tmp
|
||||
#change back any openblas_complex_float and double that got hit
|
||||
@sed 's/\(openblas_complex_\)\([^ ]*\)$(SYMBOLSUFFIX)/\1\2 /g' cblas.tmp > cblas.tmp2
|
||||
@sed 's/goto[^( ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp2 > cblas.tmp
|
||||
endif
|
||||
@sed 's/common/openblas_config/g' cblas.tmp > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
|
||||
endif
|
||||
|
||||
ifneq ($(OSNAME), AIX)
|
||||
ifndef NO_LAPACKE
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
|
||||
@@ -58,32 +82,33 @@ ifndef NO_LAPACKE
|
||||
endif
|
||||
|
||||
#for install static library
|
||||
ifndef NO_STATIC
|
||||
ifneq ($(NO_STATIC),1)
|
||||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@install -pm644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
endif
|
||||
#for install shared library
|
||||
ifndef NO_SHARED
|
||||
ifneq ($(NO_SHARED),1)
|
||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly))
|
||||
@install -pm755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),OpenBSD NetBSD))
|
||||
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@-cp $(LIBDYNNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@-install_name_tool -id "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)" "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)"
|
||||
@-install_name_tool -id "$(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).$(MAJOR_VERSION).dylib" "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib ; \
|
||||
ln -fs $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
|
||||
@@ -99,6 +124,7 @@ else
|
||||
#install on AIX has different options syntax
|
||||
ifndef NO_LAPACKE
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
|
||||
@@ -106,14 +132,14 @@ ifndef NO_LAPACKE
|
||||
endif
|
||||
|
||||
#for install static library
|
||||
ifndef NO_STATIC
|
||||
ifneq ($(NO_STATIC),1)
|
||||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@installbsd -c -m 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
endif
|
||||
#for install shared library
|
||||
ifndef NO_SHARED
|
||||
ifneq ($(NO_SHARED),1)
|
||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@installbsd -c -m 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
@@ -129,7 +155,7 @@ endif
|
||||
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@echo 'extralib='$(EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@echo 'extralib='$(PKG_EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
|
||||
|
||||
@@ -138,7 +164,7 @@ endif
|
||||
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
|
||||
ifndef NO_SHARED
|
||||
ifneq ($(NO_SHARED),1)
|
||||
#ifeq logical or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD OpenBSD DragonFly))
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
@@ -165,4 +191,3 @@ endif
|
||||
@echo " endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo "endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo Install OK!
|
||||
|
||||
|
||||
@@ -9,18 +9,70 @@ else
|
||||
USE_OPENMP = 1
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), POWER10)
|
||||
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), POWER9)
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CCOMMON_OPT += -Ofast -mvsx -fno-fast-math
|
||||
ifneq ($(GCCVERSIONGT4), 1)
|
||||
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
|
||||
CCOMMON_OPT += -mcpu=power8 -mtune=power8
|
||||
else
|
||||
CCOMMON_OPT += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
|
||||
endif
|
||||
ifneq ($(F_COMPILER), PGI)
|
||||
FCOMMON_OPT += -O2 -frecursive -fno-fast-math
|
||||
ifneq ($(GCCVERSIONGT4), 1)
|
||||
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
|
||||
FCOMMON_OPT += -mcpu=power8 -mtune=power8
|
||||
else
|
||||
FCOMMON_OPT += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -O2 -Mrecursive
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), POWER8)
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
|
||||
else
|
||||
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math
|
||||
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
|
||||
endif
|
||||
ifneq ($(F_COMPILER), PGI)
|
||||
ifeq ($(OSNAME), AIX)
|
||||
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
|
||||
else
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -O2 -Mrecursive
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CCOMMON_OPT += -DUSE_OPENMP -fopenmp
|
||||
else
|
||||
CCOMMON_OPT += -DUSE_OPENMP -mp
|
||||
endif
|
||||
ifneq ($(F_COMPILER), PGI)
|
||||
FCOMMON_OPT += -DUSE_OPENMP -fopenmp
|
||||
else
|
||||
FCOMMON_OPT += -DUSE_OPENMP -mp
|
||||
endif
|
||||
endif
|
||||
|
||||
# workaround for C->FORTRAN ABI violation in LAPACKE
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
FCOMMON_OPT += -fno-optimize-sibling-calls
|
||||
endif
|
||||
|
||||
FLAMEPATH = $(HOME)/flame/lib
|
||||
|
||||
@@ -56,6 +108,9 @@ CCOMMON_OPT += -mpowerpc64 -maix64
|
||||
ifeq ($(COMPILER_F77), g77)
|
||||
FCOMMON_OPT += -mpowerpc64 -maix64
|
||||
endif
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
FCOMMON_OPT += -mpowerpc64 -maix64
|
||||
endif
|
||||
ifeq ($(COMPILER_F77), xlf)
|
||||
FCOMMON_OPT += -q64
|
||||
endif
|
||||
|
||||
@@ -17,7 +17,11 @@ ifdef CPUIDEMU
|
||||
EXFLAGS = -DCPUIDEMU -DVENDOR=99
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), 1004K)
|
||||
ifeq ($(TARGET), MIPS24K)
|
||||
TARGET_FLAGS = -mips32r2
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), MIPS1004K)
|
||||
TARGET_FLAGS = -mips32r2
|
||||
endif
|
||||
|
||||
@@ -42,7 +46,7 @@ all: getarch_2nd
|
||||
./getarch_2nd 1 >> $(TARGET_CONF)
|
||||
|
||||
config.h : c_check f_check getarch
|
||||
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS)
|
||||
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS) $(CFLAGS)
|
||||
ifneq ($(ONLY_CBLAS), 1)
|
||||
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS)
|
||||
else
|
||||
@@ -59,13 +63,13 @@ endif
|
||||
|
||||
|
||||
getarch : getarch.c cpuid.S dummy $(CPUIDEMU)
|
||||
$(HOSTCC) $(CFLAGS) $(EXFLAGS) -o $(@F) getarch.c cpuid.S $(CPUIDEMU)
|
||||
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) -o $(@F) getarch.c cpuid.S $(CPUIDEMU)
|
||||
|
||||
getarch_2nd : getarch_2nd.c config.h dummy
|
||||
ifndef TARGET_CORE
|
||||
$(HOSTCC) -I. $(CFLAGS) -o $(@F) getarch_2nd.c
|
||||
$(HOSTCC) -I. $(HOST_CFLAGS) -o $(@F) getarch_2nd.c
|
||||
else
|
||||
$(HOSTCC) -I. $(CFLAGS) -DBUILD_KERNEL -o $(@F) getarch_2nd.c
|
||||
$(HOSTCC) -I. $(HOST_CFLAGS) -DBUILD_KERNEL -o $(@F) getarch_2nd.c
|
||||
endif
|
||||
|
||||
dummy:
|
||||
|
||||
106
Makefile.rule
106
Makefile.rule
@@ -3,7 +3,7 @@
|
||||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.3.5
|
||||
VERSION = 0.3.10.dev
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
@@ -48,6 +48,8 @@ VERSION = 0.3.5
|
||||
# HOSTCC = gcc
|
||||
|
||||
# If you need 32bit binary, define BINARY=32, otherwise define BINARY=64
|
||||
# Please note that AVX is not available on 32-bit.
|
||||
# Setting BINARY=32 disables AVX/AVX2/AVX-512.
|
||||
# BINARY=64
|
||||
|
||||
# About threaded BLAS. It will be automatically detected if you don't
|
||||
@@ -56,8 +58,14 @@ VERSION = 0.3.5
|
||||
# For force setting for multi threaded, specify USE_THREAD = 1
|
||||
# USE_THREAD = 0
|
||||
|
||||
# If you want to build a single-threaded OpenBLAS, but expect to call this
|
||||
# from several concurrent threads in some other program, comment this in for
|
||||
# thread safety. (This is done automatically for USE_THREAD=1 , and should not
|
||||
# be necessary when USE_OPENMP=1)
|
||||
# USE_LOCKING = 1
|
||||
|
||||
# If you're going to use this library with OpenMP, please comment it in.
|
||||
# This flag is always set for POWER8. Don't modify the flag
|
||||
# This flag is always set for POWER8. Don't set USE_OPENMP = 0 if you're targeting POWER8.
|
||||
# USE_OPENMP = 1
|
||||
|
||||
# The OpenMP scheduler to use - by default this is "static" and you
|
||||
@@ -68,36 +76,54 @@ VERSION = 0.3.5
|
||||
# allow you to select the scheduler from the environment variable OMP_SCHEDULE
|
||||
# CCOMMON_OPT += -DOMP_SCHED=dynamic
|
||||
|
||||
# You can define maximum number of threads. Basically it should be
|
||||
# less than actual number of cores. If you don't specify one, it's
|
||||
# automatically detected by the the script.
|
||||
# You can define the maximum number of threads. Basically it should be less
|
||||
# than or equal to the number of CPU threads. If you don't specify one, it's
|
||||
# automatically detected by the build system.
|
||||
# If SMT (aka. HT) is enabled on the system, it may or may not be beneficial to
|
||||
# restrict NUM_THREADS to the number of physical cores. By default, the automatic
|
||||
# detection includes logical CPUs, thus allowing the use of SMT.
|
||||
# Users may opt at runtime to use less than NUM_THREADS threads.
|
||||
#
|
||||
# Note for package maintainers: you can build OpenBLAS with a large NUM_THREADS
|
||||
# value (eg. 32-256) if you expect your users to use that many threads. Due to the way
|
||||
# some internal structures are allocated, using a large NUM_THREADS value has a RAM
|
||||
# footprint penalty, even if users reduce the actual number of threads at runtime.
|
||||
# NUM_THREADS = 24
|
||||
|
||||
# If you have enabled USE_OPENMP and your application would call
|
||||
# OpenBLAS's calculation API from multi threads, please comment it in.
|
||||
# This flag defines how many instances of OpenBLAS's calculation API can
|
||||
# actually run in parallel. If more threads call OpenBLAS's calculation API,
|
||||
# OpenBLAS's calculation API from multiple threads, please comment this in.
|
||||
# This flag defines how many instances of OpenBLAS's calculation API can actually
|
||||
# run in parallel. If more than NUM_PARALLEL threads call OpenBLAS's calculation API,
|
||||
# they need to wait for the preceding API calls to finish or risk data corruption.
|
||||
# NUM_PARALLEL = 2
|
||||
|
||||
# if you don't need to install the static library, please comment it in.
|
||||
# When multithreading, OpenBLAS needs to use a memory buffer for communicating
|
||||
# and collating results for individual subranges of the original matrix. Since
|
||||
# the original GotoBLAS of the early 2000s, the default size of this buffer has
|
||||
# been set at a value of 32<<20 (which is 32MB) on x86_64 , twice that on PPC.
|
||||
# If you expect to handle large problem sizes (beyond about 30000x30000) uncomment
|
||||
# this line and adjust the (32<<n) factor if necessary. Usually an insufficient value
|
||||
# manifests itself as a crash in the relevant scal kernel (sscal_k, dscal_k etc)
|
||||
# BUFFERSIZE = 25
|
||||
|
||||
# If you don't need to install the static library, please comment this in.
|
||||
# NO_STATIC = 1
|
||||
|
||||
# if you don't need generate the shared library, please comment it in.
|
||||
# If you don't need to generate the shared library, please comment this in.
|
||||
# NO_SHARED = 1
|
||||
|
||||
# If you don't need CBLAS interface, please comment it in.
|
||||
# If you don't need the CBLAS interface, please comment this in.
|
||||
# NO_CBLAS = 1
|
||||
|
||||
# If you only want CBLAS interface without installing Fortran compiler,
|
||||
# please comment it in.
|
||||
# If you only want the CBLAS interface without installing a Fortran compiler,
|
||||
# please comment this in.
|
||||
# ONLY_CBLAS = 1
|
||||
|
||||
# If you don't need LAPACK, please comment it in.
|
||||
# If you set NO_LAPACK=1, the library automatically sets NO_LAPACKE=1.
|
||||
# If you don't need LAPACK, please comment this in.
|
||||
# If you set NO_LAPACK=1, the build system automatically sets NO_LAPACKE=1.
|
||||
# NO_LAPACK = 1
|
||||
|
||||
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
|
||||
# If you don't need LAPACKE (C Interface to LAPACK), please comment this in.
|
||||
# NO_LAPACKE = 1
|
||||
|
||||
# Build LAPACK Deprecated functions since LAPACK 3.6.0
|
||||
@@ -106,7 +132,7 @@ BUILD_LAPACK_DEPRECATED = 1
|
||||
# Build RecursiveLAPACK on top of LAPACK
|
||||
# BUILD_RELAPACK = 1
|
||||
|
||||
# If you want to use legacy threaded Level 3 implementation.
|
||||
# If you want to use the legacy threaded Level 3 implementation.
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
|
||||
# If you want to use the new, still somewhat experimental code that uses
|
||||
@@ -116,8 +142,8 @@ BUILD_LAPACK_DEPRECATED = 1
|
||||
# USE_TLS = 1
|
||||
|
||||
# If you want to drive whole 64bit region by BLAS. Not all Fortran
|
||||
# compiler supports this. It's safe to keep comment it out if you
|
||||
# are not sure(equivalent to "-i8" option).
|
||||
# compilers support this. It's safe to keep this commented out if you
|
||||
# are not sure. (This is equivalent to the "-i8" ifort option).
|
||||
# INTERFACE64 = 1
|
||||
|
||||
# Unfortunately most of kernel won't give us high quality buffer.
|
||||
@@ -125,10 +151,18 @@ BUILD_LAPACK_DEPRECATED = 1
|
||||
# but it will consume time. If you don't like it, you can disable one.
|
||||
NO_WARMUP = 1
|
||||
|
||||
# If you want to disable CPU/Memory affinity on Linux.
|
||||
# Comment this in if you want to disable OpenBLAS's CPU/Memory affinity handling.
|
||||
# This feature is only implemented on Linux, and is always disabled on other platforms.
|
||||
# Enabling affinity handling may improve performance, especially on NUMA systems, but
|
||||
# it may conflict with certain applications that also try to manage affinity.
|
||||
# This conflict can result in threads of the application calling OpenBLAS ending up locked
|
||||
# to the same core(s) as OpenBLAS, possibly binding all threads to a single core.
|
||||
# For this reason, affinity handling is disabled by default. Can be safely enabled if nothing
|
||||
# else modifies affinity settings.
|
||||
# Note: enabling affinity has been known to cause problems with NumPy and R
|
||||
NO_AFFINITY = 1
|
||||
|
||||
# if you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus
|
||||
# If you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus
|
||||
# BIGNUMA = 1
|
||||
|
||||
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
|
||||
@@ -138,6 +172,10 @@ NO_AFFINITY = 1
|
||||
# Don't use Haswell optimizations if binutils is too old (e.g. RHEL6)
|
||||
# NO_AVX2 = 1
|
||||
|
||||
# Don't use SkylakeX optimizations if binutils or compiler are too old (the build
|
||||
# system will try to determine this automatically)
|
||||
# NO_AVX512 = 1
|
||||
|
||||
# Don't use parallel make.
|
||||
# NO_PARALLEL_MAKE = 1
|
||||
|
||||
@@ -162,17 +200,17 @@ NO_AFFINITY = 1
|
||||
# time out to improve performance. This number should be from 4 to 30
|
||||
# which corresponds to (1 << n) cycles. For example, if you set to 26,
|
||||
# thread will be running for (1 << 26) cycles(about 25ms on 3.0GHz
|
||||
# system). Also you can control this mumber by THREAD_TIMEOUT
|
||||
# system). Also you can control this number by THREAD_TIMEOUT
|
||||
# CCOMMON_OPT += -DTHREAD_TIMEOUT=26
|
||||
|
||||
# Using special device driver for mapping physically contigous memory
|
||||
# Using special device driver for mapping physically contiguous memory
|
||||
# to the user space. If bigphysarea is enabled, it will use it.
|
||||
# DEVICEDRIVER_ALLOCATION = 1
|
||||
|
||||
# If you need to synchronize FP CSR between threads (for x86/x86_64 only).
|
||||
# CONSISTENT_FPCSR = 1
|
||||
|
||||
# If any gemm arguement m, n or k is less or equal this threshold, gemm will be execute
|
||||
# If any gemm argument m, n or k is less or equal this threshold, gemm will be execute
|
||||
# with single thread. (Actually in recent versions this is a factor proportional to the
|
||||
# number of floating point operations necessary for the given problem size, no longer
|
||||
# an individual dimension). You can use this setting to avoid the overhead of multi-
|
||||
@@ -180,7 +218,7 @@ NO_AFFINITY = 1
|
||||
# been reported to be optimal for certain workloads (50 is the recommended value for Julia).
|
||||
# GEMM_MULTITHREAD_THRESHOLD = 4
|
||||
|
||||
# If you need santy check by comparing reference BLAS. It'll be very
|
||||
# If you need sanity check by comparing results to reference BLAS. It'll be very
|
||||
# slow (Not implemented yet).
|
||||
# SANITY_CHECK = 1
|
||||
|
||||
@@ -220,6 +258,24 @@ COMMON_PROF = -pg
|
||||
# SYMBOLPREFIX=
|
||||
# SYMBOLSUFFIX=
|
||||
|
||||
# Run a C++ based thread safety tester after the build is done.
|
||||
# This is mostly intended as a developer feature to spot regressions, but users and
|
||||
# package maintainers can enable this if they have doubts about the thread safety of
|
||||
# the library, given the configuration in this file.
|
||||
# By default, the thread safety tester launches 52 concurrent calculations at the same
|
||||
# time.
|
||||
#
|
||||
# Please note that the test uses ~1300 MiB of RAM for the DGEMM test.
|
||||
#
|
||||
# The test requires CBLAS to be built, a C++11 capable compiler and the presence of
|
||||
# an OpenMP implementation. If you are cross-compiling this test will probably not
|
||||
# work at all.
|
||||
#
|
||||
# CPP_THREAD_SAFETY_TEST = 1
|
||||
|
||||
|
||||
# If you want to enable the experimental BFLOAT16 support
|
||||
# BUILD_HALF = 1
|
||||
#
|
||||
# End of user configuration
|
||||
#
|
||||
|
||||
237
Makefile.system
237
Makefile.system
@@ -9,15 +9,30 @@ ifndef TOPDIR
|
||||
TOPDIR = .
|
||||
endif
|
||||
|
||||
# If ARCH is not set, we use the host system's architecture for getarch compile options.
|
||||
ifndef ARCH
|
||||
HOSTARCH := $(shell uname -m)
|
||||
else
|
||||
HOSTARCH = $(ARCH)
|
||||
endif
|
||||
|
||||
# Catch conflicting usage of ARCH in some BSD environments
|
||||
ifeq ($(ARCH), amd64)
|
||||
override ARCH=x86_64
|
||||
else ifeq ($(ARCH), powerpc64)
|
||||
override ARCH=power
|
||||
else ifeq ($(ARCH), powerpc)
|
||||
override ARCH=power
|
||||
else ifeq ($(ARCH), i386)
|
||||
override ARCH=x86
|
||||
else ifeq ($(ARCH), armv6)
|
||||
override ARCH=arm
|
||||
else ifeq ($(ARCH), armv7)
|
||||
override ARCH=arm
|
||||
else ifeq ($(ARCH), aarch64)
|
||||
override ARCH=arm64
|
||||
else ifeq ($(ARCH), zarch)
|
||||
override ARCH=zarch
|
||||
endif
|
||||
|
||||
NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib
|
||||
@@ -65,6 +80,7 @@ endif
|
||||
|
||||
ifdef TARGET
|
||||
GETARCH_FLAGS := -DFORCE_$(TARGET)
|
||||
GETARCH_FLAGS += -DUSER_TARGET
|
||||
endif
|
||||
|
||||
# Force fallbacks for 32bit
|
||||
@@ -76,6 +92,9 @@ endif
|
||||
ifeq ($(TARGET), SKYLAKEX)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET), COOPERLAKE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET), SANDYBRIDGE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
@@ -94,6 +113,12 @@ endif
|
||||
ifeq ($(TARGET), ZEN)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET), ARMV8)
|
||||
GETARCH_FLAGS := -DFORCE_ARMV7
|
||||
endif
|
||||
ifeq ($(TARGET), POWER8)
|
||||
GETARCH_FLAGS := -DFORCE_POWER6
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
@@ -112,6 +137,9 @@ endif
|
||||
ifeq ($(TARGET_CORE), SKYLAKEX)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), COOPERLAKE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), SANDYBRIDGE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
@@ -133,7 +161,12 @@ endif
|
||||
endif
|
||||
|
||||
|
||||
|
||||
# On x86_64 build getarch with march=native unless the compiler is PGI. This is required to detect AVX512 support in getarch.
|
||||
ifeq ($(HOSTARCH), x86_64)
|
||||
ifeq ($(findstring pgcc,$(HOSTCC)),)
|
||||
GETARCH_FLAGS += -march=native
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
@@ -151,7 +184,8 @@ GETARCH_FLAGS += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifeq ($(BINARY), 32)
|
||||
GETARCH_FLAGS += -DNO_AVX
|
||||
GETARCH_FLAGS += -DNO_AVX -DNO_AVX2 -DNO_AVX512
|
||||
NO_AVX512 = 1
|
||||
endif
|
||||
|
||||
ifeq ($(NO_AVX2), 1)
|
||||
@@ -190,12 +224,17 @@ else
|
||||
ONLY_CBLAS = 0
|
||||
endif
|
||||
|
||||
#For small matrix optimization
|
||||
ifeq ($(SMALL_MATRIX_OPT), 1)
|
||||
CCOMMON_OPT += -DSMALL_MATRIX_OPT
|
||||
endif
|
||||
|
||||
# This operation is expensive, so execution should be once.
|
||||
ifndef GOTOBLAS_MAKEFILE
|
||||
export GOTOBLAS_MAKEFILE = 1
|
||||
|
||||
# Generating Makefile.conf and config.h
|
||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all)
|
||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" HOST_CFLAGS="$(GETARCH_FLAGS)" CFLAGS="$(CFLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all)
|
||||
|
||||
ifndef TARGET_CORE
|
||||
include $(TOPDIR)/Makefile.conf
|
||||
@@ -232,25 +271,50 @@ SMP = 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(SMP), 1)
|
||||
USE_LOCKING =
|
||||
endif
|
||||
|
||||
ifndef NEED_PIC
|
||||
NEED_PIC = 1
|
||||
endif
|
||||
|
||||
ARFLAGS =
|
||||
CPP = $(COMPILER) -E
|
||||
AR = $(CROSS_SUFFIX)ar
|
||||
AS = $(CROSS_SUFFIX)as
|
||||
LD = $(CROSS_SUFFIX)ld
|
||||
RANLIB = $(CROSS_SUFFIX)ranlib
|
||||
AR ?= $(CROSS_SUFFIX)ar
|
||||
AS ?= $(CROSS_SUFFIX)as
|
||||
LD ?= $(CROSS_SUFFIX)ld
|
||||
RANLIB ?= $(CROSS_SUFFIX)ranlib
|
||||
NM = $(CROSS_SUFFIX)nm
|
||||
DLLWRAP = $(CROSS_SUFFIX)dllwrap
|
||||
OBJCOPY = $(CROSS_SUFFIX)objcopy
|
||||
OBJCONV = $(CROSS_SUFFIX)objconv
|
||||
|
||||
|
||||
# For detect fortran failed, only build BLAS.
|
||||
# When fortran support was either not detected or actively deselected, only build BLAS.
|
||||
ifeq ($(NOFORTRAN), 1)
|
||||
NO_LAPACK = 1
|
||||
override FEXTRALIB =
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
|
||||
GCCVERSIONEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` = 5)
|
||||
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
|
||||
GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7)
|
||||
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
|
||||
GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11)
|
||||
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
|
||||
# Note that the behavior of -dumpversion is compile-time-configurable for
|
||||
# gcc-7.x and newer. Use -dumpfullversion there
|
||||
ifeq ($(GCCVERSIONGTEQ7),1)
|
||||
GCCDUMPVERSION_PARAM := -dumpfullversion
|
||||
else
|
||||
GCCDUMPVERSION_PARAM := -dumpversion
|
||||
endif
|
||||
GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 2)
|
||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 7)
|
||||
endif
|
||||
|
||||
#
|
||||
@@ -299,13 +363,9 @@ ifeq ($(C_COMPILER), CLANG)
|
||||
CCOMMON_OPT += -DMS_ABI
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
#Test for supporting MS_ABI
|
||||
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
|
||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||
#Version tests for supporting specific features (MS_ABI, POWER9 intrinsics)
|
||||
ifeq ($(GCCVERSIONGT4), 1)
|
||||
# GCC Majar version > 4
|
||||
# GCC Major version > 4
|
||||
# It is compatible with MSVC ABI.
|
||||
CCOMMON_OPT += -DMS_ABI
|
||||
endif
|
||||
@@ -317,7 +377,6 @@ ifeq ($(GCCMINORVERSIONGTEQ7), 1)
|
||||
CCOMMON_OPT += -DMS_ABI
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Ensure the correct stack alignment on Win32
|
||||
# http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
|
||||
@@ -383,6 +442,12 @@ ifneq ($(MAX_STACK_ALLOC), 0)
|
||||
CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC)
|
||||
endif
|
||||
|
||||
ifdef USE_LOCKING
|
||||
ifneq ($(USE_LOCKING), 0)
|
||||
CCOMMON_OPT += -DUSE_LOCKING
|
||||
endif
|
||||
endif
|
||||
|
||||
#
|
||||
# Architecture dependent settings
|
||||
#
|
||||
@@ -503,7 +568,7 @@ DYNAMIC_CORE += HASWELL ZEN
|
||||
endif
|
||||
ifneq ($(NO_AVX512), 1)
|
||||
ifneq ($(NO_AVX2), 1)
|
||||
DYNAMIC_CORE += SKYLAKEX
|
||||
DYNAMIC_CORE += SKYLAKEX COOPERLAKE
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
@@ -518,9 +583,73 @@ endif
|
||||
|
||||
ifeq ($(ARCH), arm64)
|
||||
DYNAMIC_CORE = ARMV8
|
||||
DYNAMIC_CORE += CORTEXA53
|
||||
DYNAMIC_CORE += CORTEXA57
|
||||
DYNAMIC_CORE += CORTEXA72
|
||||
DYNAMIC_CORE += CORTEXA73
|
||||
DYNAMIC_CORE += NEOVERSEN1
|
||||
DYNAMIC_CORE += FALKOR
|
||||
DYNAMIC_CORE += THUNDERX
|
||||
DYNAMIC_CORE += THUNDERX2T99
|
||||
DYNAMIC_CORE += TSV110
|
||||
DYNAMIC_CORE += EMAG8180
|
||||
DYNAMIC_CORE += THUNDERX3T110
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), zarch)
|
||||
DYNAMIC_CORE = ZARCH_GENERIC
|
||||
|
||||
# Z13 is supported since gcc-5.2, gcc-6, and in RHEL 7.3 and newer
|
||||
ifeq ($(GCCVERSIONGT5), 1)
|
||||
ZARCH_SUPPORT_Z13 := 1
|
||||
else ifeq ($(GCCVERSIONEQ5), 1)
|
||||
ifeq ($(GCCMINORVERSIONGTEQ2), 1)
|
||||
ZARCH_SUPPORT_Z13 := 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(wildcard /etc/redhat-release), /etc/redhat-release)
|
||||
ifeq ($(shell source /etc/os-release ; expr $$VERSION_ID \>= "7.3"), 1)
|
||||
ZARCH_SUPPORT_Z13 := 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ZARCH_SUPPORT_Z13), 1)
|
||||
DYNAMIC_CORE += Z13
|
||||
else
|
||||
$(info OpenBLAS: Not building Z13 kernels because gcc is older than 5.2 or 6.x)
|
||||
endif
|
||||
|
||||
ifeq ($(GCCVERSIONGTEQ7), 1)
|
||||
DYNAMIC_CORE += Z14
|
||||
else
|
||||
$(info OpenBLAS: Not building Z14 kernels because gcc is older than 7.x)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), power)
|
||||
DYNAMIC_CORE = POWER6
|
||||
DYNAMIC_CORE += POWER8
|
||||
ifneq ($(C_COMPILER), GCC)
|
||||
DYNAMIC_CORE += POWER9
|
||||
DYNAMIC_CORE += POWER10
|
||||
endif
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
ifeq ($(GCCVERSIONGT5), 1)
|
||||
DYNAMIC_CORE += POWER9
|
||||
else
|
||||
$(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
|
||||
endif
|
||||
ifeq ($(GCCVERSIONGTEQ11), 1)
|
||||
DYNAMIC_CORE += POWER10
|
||||
else ifeq ($(GCCVERSIONGTEQ10), 1)
|
||||
ifeq ($(GCCMINORVERSIONGTEQ2), 1)
|
||||
DYNAMIC_CORE += POWER10
|
||||
endif
|
||||
else
|
||||
$(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.)
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
|
||||
@@ -575,6 +704,16 @@ endif
|
||||
ifeq ($(ARCH), arm64)
|
||||
NO_BINARY_MODE = 1
|
||||
BINARY_DEFINED = 1
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
FCOMMON_OPT += -fdefault-integer-8
|
||||
endif
|
||||
ifeq ($(F_COMPILER), FLANG)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
@@ -620,7 +759,12 @@ CCOMMON_OPT += -march=mips64
|
||||
FCOMMON_OPT += -march=mips64
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), 1004K)
|
||||
ifeq ($(CORE), MIPS24K)
|
||||
CCOMMON_OPT += -mips32r2 -mtune=24kc $(MSA_FLAGS)
|
||||
FCOMMON_OPT += -mips32r2 -mtune=24kc $(MSA_FLAGS)
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), MIPS1004K)
|
||||
CCOMMON_OPT += -mips32r2 $(MSA_FLAGS)
|
||||
FCOMMON_OPT += -mips32r2 $(MSA_FLAGS)
|
||||
endif
|
||||
@@ -665,7 +809,18 @@ endif
|
||||
|
||||
ifeq ($(C_COMPILER), PGI)
|
||||
ifdef BINARY64
|
||||
CCOMMON_OPT += -tp p7-64
|
||||
ifeq ($(ARCH), x86_64)
|
||||
CCOMMON_OPT += -tp p7-64 -D__MMX__ -Mnollvm
|
||||
else
|
||||
ifeq ($(ARCH), power)
|
||||
ifeq ($(CORE), POWER8)
|
||||
CCOMMON_OPT += -tp pwr8
|
||||
endif
|
||||
ifeq ($(CORE), POWER9)
|
||||
CCOMMON_OPT += -tp pwr9
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -tp p7
|
||||
endif
|
||||
@@ -685,6 +840,15 @@ endif
|
||||
|
||||
ifeq ($(F_COMPILER), FLANG)
|
||||
CCOMMON_OPT += -DF_INTERFACE_FLANG
|
||||
FCOMMON_OPT += -Mrecursive -Kieee
|
||||
ifeq ($(OSNAME), Linux)
|
||||
ifeq ($(ARCH), x86_64)
|
||||
FLANG_VENDOR := $(shell expr `$(FC) --version|cut -f 1 -d "."|head -1`)
|
||||
ifeq ($(FLANG_VENDOR),AOCC)
|
||||
FCOMMON_OPT += -fno-unroll-loops
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
ifdef BINARY64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
@@ -725,6 +889,9 @@ else
|
||||
FCOMMON_OPT += -m32
|
||||
endif
|
||||
endif
|
||||
ifneq ($(NO_LAPACKE), 1)
|
||||
FCOMMON_OPT += -fno-second-underscore
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -733,6 +900,8 @@ CCOMMON_OPT += -DF_INTERFACE_GFORT
|
||||
FCOMMON_OPT += -Wall
|
||||
# make single-threaded LAPACK calls thread-safe #1847
|
||||
FCOMMON_OPT += -frecursive
|
||||
# work around ABI problem with passing single-character arguments
|
||||
FCOMMON_OPT += -fno-optimize-sibling-calls
|
||||
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
EXTRALIB += -lgfortran
|
||||
@@ -775,6 +944,7 @@ ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
FCOMMON_OPT += -recursive -fp-model strict -assume protect-parens
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
@@ -814,10 +984,22 @@ ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
ifeq ($(ARCH), x86_64)
|
||||
FCOMMON_OPT += -tp p7-64
|
||||
else
|
||||
ifeq ($(ARCH), power)
|
||||
ifeq ($(CORE), POWER8)
|
||||
FCOMMON_OPT += -tp pwr8
|
||||
endif
|
||||
ifeq ($(CORE), POWER9)
|
||||
FCOMMON_OPT += -tp pwr9
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -tp p7
|
||||
endif
|
||||
FCOMMON_OPT += -Mrecursive
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -mp
|
||||
endif
|
||||
@@ -1038,10 +1220,14 @@ ifdef USE_SIMPLE_THREADED_LEVEL3
|
||||
CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3
|
||||
endif
|
||||
|
||||
ifdef USE_TLS
|
||||
ifeq ($(USE_TLS), 1)
|
||||
CCOMMON_OPT += -DUSE_TLS
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_HALF), 1)
|
||||
CCOMMON_OPT += -DBUILD_HALF
|
||||
endif
|
||||
|
||||
CCOMMON_OPT += -DVERSION=\"$(VERSION)\"
|
||||
|
||||
ifndef SYMBOLPREFIX
|
||||
@@ -1068,6 +1254,9 @@ KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
|
||||
|
||||
include $(TOPDIR)/Makefile.$(ARCH)
|
||||
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME
|
||||
endif
|
||||
CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\"
|
||||
|
||||
ifeq ($(CORE), PPC440)
|
||||
@@ -1091,8 +1280,12 @@ endif
|
||||
endif
|
||||
|
||||
ifdef NO_AFFINITY
|
||||
ifeq ($(NO_AFFINITY), 0)
|
||||
override undefine NO_AFFINITY
|
||||
else
|
||||
CCOMMON_OPT += -DNO_AFFINITY
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef FUNCTION_PROFILE
|
||||
CCOMMON_OPT += -DFUNCTION_PROFILE
|
||||
@@ -1156,7 +1349,6 @@ endif
|
||||
|
||||
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
|
||||
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
|
||||
|
||||
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
|
||||
override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
|
||||
#MAKEOVERRIDES =
|
||||
@@ -1262,6 +1454,8 @@ export OSNAME
|
||||
export ARCH
|
||||
export CORE
|
||||
export LIBCORE
|
||||
export __BYTE_ORDER__
|
||||
export ELF_VERSION
|
||||
export PGCPATH
|
||||
export CONFIG
|
||||
export CC
|
||||
@@ -1307,7 +1501,10 @@ export KERNELDIR
|
||||
export FUNCTION_PROFILE
|
||||
export TARGET_CORE
|
||||
export NO_AVX512
|
||||
export BUILD_HALF
|
||||
|
||||
export SHGEMM_UNROLL_M
|
||||
export SHGEMM_UNROLL_N
|
||||
export SGEMM_UNROLL_M
|
||||
export SGEMM_UNROLL_N
|
||||
export DGEMM_UNROLL_M
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
SHBLASOBJS_P = $(SHBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
SBLASOBJS_P = $(SBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
DBLASOBJS_P = $(DBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
@@ -9,8 +10,8 @@ COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
||||
HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
||||
BLASOBJS = $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)
|
||||
BLASOBJS_P = $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P)
|
||||
BLASOBJS = $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)
|
||||
BLASOBJS_P = $(SHBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P)
|
||||
|
||||
ifdef EXPRECISION
|
||||
BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||
@@ -22,6 +23,7 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
|
||||
endif
|
||||
|
||||
$(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DHALF -UDOUBLE -UCOMPLEX
|
||||
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
|
||||
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
|
||||
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
|
||||
@@ -29,6 +31,7 @@ $(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
|
||||
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
|
||||
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
|
||||
|
||||
$(SHBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
|
||||
@@ -15,10 +15,38 @@ CCOMMON_OPT += -march=skylake-avx512
|
||||
FCOMMON_OPT += -march=skylake-avx512
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), COOPERLAKE)
|
||||
ifndef DYNAMIC_ARCH
|
||||
ifndef NO_AVX512
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# cooperlake support was added in 10.1
|
||||
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
|
||||
GCCMINORVERSIONGTEQ1 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 1)
|
||||
ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11)
|
||||
CCOMMON_OPT += -march=cooperlake
|
||||
FCOMMON_OPT += -march=cooperlake
|
||||
endif
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
@@ -28,11 +56,25 @@ endif
|
||||
ifeq ($(CORE), HASWELL)
|
||||
ifndef DYNAMIC_ARCH
|
||||
ifndef NO_AVX2
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# AVX2 support was added in 4.7.0
|
||||
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
|
||||
CCOMMON_OPT += -mavx2
|
||||
endif
|
||||
endif
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
# AVX2 support was added in 4.7.0
|
||||
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
|
||||
FCOMMON_OPT += -mavx2
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -4,3 +4,7 @@ CCOMMON_OPT += -march=z13 -mzvector
|
||||
FCOMMON_OPT += -march=z13 -mzvector
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), Z14)
|
||||
CCOMMON_OPT += -march=z14 -mzvector -O3
|
||||
FCOMMON_OPT += -march=z14 -mzvector
|
||||
endif
|
||||
|
||||
77
README.md
77
README.md
@@ -6,11 +6,16 @@ Travis CI: [](https://ci.appveyor.com/project/xianyi/openblas/branch/develop)
|
||||
|
||||
Drone CI: [](https://cloud.drone.io/xianyi/OpenBLAS/)
|
||||
|
||||
[](https://dev.azure.com/xianyi/OpenBLAS/_build/latest?definitionId=1&branchName=develop)
|
||||
|
||||
|
||||
## Introduction
|
||||
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
|
||||
Please read the documentation on the OpenBLAS wiki pages: <http://github.com/xianyi/OpenBLAS/wiki>.
|
||||
Please read the documentation on the OpenBLAS wiki pages: <https://github.com/xianyi/OpenBLAS/wiki>.
|
||||
|
||||
## Binary Packages
|
||||
|
||||
@@ -22,8 +27,11 @@ You can download them from [file hosting on sourceforge.net](https://sourceforge
|
||||
|
||||
## Installation from Source
|
||||
|
||||
Download from project homepage, http://xianyi.github.com/OpenBLAS/, or check out the code
|
||||
using Git from https://github.com/xianyi/OpenBLAS.git.
|
||||
Download from project homepage, https://xianyi.github.com/OpenBLAS/, or check out the code
|
||||
using Git from https://github.com/xianyi/OpenBLAS.git. (If you want the most up to date version, be
|
||||
sure to use the develop branch - master is several years out of date due to a change of maintainership.)
|
||||
Buildtime parameters can be chosen in Makefile.rule, see there for a short description of each option.
|
||||
Most can also be given directly on the make or cmake command line.
|
||||
|
||||
### Dependencies
|
||||
|
||||
@@ -51,6 +59,10 @@ Examples:
|
||||
```sh
|
||||
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
|
||||
```
|
||||
or same with the newer mips-crosscompiler put out by Loongson that defaults to the 32bit ABI:
|
||||
```sh
|
||||
make HOSTCC=gcc CC='/opt/mips-loongson-gcc7.3-linux-gnu/2019.06-29/bin/mips-linux-gnu-gcc -mabi=64' FC='/opt/mips-loongson-gcc7.3-linux-gnu/2019.06-29/bin/mips-linux-gnu-gfortran -mabi=64' TARGET=LOONGSON3A
|
||||
```
|
||||
|
||||
* On an x86 box, compile this library for a loongson3a CPU with loongcc (based on Open64) compiler:
|
||||
```sh
|
||||
@@ -63,9 +75,7 @@ A debug version can be built using `make DEBUG=1`.
|
||||
|
||||
### Compile with MASS support on Power CPU (optional)
|
||||
|
||||
The [IBM MASS](http://www-01.ibm.com/software/awdtools/mass/linux/mass-linux.html) library
|
||||
consists of a set of mathematical functions for C, C++, and Fortran applications that are
|
||||
are tuned for optimum performance on POWER architectures.
|
||||
The [IBM MASS](https://www.ibm.com/support/home/product/W511326D80541V01/other_software/mathematical_acceleration_subsystem) library consists of a set of mathematical functions for C, C++, and Fortran applications that are tuned for optimum performance on POWER architectures.
|
||||
OpenBLAS with MASS requires a 64-bit, little-endian OS on POWER.
|
||||
The library can be installed as shown:
|
||||
|
||||
@@ -101,7 +111,7 @@ The default installation directory is `/opt/OpenBLAS`.
|
||||
|
||||
## Supported CPUs and Operating Systems
|
||||
|
||||
Please read `GotoBLAS_01Readme.txt`.
|
||||
Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by the 2010 GotoBLAS.
|
||||
|
||||
### Additional supported CPUs
|
||||
|
||||
@@ -109,12 +119,18 @@ Please read `GotoBLAS_01Readme.txt`.
|
||||
|
||||
- **Intel Xeon 56xx (Westmere)**: Used GotoBLAS2 Nehalem codes.
|
||||
- **Intel Sandy Bridge**: Optimized Level-3 and Level-2 BLAS with AVX on x86-64.
|
||||
- **Intel Haswell**: Optimized Level-3 and Level-2 BLAS with AVX2 and FMA on x86-64.
|
||||
- **Intel Skylake**: Optimized Level-3 and Level-2 BLAS with AVX512 and FMA on x86-64.
|
||||
- **Intel Haswell**: Optimized Level-3 and Level-2 BLAS with AVX2 and FMA on x86-64.
|
||||
- **Intel Skylake-X**: Optimized Level-3 and Level-2 BLAS with AVX512 and FMA on x86-64.
|
||||
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
|
||||
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar)
|
||||
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
|
||||
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
|
||||
- **AMD ZEN**: Uses Haswell codes with some optimizations.
|
||||
|
||||
#### MIPS32
|
||||
|
||||
- **MIPS 1004K**: uses P5600 codes
|
||||
- **MIPS 24K**: uses P5600 codes
|
||||
|
||||
#### MIPS64
|
||||
|
||||
@@ -128,26 +144,57 @@ Please read `GotoBLAS_01Readme.txt`.
|
||||
|
||||
#### ARM64
|
||||
|
||||
- **ARMv8**: Experimental
|
||||
- **ARM Cortex-A57**: Experimental
|
||||
- **ARMv8**: Basic ARMV8 with small caches, optimized Level-3 and Level-2 BLAS
|
||||
- **Cortex-A53**: same as ARMV8 (different cpu specifications)
|
||||
- **Cortex A57**: Optimized Level-3 and Level-2 functions
|
||||
- **Cortex A72**: same as A57 ( different cpu specifications)
|
||||
- **Cortex A73**: same as A57 (different cpu specifications)
|
||||
- **Falkor**: same as A57 (different cpu specifications)
|
||||
- **ThunderX**: Optimized some Level-1 functions
|
||||
- **ThunderX2T99**: Optimized Level-3 BLAS and parts of Levels 1 and 2
|
||||
- **TSV110**: Optimized some Level-3 helper functions
|
||||
- **EMAG 8180**: preliminary support based on A57
|
||||
|
||||
#### PPC/PPC64
|
||||
|
||||
- **POWER8**: Optmized Level-3 BLAS and some Level-1, only with `USE_OPENMP=1`
|
||||
- **POWER8**: Optimized BLAS, only for PPC64LE (Little Endian), only with `USE_OPENMP=1`
|
||||
- **POWER9**: Optimized Level-3 BLAS (real) and some Level-1,2. PPC64LE with OpenMP only.
|
||||
|
||||
#### IBM zEnterprise System
|
||||
|
||||
- **Z13**: Optimized Level-3 BLAS and Level-1,2 (double precision)
|
||||
- **Z13**: Optimized Level-3 BLAS and Level-1,2
|
||||
- **Z14**: Optimized Level-3 BLAS and (single precision) Level-1,2
|
||||
|
||||
### Support for multiple targets in a single library
|
||||
|
||||
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifiying DYNAMIC_ARCH=1 in Makefile.rule, on the gmake command line or as -DDYNAMIC_ARCH=TRUE in cmake.
|
||||
|
||||
For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX. For cpu generations not included in this list, the corresponding older model is used. If you also specify DYNAMIC_OLDER=1, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option DYNAMIC_LIST that allows to specify an individual list of targets to include instead of the default.
|
||||
|
||||
DYNAMIC_ARCH is also supported on **x86**, where it translates to Katmai, Coppermine, Northwood, Prescott, Banias,
|
||||
Core2, Penryn, Dunnington, Nehalem, Athlon, Opteron, Opteron_SSE3, Barcelona, Bobcat, Atom and Nano.
|
||||
|
||||
On **ARMV8**, it enables support for CortexA53, CortexA57, CortexA72, CortexA73, Falkor, ThunderX, ThunderX2T99, TSV110 as well as generic ARMV8 cpus.
|
||||
|
||||
For **POWER**, the list encompasses POWER6, POWER8 and POWER9, on **ZARCH** it comprises Z13 and Z14.
|
||||
|
||||
The TARGET option can be used in conjunction with DYNAMIC_ARCH=1 to specify which cpu model should be assumed for all the
|
||||
common code in the library, usually you will want to set this to the oldest model you expect to encounter.
|
||||
Please note that it is not possible to combine support for different architectures, so no combined 32 and 64 bit or x86_64 and arm64 in the same library.
|
||||
|
||||
### Supported OS
|
||||
|
||||
- **GNU/Linux**
|
||||
- **MinGW or Visual Studio (CMake)/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
|
||||
- **Darwin/macOS**: Experimental. Although GotoBLAS2 supports Darwin, we are not macOS experts.
|
||||
- **Darwin/macOS/OSX/iOS**: Experimental. Although GotoBLAS2 already supports Darwin, we are not OSX/iOS experts.
|
||||
- **FreeBSD**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **OpenBSD**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **NetBSD**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **DragonFly BSD**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **Android**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>.
|
||||
- **AIX**: Supported on PPC up to POWER8
|
||||
- **Haiku**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **SunOS**: Supported by the community. We don't actively test the library on this OS:
|
||||
|
||||
## Usage
|
||||
|
||||
@@ -202,7 +249,7 @@ Please see Changelog.txt to view the differences between OpenBLAS and GotoBLAS2
|
||||
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture.
|
||||
Clang 3.0 will generate the wrong AVX binary code.
|
||||
* Please use GCC version 6 or LLVM version 6 and above to compile Skylake AVX512 kernels.
|
||||
* The number of CPUs/cores should less than or equal to 256. On Linux `x86_64` (`amd64`),
|
||||
* The number of CPUs/cores should be less than or equal to 256. On Linux `x86_64` (`amd64`),
|
||||
there is experimental support for up to 1024 CPUs/cores and 128 numa nodes if you build
|
||||
the library with `BIGNUMA=1`.
|
||||
* OpenBLAS does not set processor affinity by default.
|
||||
|
||||
@@ -22,6 +22,7 @@ SANDYBRIDGE
|
||||
HASWELL
|
||||
SKYLAKEX
|
||||
ATOM
|
||||
COOPERLAKE
|
||||
|
||||
b)AMD CPU:
|
||||
ATHLON
|
||||
@@ -48,6 +49,8 @@ POWER5
|
||||
POWER6
|
||||
POWER7
|
||||
POWER8
|
||||
POWER9
|
||||
POWER10
|
||||
PPCG4
|
||||
PPC970
|
||||
PPC970MP
|
||||
@@ -57,7 +60,8 @@ CELL
|
||||
|
||||
3.MIPS CPU:
|
||||
P5600
|
||||
1004K
|
||||
MIPS1004K
|
||||
MIPS24K
|
||||
|
||||
4.MIPS64 CPU:
|
||||
SICORTEX
|
||||
@@ -87,10 +91,15 @@ CORTEXA53
|
||||
CORTEXA57
|
||||
CORTEXA72
|
||||
CORTEXA73
|
||||
NEOVERSEN1
|
||||
EMAG8180
|
||||
FALKOR
|
||||
THUNDERX
|
||||
THUNDERX2T99
|
||||
TSV110
|
||||
THUNDERX3T110
|
||||
|
||||
9.System Z:
|
||||
ZARCH_GENERIC
|
||||
Z13
|
||||
Z14
|
||||
|
||||
22
appveyor.yml
22
appveyor.yml
@@ -35,7 +35,15 @@ environment:
|
||||
DYNAMIC_ARCH: ON
|
||||
WITH_FORTRAN: no
|
||||
- COMPILER: cl
|
||||
|
||||
- COMPILER: MinGW64-gcc-7.2.0-mingw
|
||||
DYNAMIC_ARCH: OFF
|
||||
WITH_FORTRAN: ignore
|
||||
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
|
||||
COMPILER: MinGW-gcc-6.3.0-32
|
||||
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
|
||||
COMPILER: MinGW-gcc-5.3.0
|
||||
WITH_FORTRAN: ignore
|
||||
|
||||
install:
|
||||
- if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
|
||||
- if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force
|
||||
@@ -52,10 +60,17 @@ install:
|
||||
before_build:
|
||||
- ps: if (-Not (Test-Path .\build)) { mkdir build }
|
||||
- cd build
|
||||
- set PATH=%PATH:C:\Program Files\Git\usr\bin;=%
|
||||
- if [%COMPILER%]==[MinGW-gcc-5.3.0] set PATH=C:\MinGW\bin;C:\msys64\usr\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH%
|
||||
- if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] set PATH=C:\MinGW\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH%
|
||||
- if [%COMPILER%]==[MinGW-gcc-6.3.0-32] set PATH=C:\msys64\usr\bin;C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw64\bin;%PATH%
|
||||
- if [%COMPILER%]==[cl] cmake -G "Visual Studio 15 2017 Win64" ..
|
||||
- if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl ..
|
||||
- if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] cmake -G "MinGW Makefiles" -DNOFORTRAN=1 ..
|
||||
- if [%COMPILER%]==[MinGW-gcc-6.3.0-32] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
|
||||
- if [%COMPILER%]==[MinGW-gcc-5.3.0] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
|
||||
- if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DMSVC_STATIC_CRT=ON ..
|
||||
- if [%WITH_FORTRAN%]==[yes] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 ..
|
||||
- if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON ..
|
||||
- if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON -DDYNAMIC_LIST='CORE2;NEHALEM;SANDYBRIDGE;BULLDOZER;HASWELL' ..
|
||||
|
||||
build_script:
|
||||
- cmake --build .
|
||||
@@ -64,3 +79,4 @@ test_script:
|
||||
- echo Running Test
|
||||
- cd utest
|
||||
- openblas_utest
|
||||
|
||||
|
||||
71
azure-pipelines.yml
Normal file
71
azure-pipelines.yml
Normal file
@@ -0,0 +1,71 @@
|
||||
trigger:
|
||||
# start a new build for every push
|
||||
batch: False
|
||||
branches:
|
||||
include:
|
||||
- develop
|
||||
|
||||
jobs:
|
||||
# manylinux1 is useful to test because the
|
||||
# standard Docker container uses an old version
|
||||
# of gcc / glibc
|
||||
- job: manylinux1_gcc
|
||||
pool:
|
||||
vmImage: 'ubuntu-16.04'
|
||||
steps:
|
||||
- script: |
|
||||
echo "FROM quay.io/pypa/manylinux1_x86_64
|
||||
COPY . /tmp/openblas
|
||||
RUN cd /tmp/openblas && \
|
||||
COMMON_FLAGS='DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32' && \
|
||||
BTYPE='BINARY=64' CC=gcc && \
|
||||
make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE && \
|
||||
make -C test $COMMON_FLAGS $BTYPE && \
|
||||
make -C ctest $COMMON_FLAGS $BTYPE && \
|
||||
make -C utest $COMMON_FLAGS $BTYPE" > Dockerfile
|
||||
docker build .
|
||||
displayName: Run manylinux1 docker build
|
||||
- job: Intel_SDE_skx
|
||||
pool:
|
||||
vmImage: 'ubuntu-16.04'
|
||||
steps:
|
||||
- script: |
|
||||
# at the time of writing the available Azure Ubuntu vm image
|
||||
# does not support AVX512VL, so use more recent LTS version
|
||||
echo "FROM ubuntu:bionic
|
||||
COPY . /tmp/openblas
|
||||
RUN apt-get -y update && apt-get -y install \\
|
||||
cmake \\
|
||||
gfortran \\
|
||||
make \\
|
||||
wget
|
||||
RUN mkdir /tmp/SDE && cd /tmp/SDE && \\
|
||||
mkdir sde-external-8.35.0-2019-03-11-lin && \\
|
||||
wget --quiet -O sde-external-8.35.0-2019-03-11-lin.tar.bz2 https://www.dropbox.com/s/fopsnzj67572sj5/sde-external-8.35.0-2019-03-11-lin.tar.bz2?dl=0 && \\
|
||||
tar -xjvf sde-external-8.35.0-2019-03-11-lin.tar.bz2 -C /tmp/SDE/sde-external-8.35.0-2019-03-11-lin --strip-components=1
|
||||
RUN cd /tmp/openblas && CC=gcc make QUIET_MAKE=1 DYNAMIC_ARCH=1 NUM_THREADS=32 BINARY=64
|
||||
CMD cd /tmp/openblas && echo 0 > /proc/sys/kernel/yama/ptrace_scope && CC=gcc OPENBLAS_VERBOSE=2 /tmp/SDE/sde-external-8.35.0-2019-03-11-lin/sde64 -cpuid_in /tmp/SDE/sde-external-8.35.0-2019-03-11-lin/misc/cpuid/skx/cpuid.def -- make -C utest DYNAMIC_ARCH=1 NUM_THREADS=32 BINARY=64" > Dockerfile
|
||||
docker build -t intel_sde .
|
||||
# we need a privileged docker run for sde process attachment
|
||||
docker run --privileged intel_sde
|
||||
displayName: 'Run AVX512 SkylakeX docker build / test'
|
||||
|
||||
- job: Windows_cl
|
||||
pool:
|
||||
vmImage: 'windows-latest'
|
||||
steps:
|
||||
- task: CMake@1
|
||||
inputs:
|
||||
workingDirectory: 'build' # Optional
|
||||
cmakeArgs: '-G "Visual Studio 16 2019" ..'
|
||||
- task: CMake@1
|
||||
inputs:
|
||||
cmakeArgs: '--build . --config Release'
|
||||
workingDirectory: 'build'
|
||||
- script: |
|
||||
cd build
|
||||
cd utest
|
||||
dir
|
||||
openblas_utest.exe
|
||||
|
||||
|
||||
5786
benchmark/Makefile
5786
benchmark/Makefile
File diff suppressed because it is too large
Load Diff
191
benchmark/amax.c
Normal file
191
benchmark/amax.c
Normal file
@@ -0,0 +1,191 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef AMAX
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define AMAX BLASFUNC(dzamax)
|
||||
#else
|
||||
#define AMAX BLASFUNC(scamax)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define AMAX BLASFUNC(damax)
|
||||
#else
|
||||
#define AMAX BLASFUNC(samax)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
AMAX (&m, x, &inc_x);
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
192
benchmark/amin.c
Normal file
192
benchmark/amin.c
Normal file
@@ -0,0 +1,192 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef AMIN
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define AMIN BLASFUNC(dzamin)
|
||||
#else
|
||||
#define AMIN BLASFUNC(scamin)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define AMIN BLASFUNC(damin)
|
||||
#else
|
||||
#define AMIN BLASFUNC(samin)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
AMIN (&m, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
202
benchmark/axpby.c
Normal file
202
benchmark/axpby.c
Normal file
@@ -0,0 +1,202 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef AXPBY
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define AXPBY BLASFUNC(zaxpby)
|
||||
#else
|
||||
#define AXPBY BLASFUNC(caxpby)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define AXPBY BLASFUNC(daxpby)
|
||||
#else
|
||||
#define AXPBY BLASFUNC(saxpby)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
FLOAT beta[2] = {2.0, 2.0};
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
AXPBY (&m, alpha, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
(COMPSIZE * COMPSIZE * 4. - COMPSIZE) * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -128,7 +128,7 @@ int main(int argc, char *argv[]){
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
struct timespec start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
@@ -175,13 +175,13 @@ int main(int argc, char *argv[]){
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
clock_gettime( CLOCK_REALTIME, &start);
|
||||
|
||||
AXPY (&m, alpha, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
clock_gettime( CLOCK_REALTIME, &stop);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
@@ -190,7 +190,7 @@ int main(int argc, char *argv[]){
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
" %10.2f MFlops %10.9f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
@@ -173,46 +173,46 @@ int main(int argc, char *argv[]){
|
||||
#ifndef COMPLEX
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) a[i + j * m] = 0.;
|
||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = 0.;
|
||||
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[i + j * m] = 0.;
|
||||
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = 0.;
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) {
|
||||
a[(i + j * m) * 2 + 0] = 0.;
|
||||
a[(i + j * m) * 2 + 1] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
}
|
||||
|
||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[(j + j * m) * 2 + 1] = 0.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
|
||||
for(i = j + 1; i < m; i++) {
|
||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) {
|
||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[(j + j * m) * 2 + 1] = 0.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
|
||||
for(i = j + 1; i < m; i++) {
|
||||
a[(i + j * m) * 2 + 0] = 0.;
|
||||
a[(i + j * m) * 2 + 1] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -239,10 +239,13 @@ int main(int argc, char *argv[]){
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i <= j; i++) {
|
||||
#ifndef COMPLEX
|
||||
if (maxerr < fabs(a[i + j * m] - b[i + j * m])) maxerr = fabs(a[i + j * m] - b[i + j * m]);
|
||||
if (maxerr < fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]))
|
||||
maxerr = fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]);
|
||||
#else
|
||||
if (maxerr < fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0])) maxerr = fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0]);
|
||||
if (maxerr < fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1])) maxerr = fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1]);
|
||||
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]))
|
||||
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]);
|
||||
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]))
|
||||
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@@ -250,10 +253,13 @@ int main(int argc, char *argv[]){
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = j; i < m; i++) {
|
||||
#ifndef COMPLEX
|
||||
if (maxerr < fabs(a[i + j * m] - b[i + j * m])) maxerr = fabs(a[i + j * m] - b[i + j * m]);
|
||||
if (maxerr < fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]))
|
||||
maxerr = fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]);
|
||||
#else
|
||||
if (maxerr < fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0])) maxerr = fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0]);
|
||||
if (maxerr < fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1])) maxerr = fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1]);
|
||||
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]))
|
||||
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]);
|
||||
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]))
|
||||
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -129,7 +129,10 @@ int main(int argc, char *argv[]){
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
double time1 = 0.0, timeg = 0.0;
|
||||
long nanos = 0;
|
||||
time_t seconds = 0;
|
||||
struct timespec time_start = { 0, 0 }, time_end = { 0, 0 };
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
@@ -163,35 +166,32 @@ int main(int argc, char *argv[]){
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
clock_gettime(CLOCK_REALTIME, &time_start);
|
||||
COPY (&m, x, &inc_x, y, &inc_y );
|
||||
clock_gettime(CLOCK_REALTIME, &time_end);
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
nanos = time_end.tv_nsec - time_start.tv_nsec;
|
||||
seconds = time_end.tv_sec - time_start.tv_sec;
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
time1 = seconds + nanos / 1.e9;
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
COPY (&m, x, &inc_x, y, &inc_y );
|
||||
timeg /= loops;
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MBytes %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
fprintf(stderr,
|
||||
" %10.2f MBytes %12.9f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg / 1.e6, timeg);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -195,7 +195,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < to; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -39,6 +39,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define GEMM BLASFUNC(dgemm)
|
||||
#elif defined(HALF)
|
||||
#define GEMM BLASFUNC(shgemm)
|
||||
#else
|
||||
#define GEMM BLASFUNC(sgemm)
|
||||
#endif
|
||||
@@ -120,7 +122,8 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
IFLOAT *a, *b;
|
||||
FLOAT *c;
|
||||
FLOAT alpha[] = {1.0, 0.0};
|
||||
FLOAT beta [] = {0.0, 0.0};
|
||||
char transa = 'N';
|
||||
@@ -184,10 +187,10 @@ int main(int argc, char *argv[]){
|
||||
k = to;
|
||||
}
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * m * k * COMPSIZE)) == NULL) {
|
||||
if (( a = (IFLOAT *)malloc(sizeof(IFLOAT) * m * k * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * k * n * COMPSIZE)) == NULL) {
|
||||
if (( b = (IFLOAT *)malloc(sizeof(IFLOAT) * k * n * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * m * n * COMPSIZE)) == NULL) {
|
||||
@@ -199,15 +202,15 @@ int main(int argc, char *argv[]){
|
||||
#endif
|
||||
|
||||
for (i = 0; i < m * k * COMPSIZE; i++) {
|
||||
a[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
for (i = 0; i < k * n * COMPSIZE; i++) {
|
||||
b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
for (i = 0; i < m * n * COMPSIZE; i++) {
|
||||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for (i = from; i <= to; i += step) {
|
||||
|
||||
@@ -181,9 +181,9 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -197,7 +197,7 @@ int main(int argc, char *argv[]){
|
||||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)j + (long)i * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -208,7 +208,7 @@ int main(int argc, char *argv[]){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < n * COMPSIZE * abs(inc_y); i++){
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
@@ -234,7 +234,7 @@ int main(int argc, char *argv[]){
|
||||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)j + (long)i * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -245,7 +245,7 @@ int main(int argc, char *argv[]){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < n * COMPSIZE * abs(inc_y); i++){
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
@@ -182,7 +182,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -177,20 +177,20 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
b[i + j * m * COMPSIZE] = 0.0;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (j = 0; j < m; ++j) {
|
||||
for (i = 0; i < m * COMPSIZE; ++i) {
|
||||
b[i] += a[i + j * m * COMPSIZE];
|
||||
b[i] += a[(long)i + (long)j * (long)m * COMPSIZE];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -172,7 +172,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < to; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
210
benchmark/hbmv.c
Normal file
210
benchmark/hbmv.c
Normal file
@@ -0,0 +1,210 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HBMV
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HBMV BLASFUNC(zhbmv)
|
||||
#else
|
||||
#define HBMV BLASFUNC(chbmv)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz) {
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size) {
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {0.0, 0.0};
|
||||
blasint k = 1;
|
||||
char uplo='L';
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1, inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_K"))) k = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' k = %d Inc_x = %d Inc_y = %d Loops = %d\n",
|
||||
from, to, step, uplo, k, inc_x, inc_y, loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step) {
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m, (int)m);
|
||||
|
||||
for(j = 0; j < m; j++) {
|
||||
for(i = 0; i < m * COMPSIZE; i++) {
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for (l = 0; l < loops; l++) {
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)(2 * k + 1) * (double)m / timeg * 1.e-6);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -164,9 +164,9 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -178,8 +178,6 @@ int main(int argc, char *argv[]){
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
@@ -167,7 +167,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
186
benchmark/her.c
Normal file
186
benchmark/her.c
Normal file
@@ -0,0 +1,186 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HER
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HER BLASFUNC(zher)
|
||||
#else
|
||||
#define HER BLASFUNC(cher)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
blasint incx = 1;
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
char trans='N';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);
|
||||
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HER (&uplo, &m, alpha, x, &incx, a, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
190
benchmark/her2.c
Normal file
190
benchmark/her2.c
Normal file
@@ -0,0 +1,190 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HER2
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HER2 BLASFUNC(zher2)
|
||||
#else
|
||||
#define HER2 BLASFUNC(cher2)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
blasint inc = 1;
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
char trans='N';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);
|
||||
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
y[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
|
||||
HER2 (&uplo, &m, alpha, x, &inc, y, &inc, a, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -163,9 +163,9 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -177,8 +177,6 @@ int main(int argc, char *argv[]){
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
@@ -162,8 +162,8 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -175,8 +175,6 @@ int main(int argc, char *argv[]){
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
207
benchmark/hpmv.c
Normal file
207
benchmark/hpmv.c
Normal file
@@ -0,0 +1,207 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HPMV
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HPMV BLASFUNC(zhpmv)
|
||||
#else
|
||||
#define HPMV BLASFUNC(chpmv)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz) {
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size) {
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char uplo='L';
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1, inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step) {
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m, (int)m);
|
||||
|
||||
for(j = 0; j < m; j++) {
|
||||
for(i = 0; i < m * COMPSIZE; i++) {
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for (l = 0; l < loops; l++) {
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -181,7 +181,7 @@ int main(int argc, char *argv[]){
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
" %10.2f MBytes %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
192
benchmark/iamin.c
Normal file
192
benchmark/iamin.c
Normal file
@@ -0,0 +1,192 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef IAMIN
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define IAMIN BLASFUNC(izamin)
|
||||
#else
|
||||
#define IAMIN BLASFUNC(icamin)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define IAMIN BLASFUNC(idamin)
|
||||
#else
|
||||
#define IAMIN BLASFUNC(isamin)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
IAMIN (&m, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
186
benchmark/imax.c
Normal file
186
benchmark/imax.c
Normal file
@@ -0,0 +1,186 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef IMAX
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define IMAX BLASFUNC(idmax)
|
||||
#else
|
||||
#define IMAX BLASFUNC(ismax)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
IMAX (&m, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
186
benchmark/imin.c
Normal file
186
benchmark/imin.c
Normal file
@@ -0,0 +1,186 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef IMIN
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define IMIN BLASFUNC(idmin)
|
||||
#else
|
||||
#define IMIN BLASFUNC(ismin)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
IMIN (&m, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -186,7 +186,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -194,7 +194,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for (j = 0; j < m; ++j) {
|
||||
for (i = 0; i < m * COMPSIZE; ++i) {
|
||||
b[i] += a[i + j * m * COMPSIZE];
|
||||
b[i] += a[(long)i + (long)j * (long)m * COMPSIZE];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
185
benchmark/max.c
Normal file
185
benchmark/max.c
Normal file
@@ -0,0 +1,185 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef NAMAX
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define NAMAX BLASFUNC(dmax)
|
||||
#else
|
||||
#define NAMAX BLASFUNC(smax)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
NAMAX (&m, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
185
benchmark/min.c
Normal file
185
benchmark/min.c
Normal file
@@ -0,0 +1,185 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef NAMIN
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define NAMIN BLASFUNC(dmin)
|
||||
#else
|
||||
#define NAMIN BLASFUNC(smin)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
NAMIN (&m, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -170,46 +170,46 @@ int main(int argc, char *argv[]){
|
||||
#ifndef COMPLEX
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) a[i + j * m] = 0.;
|
||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = 0.;
|
||||
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[i + j * m] = 0.;
|
||||
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = 0.;
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) {
|
||||
a[(i + j * m) * 2 + 0] = 0.;
|
||||
a[(i + j * m) * 2 + 1] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
}
|
||||
|
||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[(j + j * m) * 2 + 1] = 0.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
|
||||
for(i = j + 1; i < m; i++) {
|
||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = 0;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) {
|
||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[(j + j * m) * 2 + 1] = 0.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
|
||||
for(i = j + 1; i < m; i++) {
|
||||
a[(i + j * m) * 2 + 0] = 0.;
|
||||
a[(i + j * m) * 2 + 1] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,9 +32,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
#undef ROT
|
||||
|
||||
#undef DOT
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define ROT BLASFUNC(drot)
|
||||
@@ -42,6 +42,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ROT BLASFUNC(srot)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define ROT BLASFUNC(zdrot)
|
||||
#else
|
||||
#define ROT BLASFUNC(csrot)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
@@ -160,17 +169,16 @@ int main(int argc, char *argv[]){
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
ROT (&m, x, &inc_x, y, &inc_y, c, s);
|
||||
@@ -179,13 +187,13 @@ int main(int argc, char *argv[]){
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
|
||||
210
benchmark/rotm.c
Normal file
210
benchmark/rotm.c
Normal file
@@ -0,0 +1,210 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
|
||||
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
#undef ROTM
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define ROTM BLASFUNC(drotm)
|
||||
#else
|
||||
#define ROTM BLASFUNC(srotm)
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz)
|
||||
{
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv) {
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size)
|
||||
{
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =
|
||||
shmget(IPC_PRIVATE, (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT | 0600)) < 0) {
|
||||
printf("Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1) {
|
||||
printf("Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
|
||||
FLOAT *x, *y;
|
||||
// FLOAT result;
|
||||
blasint m, i;
|
||||
blasint inc_x = 1, inc_y = 1;
|
||||
FLOAT param[5] = {1, 2.0, 3.0, 4.0, 5.0};
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1, timeg;
|
||||
|
||||
argc--;
|
||||
argv++;
|
||||
|
||||
if (argc > 0) {
|
||||
from = atol(*argv);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0) {
|
||||
to = MAX(atol(*argv), from);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0) {
|
||||
step = atol(*argv);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS")))
|
||||
loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX")))
|
||||
inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY")))
|
||||
inc_y = atoi(p);
|
||||
|
||||
fprintf(
|
||||
stderr,
|
||||
"From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n",
|
||||
from, to, step, inc_x, inc_y, loops);
|
||||
|
||||
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) ==
|
||||
NULL) {
|
||||
fprintf(stderr, "Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if ((y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) ==
|
||||
NULL) {
|
||||
fprintf(stderr, "Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for (m = from; m <= to; m += step) {
|
||||
|
||||
timeg = 0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
|
||||
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
|
||||
y[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (l = 0; l < loops; l++) {
|
||||
gettimeofday(&start, (struct timezone *)0);
|
||||
|
||||
ROTM(&m, x, &inc_x, y, &inc_y, param);
|
||||
|
||||
gettimeofday(&stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) +
|
||||
(double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6, timeg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
argv <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
if (!is.null(options("matprod")[[1]])) options(matprod = "blas")
|
||||
|
||||
nfrom <- 128
|
||||
nto <- 2048
|
||||
nstep <- 128
|
||||
@@ -19,7 +21,6 @@ if (length(argv) > 0) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
p <- Sys.getenv("OPENBLAS_LOOPS")
|
||||
@@ -27,29 +28,21 @@ if (p != "") {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
|
||||
|
||||
cat(sprintf(
|
||||
"From %.0f To %.0f Step=%.0f Loops=%.0f\n",
|
||||
nfrom,
|
||||
nto,
|
||||
nstep,
|
||||
loops
|
||||
))
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n", nfrom, nto, nstep, loops))
|
||||
cat(sprintf(" SIZE Flops Time\n"))
|
||||
|
||||
n <- nfrom
|
||||
while (n <= nto) {
|
||||
A <- matrix(rnorm(n * n), ncol = n, nrow = n)
|
||||
A <- matrix(rnorm(n * n), nrow = n)
|
||||
ev <- 0
|
||||
z <- system.time(for (l in 1:loops) {
|
||||
ev <- eigen(A)
|
||||
})
|
||||
|
||||
mflops <- (26.66 * n * n * n) * loops / (z[3] * 1.0e6)
|
||||
mflops <- (26.66 * n * n * n) * loops / (z[3] * 1e+06)
|
||||
|
||||
st <- sprintf("%.0fx%.0f :", n, n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))
|
||||
|
||||
n <- n + nstep
|
||||
|
||||
}
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
argv <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
if (!is.null(options("matprod")[[1]])) options(matprod = "blas")
|
||||
|
||||
nfrom <- 128
|
||||
nto <- 2048
|
||||
nstep <- 128
|
||||
@@ -19,7 +21,6 @@ if (length(argv) > 0) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
p <- Sys.getenv("OPENBLAS_LOOPS")
|
||||
@@ -27,26 +28,13 @@ if (p != "") {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
|
||||
|
||||
cat(sprintf(
|
||||
"From %.0f To %.0f Step=%.0f Loops=%.0f\n",
|
||||
nfrom,
|
||||
nto,
|
||||
nstep,
|
||||
loops
|
||||
))
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n", nfrom, nto, nstep, loops))
|
||||
cat(sprintf(" SIZE Flops Time\n"))
|
||||
|
||||
n <- nfrom
|
||||
while (n <= nto) {
|
||||
A <- matrix(runif(n * n),
|
||||
ncol = n,
|
||||
nrow = n,
|
||||
byrow = TRUE)
|
||||
B <- matrix(runif(n * n),
|
||||
ncol = n,
|
||||
nrow = n,
|
||||
byrow = TRUE)
|
||||
A <- matrix(runif(n * n), nrow = n)
|
||||
B <- matrix(runif(n * n), nrow = n)
|
||||
C <- 1
|
||||
|
||||
z <- system.time(for (l in 1:loops) {
|
||||
@@ -54,11 +42,10 @@ while (n <= nto) {
|
||||
l <- l + 1
|
||||
})
|
||||
|
||||
mflops <- (2.0 * n * n * n) * loops / (z[3] * 1.0e6)
|
||||
mflops <- (2.0 * n * n * n) * loops / (z[3] * 1e+06)
|
||||
|
||||
st <- sprintf("%.0fx%.0f :", n, n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))
|
||||
|
||||
n <- n + nstep
|
||||
|
||||
}
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
argv <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
if (!is.null(options("matprod")[[1]])) options(matprod = "blas")
|
||||
|
||||
nfrom <- 128
|
||||
nto <- 2048
|
||||
nstep <- 128
|
||||
@@ -19,7 +21,6 @@ if (length(argv) > 0) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
p <- Sys.getenv("OPENBLAS_LOOPS")
|
||||
@@ -27,31 +28,22 @@ if (p != "") {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
|
||||
|
||||
cat(sprintf(
|
||||
"From %.0f To %.0f Step=%.0f Loops=%.0f\n",
|
||||
nfrom,
|
||||
nto,
|
||||
nstep,
|
||||
loops
|
||||
))
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n", nfrom, nto, nstep, loops))
|
||||
cat(sprintf(" SIZE Flops Time\n"))
|
||||
|
||||
n <- nfrom
|
||||
while (n <= nto) {
|
||||
A <- matrix(rnorm(n * n), ncol = n, nrow = n)
|
||||
B <- matrix(rnorm(n * n), ncol = n, nrow = n)
|
||||
A <- matrix(rnorm(n * n), nrow = n)
|
||||
B <- matrix(rnorm(n * n), nrow = n)
|
||||
|
||||
z <- system.time(for (l in 1:loops) {
|
||||
solve(A, B)
|
||||
})
|
||||
|
||||
mflops <-
|
||||
(2.0 / 3.0 * n * n * n + 2.0 * n * n * n) * loops / (z[3] * 1.0e6)
|
||||
mflops <- (8.0 / 3 * n * n * n) * loops / (z[3] * 1e+06)
|
||||
|
||||
st <- sprintf("%.0fx%.0f :", n, n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))
|
||||
|
||||
n <- n + nstep
|
||||
|
||||
}
|
||||
|
||||
219
benchmark/spmv.c
Normal file
219
benchmark/spmv.c
Normal file
@@ -0,0 +1,219 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SPMV
|
||||
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SPMV BLASFUNC(dspmv)
|
||||
#else
|
||||
#define SPMV BLASFUNC(sspmv)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SPMV BLASFUNC(zspmv)
|
||||
#else
|
||||
#define SPMV BLASFUNC(cspmv)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char uplo='L';
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m,(int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
198
benchmark/spr.c
Executable file
198
benchmark/spr.c
Executable file
@@ -0,0 +1,198 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SPR
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SPR BLASFUNC(dspr)
|
||||
#else
|
||||
#define SPR BLASFUNC(sspr)
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d\n", from, to, step,uplo,inc_x);
|
||||
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SPR (&uplo, &m, alpha, c, &inc_x, a);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
207
benchmark/spr2.c
Executable file
207
benchmark/spr2.c
Executable file
@@ -0,0 +1,207 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SPR2
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SPR2 BLASFUNC(dspr2)
|
||||
#else
|
||||
#define SPR2 BLASFUNC(sspr2)
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*b,*c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d Inc_y = %d\n", from, to, step,uplo,inc_x,inc_y);
|
||||
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SPR2 (&uplo, &m, alpha, c, &inc_x, b, &inc_y, a);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -175,9 +175,9 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -189,8 +189,6 @@ int main(int argc, char *argv[]){
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
@@ -177,7 +177,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
185
benchmark/syr.c
Normal file
185
benchmark/syr.c
Normal file
@@ -0,0 +1,185 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SYR
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYR BLASFUNC(dsyr)
|
||||
#else
|
||||
#define SYR BLASFUNC(ssyr)
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x,*a;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
blasint inc_x= 1;
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d\n", from, to, step,uplo,inc_x);
|
||||
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SYR (&uplo, &m, alpha, x, &inc_x, a, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
194
benchmark/syr2.c
Normal file
194
benchmark/syr2.c
Normal file
@@ -0,0 +1,194 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SYR2
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYR2 BLASFUNC(dsyr2)
|
||||
#else
|
||||
#define SYR2 BLASFUNC(ssyr2)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y, *a;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
blasint inc_x= 1;
|
||||
blasint inc_y= 1;
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d Inc_y = %d\n", from, to, step,uplo,inc_x,inc_y);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SYR2 (&uplo, &m, alpha, x, &inc_x, y, &inc_y, a, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -175,9 +175,9 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -189,8 +189,6 @@ int main(int argc, char *argv[]){
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
@@ -172,8 +172,8 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -185,8 +185,6 @@ int main(int argc, char *argv[]){
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
172
benchmark/tpmv.c
Normal file
172
benchmark/tpmv.c
Normal file
@@ -0,0 +1,172 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
#undef TPMV
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define TPMV BLASFUNC(dtpmv)
|
||||
#else
|
||||
#define TPMV BLASFUNC(stpmv)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define TPMV BLASFUNC(ztpmv)
|
||||
#else
|
||||
#define TPMV BLASFUNC(ctpmv)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size)
|
||||
{
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1) {
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
|
||||
FLOAT *a, *x;
|
||||
char *p;
|
||||
|
||||
char uplo ='U';
|
||||
char trans='N';
|
||||
char diag ='U';
|
||||
|
||||
int loops = 1;
|
||||
int l;
|
||||
blasint inc_x=1;
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
blasint n, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
||||
double time1, timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c Diag = %c Loops=%d Inc_x=%d\n", from,
|
||||
to, step, uplo, trans, diag, loops, inc_x);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(n = from; n <= to; n += step) {
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)n);
|
||||
for(j = 0; j < n; j++) {
|
||||
for(i = 0; i < n * COMPSIZE; i++) {
|
||||
a[(long)i + (long)j * (long)n * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < n * COMPSIZE * abs(inc_x); i++) {
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (l = 0; l < loops; l++) {
|
||||
clock_gettime(CLOCK_REALTIME, &start);
|
||||
TPMV (&uplo, &trans, &diag, &n, a, x, &inc_x);
|
||||
clock_gettime(CLOCK_REALTIME, &stop);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
fprintf(stderr, " %10.2f MFlops %12.9f sec\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)n * (double)n / timeg / 1.e6, timeg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
172
benchmark/tpsv.c
Normal file
172
benchmark/tpsv.c
Normal file
@@ -0,0 +1,172 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
#undef TPSV
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define TPSV BLASFUNC(dtpsv)
|
||||
#else
|
||||
#define TPSV BLASFUNC(stpsv)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define TPSV BLASFUNC(ztpsv)
|
||||
#else
|
||||
#define TPSV BLASFUNC(ctpsv)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size)
|
||||
{
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1) {
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
|
||||
FLOAT *a, *x;
|
||||
char *p;
|
||||
|
||||
char uplo ='U';
|
||||
char trans='N';
|
||||
char diag ='U';
|
||||
|
||||
int loops = 1;
|
||||
int l;
|
||||
blasint inc_x=1;
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
blasint n, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
||||
double time1, timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c Diag = %c Loops=%d Inc_x=%d\n", from,
|
||||
to, step, uplo, trans, diag, loops, inc_x);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(n = from; n <= to; n += step) {
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)n);
|
||||
for(j = 0; j < n; j++) {
|
||||
for(i = 0; i < n * COMPSIZE; i++) {
|
||||
a[(long)i + (long)j * (long)n * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < n * COMPSIZE * abs(inc_x); i++) {
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (l = 0; l < loops; l++) {
|
||||
clock_gettime(CLOCK_REALTIME, &start);
|
||||
TPSV (&uplo, &trans, &diag, &n, a, x, &inc_x);
|
||||
clock_gettime(CLOCK_REALTIME, &stop);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
fprintf(stderr, " %10.2f MFlops %12.9f sec\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)n * (double)n / timeg / 1.e6, timeg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -175,8 +175,8 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -188,8 +188,6 @@ int main(int argc, char *argv[]){
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6, time1);
|
||||
|
||||
172
benchmark/trmv.c
Normal file
172
benchmark/trmv.c
Normal file
@@ -0,0 +1,172 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
#undef TRMV
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define TRMV BLASFUNC(dtrmv)
|
||||
#else
|
||||
#define TRMV BLASFUNC(strmv)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define TRMV BLASFUNC(ztrmv)
|
||||
#else
|
||||
#define TRMV BLASFUNC(ctrmv)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size)
|
||||
{
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1) {
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
|
||||
FLOAT *a, *x;
|
||||
char *p;
|
||||
|
||||
char uplo ='U';
|
||||
char trans='N';
|
||||
char diag ='U';
|
||||
|
||||
int loops = 1;
|
||||
int l;
|
||||
blasint inc_x=1;
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
blasint n, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
||||
double time1, timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c Diag = %c Loops=%d Inc_x=%d\n", from,
|
||||
to, step, uplo, trans, diag, loops, inc_x);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(n = from; n <= to; n += step) {
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)n);
|
||||
for(j = 0; j < n; j++) {
|
||||
for(i = 0; i < n * COMPSIZE; i++) {
|
||||
a[(long)i + (long)j * (long)n * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < n * COMPSIZE * abs(inc_x); i++) {
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (l = 0; l < loops; l++) {
|
||||
clock_gettime(CLOCK_REALTIME, &start);
|
||||
TRMV (&uplo, &trans, &diag, &n, a, &n, x, &inc_x);
|
||||
clock_gettime(CLOCK_REALTIME, &stop);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
fprintf(stderr, " %10.2f MFlops %12.9f sec\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)n * (double)n / timeg / 1.e6, timeg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -191,8 +191,8 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
222
benchmark/trsv.c
Normal file
222
benchmark/trsv.c
Normal file
@@ -0,0 +1,222 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include <time.h>
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef GEMV
|
||||
#undef TRSV
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define TRSV BLASFUNC(dtrsv)
|
||||
#else
|
||||
#define TRSV BLASFUNC(strsv)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define TRSV BLASFUNC(ztrsv)
|
||||
#else
|
||||
#define TRSV BLASFUNC(ctrsv)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x;
|
||||
blasint n = 0, i, j;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timespec time_start, time_end;
|
||||
time_t seconds = 0;
|
||||
|
||||
double time1,timeg;
|
||||
long long nanos = 0;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
char uplo ='L';
|
||||
char transa = 'N';
|
||||
char diag ='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_TRANSA"))) transa=*p;
|
||||
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Transa = '%c' Inc_x = %d uplo=%c diag=%c loop = %d\n", from, to, step,transa,inc_x,
|
||||
uplo,diag,loops);
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
fprintf(stderr, "============================================\n");
|
||||
|
||||
for(n = from; n <= to; n += step)
|
||||
{
|
||||
timeg=0;
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * n * n * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * n * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
for(j = 0; j < n; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[i + j * n * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for(i = 0; i < n * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(l =0;l< loops;l++){
|
||||
|
||||
clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_start);
|
||||
|
||||
TRSV(&uplo,&transa,&diag,&n,a,&n,x,&inc_x);
|
||||
|
||||
clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_end);
|
||||
nanos = time_end.tv_nsec - time_start.tv_nsec;
|
||||
seconds = time_end.tv_sec - time_start.tv_sec;
|
||||
|
||||
time1 = seconds + nanos /1.e9;
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
|
||||
timeg /= loops;
|
||||
long long muls = n*(n+1)/2.0;
|
||||
long long adds = (n - 1.0)*n/2.0;
|
||||
|
||||
fprintf(stderr, "%10d %10.2f MFlops %10.6f sec\n", n,(muls+adds) / timeg * 1.e-6, timeg);
|
||||
if(a != NULL){
|
||||
free(a);
|
||||
}
|
||||
|
||||
if( x != NULL){
|
||||
free(x);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -170,9 +170,11 @@ int main(int argc, char *argv[]){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
#ifdef RETURN_BY_STACK
|
||||
DOT (&result , &m, x, &inc_x, y, &inc_y );
|
||||
#else
|
||||
result = DOT (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
#endif
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
126
c_check
126
c_check
@@ -1,28 +1,30 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
use File::Basename;
|
||||
use File::Temp qw(tempfile);
|
||||
#use File::Basename;
|
||||
# use File::Temp qw(tempfile);
|
||||
|
||||
# Checking cross compile
|
||||
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
|
||||
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
|
||||
$hostarch = `uname -p` if ($hostos eq "AIX");
|
||||
$hostarch = "x86_64" if ($hostarch eq "amd64");
|
||||
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
|
||||
$hostarch = "arm64" if ($hostarch eq "aarch64");
|
||||
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/);
|
||||
$hostarch = "zarch" if ($hostarch eq "s390x");
|
||||
|
||||
$tmpf = new File::Temp( UNLINK => 1 );
|
||||
#$tmpf = new File::Temp( UNLINK => 1 );
|
||||
$binary = $ENV{"BINARY"};
|
||||
|
||||
$makefile = shift(@ARGV);
|
||||
$config = shift(@ARGV);
|
||||
|
||||
$compiler_name = join(" ", @ARGV);
|
||||
$compiler_name = shift(@ARGV);
|
||||
$flags = join(" ", @ARGV);
|
||||
|
||||
# First, we need to know the target OS and compiler name
|
||||
|
||||
$data = `$compiler_name -E ctest.c`;
|
||||
$data = `$compiler_name $flags -E ctest.c`;
|
||||
|
||||
if ($?) {
|
||||
printf STDERR "C Compiler ($compiler_name) is something wrong.\n";
|
||||
@@ -31,12 +33,25 @@ if ($?) {
|
||||
|
||||
$cross_suffix = "";
|
||||
|
||||
if (dirname($compiler_name) ne ".") {
|
||||
$cross_suffix .= dirname($compiler_name) . "/";
|
||||
}
|
||||
eval "use File::Basename";
|
||||
if ($@){
|
||||
warn "could not load PERL module File::Basename, emulating its functionality";
|
||||
my $dirnam = substr($compiler_name, 0, rindex($compiler_name, "/")-1 );
|
||||
if ($dirnam ne ".") {
|
||||
$cross_suffix .= $dirnam . "/";
|
||||
}
|
||||
my $basnam = substr($compiler_name, rindex($compiler_name,"/")+1, length($compiler_name)-rindex($compiler_name,"/")-1);
|
||||
if ($basnam =~ /([^\s]*-)(.*)/) {
|
||||
$cross_suffix .= $1;
|
||||
}
|
||||
} else {
|
||||
if (dirname($compiler_name) ne ".") {
|
||||
$cross_suffix .= dirname($compiler_name) . "/";
|
||||
}
|
||||
|
||||
if (basename($compiler_name) =~ /([^\s]*-)(.*)/) {
|
||||
$cross_suffix .= $1;
|
||||
if (basename($compiler_name) =~ /([^\s]*-)(.*)/) {
|
||||
$cross_suffix .= $1;
|
||||
}
|
||||
}
|
||||
|
||||
$compiler = "";
|
||||
@@ -162,7 +177,7 @@ if ($defined == 0) {
|
||||
|
||||
# Do again
|
||||
|
||||
$data = `$compiler_name -E ctest.c`;
|
||||
$data = `$compiler_name $flags -E ctest.c`;
|
||||
|
||||
if ($?) {
|
||||
printf STDERR "C Compiler ($compiler_name) is something wrong.\n";
|
||||
@@ -171,20 +186,26 @@ if ($?) {
|
||||
|
||||
$have_msa = 0;
|
||||
if (($architecture eq "mips") || ($architecture eq "mips64")) {
|
||||
$code = '"addvi.b $w0, $w1, 1"';
|
||||
$msa_flags = "-mmsa -mfp64 -msched-weight -mload-store-pairs";
|
||||
print $tmpf "#include <msa.h>\n\n";
|
||||
print $tmpf "void main(void){ __asm__ volatile($code); }\n";
|
||||
|
||||
$args = "$msa_flags -o $tmpf.o -x c $tmpf";
|
||||
my @cmd = ("$compiler_name $args");
|
||||
system(@cmd) == 0;
|
||||
if ($? != 0) {
|
||||
$have_msa = 0;
|
||||
eval "use File::Temp qw(tempfile)";
|
||||
if ($@){
|
||||
warn "could not load PERL module File::Temp, so could not check MSA capatibility";
|
||||
} else {
|
||||
$have_msa = 1;
|
||||
$tmpf = new File::Temp( SUFFIX => '.c' , UNLINK => 1 );
|
||||
$code = '"addvi.b $w0, $w1, 1"';
|
||||
$msa_flags = "-mmsa -mfp64 -msched-weight -mload-store-pairs";
|
||||
print $tmpf "#include <msa.h>\n\n";
|
||||
print $tmpf "void main(void){ __asm__ volatile($code); }\n";
|
||||
|
||||
$args = "$msa_flags -o $tmpf.o $tmpf";
|
||||
my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null");
|
||||
system(@cmd) == 0;
|
||||
if ($? != 0) {
|
||||
$have_msa = 0;
|
||||
} else {
|
||||
$have_msa = 1;
|
||||
}
|
||||
unlink("$tmpf.o");
|
||||
}
|
||||
unlink("$tmpf.o");
|
||||
}
|
||||
|
||||
$architecture = x86 if ($data =~ /ARCH_X86/);
|
||||
@@ -204,20 +225,53 @@ $binformat = bin64 if ($data =~ /BINARY_64/);
|
||||
|
||||
$no_avx512= 0;
|
||||
if (($architecture eq "x86") || ($architecture eq "x86_64")) {
|
||||
$code = '"vbroadcastss -4 * 4(%rsi), %zmm2"';
|
||||
print $tmpf "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n";
|
||||
$args = " -march=skylake-avx512 -o $tmpf.o -x c $tmpf";
|
||||
my @cmd = ("$compiler_name $args >/dev/null 2>/dev/null");
|
||||
system(@cmd) == 0;
|
||||
if ($? != 0) {
|
||||
$no_avx512 = 1;
|
||||
} else {
|
||||
eval "use File::Temp qw(tempfile)";
|
||||
if ($@){
|
||||
warn "could not load PERL module File::Temp, so could not check compiler compatibility with AVX512";
|
||||
$no_avx512 = 0;
|
||||
} else {
|
||||
# $tmpf = new File::Temp( UNLINK => 1 );
|
||||
($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 );
|
||||
$code = '"vbroadcastss -4 * 4(%rsi), %zmm2"';
|
||||
print $tmpf "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n";
|
||||
$args = " -march=skylake-avx512 -c -o $tmpf.o $tmpf";
|
||||
if ($compiler eq "PGI") {
|
||||
$args = " -tp skylake -c -o $tmpf.o $tmpf";
|
||||
}
|
||||
my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null");
|
||||
system(@cmd) == 0;
|
||||
if ($? != 0) {
|
||||
$no_avx512 = 1;
|
||||
} else {
|
||||
$no_avx512 = 0;
|
||||
}
|
||||
unlink("$tmpf.o");
|
||||
}
|
||||
unlink("tmpf.o");
|
||||
}
|
||||
|
||||
$data = `$compiler_name -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`;
|
||||
$c11_atomics = 0;
|
||||
if ($data =~ /HAVE_C11/) {
|
||||
eval "use File::Temp qw(tempfile)";
|
||||
if ($@){
|
||||
warn "could not load PERL module File::Temp, so could not check compiler compatibility with C11";
|
||||
$c11_atomics = 0;
|
||||
} else {
|
||||
($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 );
|
||||
print $tmpf "#include <stdatomic.h>\nint main(void){}\n";
|
||||
$args = " -c -o $tmpf.o $tmpf";
|
||||
my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null");
|
||||
system(@cmd) == 0;
|
||||
if ($? != 0) {
|
||||
$c11_atomics = 0;
|
||||
} else {
|
||||
$c11_atomics = 1;
|
||||
}
|
||||
unlink("$tmpf.o");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
$data = `$compiler_name $flags -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`;
|
||||
|
||||
$data =~ /globl\s([_\.]*)(.*)/;
|
||||
|
||||
@@ -240,7 +294,7 @@ $linker_l = "";
|
||||
$linker_a = "";
|
||||
|
||||
{
|
||||
$link = `$compiler_name -c ctest2.c -o ctest2.o 2>&1 && $compiler_name $openmp -v ctest2.o -o ctest2 2>&1 && rm -f ctest2.o ctest2 ctest2.exe`;
|
||||
$link = `$compiler_name $flags -c ctest2.c -o ctest2.o 2>&1 && $compiler_name $flags $openmp -v ctest2.o -o ctest2 2>&1 && rm -f ctest2.o ctest2 ctest2.exe`;
|
||||
|
||||
$link =~ s/\-Y\sP\,/\-Y/g;
|
||||
|
||||
@@ -278,6 +332,8 @@ $linker_a = "";
|
||||
&& ($flags !~ /kernel32/)
|
||||
&& ($flags !~ /advapi32/)
|
||||
&& ($flags !~ /shell32/)
|
||||
&& ($flags !~ /omp/)
|
||||
&& ($flags !~ /[0-9]+/)
|
||||
) {
|
||||
$linker_l .= $flags . " "
|
||||
}
|
||||
@@ -318,6 +374,8 @@ print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32;
|
||||
print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64;
|
||||
print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
|
||||
print CONFFILE "#define HAVE_MSA\t1\n" if $have_msa eq 1;
|
||||
print CONFFILE "#define HAVE_C11\t1\n" if $c11_atomics eq 1;
|
||||
|
||||
|
||||
if ($os eq "LINUX") {
|
||||
|
||||
|
||||
31
cblas.h
31
cblas.h
@@ -25,6 +25,11 @@ char* openblas_get_config(void);
|
||||
/*Get the CPU corename on runtime.*/
|
||||
char* openblas_get_corename(void);
|
||||
|
||||
#ifdef OPENBLAS_OS_LINUX
|
||||
/* Sets thread affinity for OpenBLAS threads. `thread_idx` is in [0, openblas_get_num_threads()-1]. */
|
||||
int openblas_setaffinity(int thread_idx, size_t cpusetsize, cpu_set_t* cpu_set);
|
||||
#endif
|
||||
|
||||
/* Get the parallelization type which is used by OpenBLAS */
|
||||
int openblas_get_parallel(void);
|
||||
/* OpenBLAS is compiled for sequential use */
|
||||
@@ -73,6 +78,11 @@ double cblas_dasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS
|
||||
float cblas_scasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dzasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
float cblas_ssum (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dsum (OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
float cblas_scsum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dzsum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
float cblas_snrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX);
|
||||
double cblas_dnrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX);
|
||||
float cblas_scnrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX);
|
||||
@@ -88,6 +98,16 @@ CBLAS_INDEX cblas_idamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPE
|
||||
CBLAS_INDEX cblas_icamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_izamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
CBLAS_INDEX cblas_ismax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_idmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_icmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_izmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
CBLAS_INDEX cblas_ismin(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_idmin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_icmin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_izmin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
void cblas_saxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_daxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
@@ -362,6 +382,17 @@ void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint
|
||||
void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta,
|
||||
double *c, OPENBLAS_CONST blasint cldc);
|
||||
|
||||
void cblas_sgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array,
|
||||
OPENBLAS_CONST float * alpha_array, OPENBLAS_CONST float ** A_array, OPENBLAS_CONST blasint * lda_array, OPENBLAS_CONST float ** B_array, OPENBLAS_CONST blasint * ldb_array, OPENBLAS_CONST float * beta_array, float ** C_array, OPENBLAS_CONST blasint * ldc_array, OPENBLAS_CONST blasint group_count, OPENBLAS_CONST blasint * group_size);
|
||||
|
||||
void cblas_dgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array,
|
||||
OPENBLAS_CONST double * alpha_array, OPENBLAS_CONST double ** A_array, OPENBLAS_CONST blasint * lda_array, OPENBLAS_CONST double ** B_array, OPENBLAS_CONST blasint * ldb_array, OPENBLAS_CONST double * beta_array, double ** C_array, OPENBLAS_CONST blasint * ldc_array, OPENBLAS_CONST blasint group_count, OPENBLAS_CONST blasint * group_size);
|
||||
|
||||
void cblas_cgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array,
|
||||
OPENBLAS_CONST void * alpha_array, OPENBLAS_CONST void ** A_array, OPENBLAS_CONST blasint * lda_array, OPENBLAS_CONST void ** B_array, OPENBLAS_CONST blasint * ldb_array, OPENBLAS_CONST void * beta_array, void ** C_array, OPENBLAS_CONST blasint * ldc_array, OPENBLAS_CONST blasint group_count, OPENBLAS_CONST blasint * group_size);
|
||||
|
||||
void cblas_zgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array,
|
||||
OPENBLAS_CONST void * alpha_array, OPENBLAS_CONST void ** A_array, OPENBLAS_CONST blasint * lda_array, OPENBLAS_CONST void ** B_array, OPENBLAS_CONST blasint * ldb_array, OPENBLAS_CONST void * beta_array, void ** C_array, OPENBLAS_CONST blasint * ldc_array, OPENBLAS_CONST blasint group_count, OPENBLAS_CONST blasint * group_size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -45,7 +45,11 @@ endif ()
|
||||
|
||||
if (DYNAMIC_ARCH)
|
||||
if (ARM64)
|
||||
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99)
|
||||
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
|
||||
endif ()
|
||||
|
||||
if (POWER)
|
||||
set(DYNAMIC_CORE POWER6 POWER8 POWER9 POWER10)
|
||||
endif ()
|
||||
|
||||
if (X86)
|
||||
@@ -72,12 +76,17 @@ if (DYNAMIC_ARCH)
|
||||
set(DYNAMIC_CORE ${DYNAMIC_CORE} HASWELL ZEN)
|
||||
endif ()
|
||||
if (NOT NO_AVX512)
|
||||
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX)
|
||||
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX COOPERLAKE)
|
||||
string(REGEX REPLACE "-march=native" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
|
||||
endif ()
|
||||
if (DYNAMIC_LIST)
|
||||
set(DYNAMIC_CORE PRESCOTT ${DYNAMIC_LIST})
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NOT DYNAMIC_CORE)
|
||||
unset(DYNAMIC_ARCH)
|
||||
message (STATUS "DYNAMIC_ARCH is not supported on this architecture, removing from options")
|
||||
unset(DYNAMIC_ARCH CACHE)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
## Description: Ported from portion of OpenBLAS/Makefile.system
|
||||
## Sets C related variables.
|
||||
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR ${CMAKE_C_COMPILER} STREQUAL "Clang")
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB" OR ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
|
||||
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -Wall")
|
||||
set(COMMON_PROF "${COMMON_PROF} -fno-inline")
|
||||
@@ -43,7 +43,7 @@ if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "PGI")
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI")
|
||||
if (BINARY64)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7-64")
|
||||
else ()
|
||||
@@ -51,7 +51,7 @@ if (${CMAKE_C_COMPILER} STREQUAL "PGI")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE")
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "PATHSCALE")
|
||||
if (BINARY64)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
|
||||
else ()
|
||||
@@ -59,7 +59,7 @@ if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "OPEN64")
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "OPEN64")
|
||||
|
||||
if (MIPS64)
|
||||
|
||||
@@ -87,7 +87,7 @@ if (${CMAKE_C_COMPILER} STREQUAL "OPEN64")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "SUN")
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "SUN")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -w")
|
||||
if (X86)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
|
||||
@@ -96,3 +96,23 @@ if (${CMAKE_C_COMPILER} STREQUAL "SUN")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL "SKYLAKEX")
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
if (NOT NO_AVX512)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL "COOPERLAKE")
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
if (NOT NO_AVX512)
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
|
||||
set (CCOMMON_OPT = "${CCOMMON_OPT} -march=cooperlake")
|
||||
else ()
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
|
||||
endif()
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
@@ -21,7 +21,15 @@
|
||||
# NEED2UNDERSCORES
|
||||
|
||||
if (NOT NO_LAPACK)
|
||||
enable_language(Fortran)
|
||||
include(CheckLanguage)
|
||||
check_language(Fortran)
|
||||
if(CMAKE_Fortran_COMPILER)
|
||||
enable_language(Fortran)
|
||||
else()
|
||||
message(STATUS "No Fortran compiler found, can build only BLAS but not LAPACK")
|
||||
set (NOFORTRAN 1)
|
||||
set (NO_LAPACK 1)
|
||||
endif()
|
||||
else()
|
||||
include(CMakeForceCompiler)
|
||||
CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
|
||||
|
||||
@@ -16,6 +16,7 @@ if (${F_COMPILER} STREQUAL "FLANG")
|
||||
if (USE_OPENMP)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp")
|
||||
endif ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -Mrecursive -Kieee")
|
||||
endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "G77")
|
||||
@@ -44,7 +45,10 @@ endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "GFORTRAN")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT")
|
||||
# ensure reentrancy of lapack codes
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall -frecursive")
|
||||
# work around ABI violation in passing string arguments from C
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -fno-optimize-sibling-calls")
|
||||
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
|
||||
if (NOT NO_LAPACK)
|
||||
set(EXTRALIB "{EXTRALIB} -lgfortran")
|
||||
@@ -78,6 +82,7 @@ if (${F_COMPILER} STREQUAL "INTEL")
|
||||
if (INTERFACE64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
|
||||
endif ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -recursive")
|
||||
if (USE_OPENMP)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
|
||||
endif ()
|
||||
@@ -117,6 +122,7 @@ if (${F_COMPILER} STREQUAL "PGI")
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7")
|
||||
endif ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -Mrecursive")
|
||||
if (USE_OPENMP)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
|
||||
endif ()
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# helper functions for the kernel CMakeLists.txt
|
||||
|
||||
|
||||
# Set the default filenames for L1 objects. Most of these will be overriden by the appropriate KERNEL file.
|
||||
# Set the default filenames for L1 objects. Most of these will be overridden by the appropriate KERNEL file.
|
||||
macro(SetDefaultL1)
|
||||
set(SAMAXKERNEL amax.S)
|
||||
set(DAMAXKERNEL amax.S)
|
||||
@@ -107,11 +107,37 @@ macro(SetDefaultL1)
|
||||
set(DAXPBYKERNEL ../arm/axpby.c)
|
||||
set(CAXPBYKERNEL ../arm/zaxpby.c)
|
||||
set(ZAXPBYKERNEL ../arm/zaxpby.c)
|
||||
set(SSUMKERNEL sum.S)
|
||||
set(DSUMKERNEL sum.S)
|
||||
set(CSUMKERNEL zsum.S)
|
||||
set(ZSUMKERNEL zsum.S)
|
||||
set(QSUMKERNEL sum.S)
|
||||
set(XSUMKERNEL zsum.S)
|
||||
if (BUILD_HALF)
|
||||
set(SHAMINKERNEL ../arm/amin.c)
|
||||
set(SHAMAXKERNEL ../arm/amax.c)
|
||||
set(SHMAXKERNEL ../arm/max.c)
|
||||
set(SHMINKERNEL ../arm/min.c)
|
||||
set(ISHAMAXKERNEL ../arm/iamax.c)
|
||||
set(ISHAMINKERNEL ../arm/iamin.c)
|
||||
set(ISHMAXKERNEL ../arm/imax.c)
|
||||
set(ISHMINKERNEL ../arm/imin.c)
|
||||
set(SHASUMKERNEL ../arm/asum.c)
|
||||
set(SHAXPYKERNEL ../arm/axpy.c)
|
||||
set(SHAXPBYKERNEL ../arm/axpby.c)
|
||||
set(SHCOPYKERNEL ../arm/copy.c)
|
||||
set(SHDOTKERNEL ../arm/dot.c)
|
||||
set(SHROTKERNEL ../arm/rot.c)
|
||||
set(SHSCALKERNEL ../arm/scal.c)
|
||||
set(SHNRM2KERNEL ../arm/nrm2.c)
|
||||
set(SHSUMKERNEL ../arm/sum.c)
|
||||
set(SHSWAPKERNEL ../arm/swap.c)
|
||||
endif ()
|
||||
endmacro ()
|
||||
|
||||
macro(SetDefaultL2)
|
||||
set(SGEMVNKERNEL gemv_n.S)
|
||||
set(SGEMVTKERNEL gemv_t.S)
|
||||
set(SGEMVNKERNEL ../arm/gemv_n.c)
|
||||
set(SGEMVTKERNEL ../arm/gemv_t.c)
|
||||
set(DGEMVNKERNEL gemv_n.S)
|
||||
set(DGEMVTKERNEL gemv_t.S)
|
||||
set(CGEMVNKERNEL zgemv_n.S)
|
||||
@@ -155,6 +181,11 @@ macro(SetDefaultL2)
|
||||
set(XHEMV_L_KERNEL ../generic/zhemv_k.c)
|
||||
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
|
||||
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
|
||||
if (BUILD_HALF)
|
||||
set(SHGEMVNKERNEL ../arm/gemv_n.c)
|
||||
set(SHGEMVTKERNEL ../arm/gemv_t.c)
|
||||
set(SHGERKERNEL ../generic/ger.c)
|
||||
endif ()
|
||||
endmacro ()
|
||||
|
||||
macro(SetDefaultL3)
|
||||
@@ -162,4 +193,18 @@ macro(SetDefaultL3)
|
||||
set(DGEADD_KERNEL ../generic/geadd.c)
|
||||
set(CGEADD_KERNEL ../generic/zgeadd.c)
|
||||
set(ZGEADD_KERNEL ../generic/zgeadd.c)
|
||||
endmacro ()
|
||||
if (BUILD_HALF)
|
||||
set(SHGEADD_KERNEL ../generic/geadd.c)
|
||||
set(SHGEMMKERNEL ../generic/gemmkernel_2x2.c)
|
||||
set(SHGEMM_BETA ../generic/gemm_beta.c)
|
||||
set(SHGEMMINCOPY ../generic/gemm_ncopy_2.c)
|
||||
set(SHGEMMITCOPY ../generic/gemm_tcopy_2.c)
|
||||
set(SHGEMMONCOPY ../generic/gemm_ncopy_2.c)
|
||||
set(SHGEMMOTCOPY ../generic/gemm_tcopy_2.c)
|
||||
set(SHGEMMINCOPYOBJ shgemm_incopy.o)
|
||||
set(SHGEMMITCOPYOBJ shgemm_itcopy.o)
|
||||
set(SHGEMMONCOPYOBJ shgemm_oncopy.o)
|
||||
set(SHGEMMOTCOPYOBJ shgemm_otcopy.o)
|
||||
endif ()
|
||||
|
||||
endmacro ()
|
||||
|
||||
@@ -115,7 +115,9 @@ set(SLASRC
|
||||
stplqt.f stplqt2.f stpmlqt.f
|
||||
ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f
|
||||
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
|
||||
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f)
|
||||
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
|
||||
scombssq.f sgesvdq.f slaorhr_col_getrfnp.f
|
||||
slaorhr_col_getrfnp2.f sorgtsqr.f sorhr_col.f )
|
||||
|
||||
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
|
||||
sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
|
||||
@@ -210,7 +212,9 @@ set(CLASRC
|
||||
ctplqt.f ctplqt2.f ctpmlqt.f
|
||||
chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f
|
||||
cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f
|
||||
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f)
|
||||
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f
|
||||
cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f
|
||||
cungtsqr.f cunhr_col.f )
|
||||
|
||||
set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
|
||||
cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
|
||||
@@ -299,7 +303,9 @@ set(DLASRC
|
||||
dtplqt.f dtplqt2.f dtpmlqt.f
|
||||
dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f
|
||||
dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f
|
||||
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f)
|
||||
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f
|
||||
dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f
|
||||
dlaorhr_col_getrfnp2.f dorgtsqr.f dorhr_col.f )
|
||||
|
||||
set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
|
||||
dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
|
||||
@@ -398,7 +404,9 @@ set(ZLASRC
|
||||
zgelq.f zlaswlq.f zlamswlq.f zgemlq.f
|
||||
zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f
|
||||
zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f
|
||||
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f)
|
||||
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f
|
||||
zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f
|
||||
zungtsqr.f zunhr_col.f)
|
||||
|
||||
set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
|
||||
zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f
|
||||
|
||||
@@ -715,6 +715,8 @@ set(DSRC
|
||||
lapacke_dgesv_work.c
|
||||
lapacke_dgesvd.c
|
||||
lapacke_dgesvd_work.c
|
||||
lapacke_dgesvdq.c
|
||||
lapacke_dgesvdq_work.c
|
||||
lapacke_dgesvdx.c
|
||||
lapacke_dgesvdx_work.c
|
||||
lapacke_dgesvj.c
|
||||
@@ -1287,6 +1289,8 @@ set(SSRC
|
||||
lapacke_sgesv_work.c
|
||||
lapacke_sgesvd.c
|
||||
lapacke_sgesvd_work.c
|
||||
lapacke_sgesvdq.c
|
||||
lapacke_sgesvdq_work.c
|
||||
lapacke_sgesvdx.c
|
||||
lapacke_sgesvdx_work.c
|
||||
lapacke_sgesvj.c
|
||||
@@ -1853,6 +1857,8 @@ set(ZSRC
|
||||
lapacke_zgesv_work.c
|
||||
lapacke_zgesvd.c
|
||||
lapacke_zgesvd_work.c
|
||||
lapacke_zgesvdq.c
|
||||
lapacke_zgesvdq_work.c
|
||||
lapacke_zgesvdx.c
|
||||
lapacke_zgesvdx_work.c
|
||||
lapacke_zgesvj.c
|
||||
|
||||
@@ -7,5 +7,5 @@ Name: OpenBLAS
|
||||
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
|
||||
Version: @OPENBLAS_VERSION@
|
||||
URL: https://github.com/xianyi/OpenBLAS
|
||||
Libs: -L${libdir} -lopenblas${libsuffix}
|
||||
Libs: @OpenMP_C_FLAGS@ -L${libdir} -lopenblas${libsuffix}
|
||||
Cflags: -I${includedir}
|
||||
|
||||
@@ -8,6 +8,11 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
|
||||
set(NO_EXPRECISION 1)
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD|OpenBSD|NetBSD|DragonFly|Darwin")
|
||||
set(EXTRALIB "${EXTRALIB} -lm")
|
||||
set(NO_EXPRECISION 1)
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "AIX")
|
||||
set(EXTRALIB "${EXTRALIB} -lm")
|
||||
endif ()
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
# HAVE_SSE2
|
||||
# HAVE_SSE3
|
||||
# MAKE
|
||||
# SHGEMM_UNROLL_M
|
||||
# SHGEMM_UNROLL_N
|
||||
# SGEMM_UNROLL_M
|
||||
# SGEMM_UNROLL_N
|
||||
# DGEMM_UNROLL_M
|
||||
@@ -59,6 +61,9 @@ set(FU "")
|
||||
if (APPLE OR (MSVC AND NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang"))
|
||||
set(FU "_")
|
||||
endif()
|
||||
if(MINGW AND NOT MINGW64)
|
||||
set(FU "_")
|
||||
endif()
|
||||
|
||||
set(COMPILER_ID ${CMAKE_C_COMPILER_ID})
|
||||
if (${COMPILER_ID} STREQUAL "GNU")
|
||||
@@ -82,18 +87,59 @@ endif ()
|
||||
# f_check
|
||||
if (NOT NOFORTRAN)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/f_check.cmake")
|
||||
else ()
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define BUNDERSCORE _\n"
|
||||
"#define NEEDBUNDERSCORE 1\n")
|
||||
set(BU "_")
|
||||
endif ()
|
||||
|
||||
# Cannot run getarch on target if we are cross-compiling
|
||||
if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSSTORE"))
|
||||
# Write to config as getarch would
|
||||
if (DEFINED TARGET_CORE)
|
||||
set(TCORE ${TARGET_CORE})
|
||||
else()
|
||||
set(TCORE ${CORE})
|
||||
endif()
|
||||
|
||||
# TODO: Set up defines that getarch sets up based on every other target
|
||||
# Perhaps this should be inside a different file as it grows larger
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define ${CORE}\n"
|
||||
"#define CHAR_CORENAME \"${CORE}\"\n")
|
||||
if ("${CORE}" STREQUAL "ARMV7")
|
||||
"#define ${TCORE}\n"
|
||||
"#define CORE_${TCORE}\n"
|
||||
"#define CHAR_CORENAME \"${TCORE}\"\n")
|
||||
if ("${TCORE}" STREQUAL "CORE2")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_DATA_SIZE\t32768\n"
|
||||
"#define L1_DATA_LINESIZE\t64\n"
|
||||
"#define L2_SIZE\t1048576\n"
|
||||
"#define L2_LINESIZE\t64\n"
|
||||
"#define DTB_DEFAULT_ENTRIES\t256\n"
|
||||
"#define DTB_SIZE\t4096\n"
|
||||
"#define HAVE_CMOV\n"
|
||||
"#define HAVE_MMX\n"
|
||||
"#define HAVE_SSE\n"
|
||||
"#define HAVE_SSE2\n"
|
||||
"#define HAVE_SSE3\n"
|
||||
"#define HAVE_SSSE3\n"
|
||||
"#define SLOCAL_BUFFER_SIZE\t16384\n"
|
||||
"#define DLOCAL_BUFFER_SIZE\t16384\n"
|
||||
"#define CLOCAL_BUFFER_SIZE\t16384\n"
|
||||
"#define ZLOCAL_BUFFER_SIZE\t16384\n")
|
||||
set(SGEMM_UNROLL_M 8)
|
||||
set(SGEMM_UNROLL_N 4)
|
||||
set(DGEMM_UNROLL_M 4)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 4)
|
||||
set(CGEMM_UNROLL_N 2)
|
||||
set(ZGEMM_UNROLL_M 2)
|
||||
set(ZGEMM_UNROLL_N 2)
|
||||
set(CGEMM3M_UNROLL_M 8)
|
||||
set(CGEMM3M_UNROLL_N 4)
|
||||
set(ZGEMM3M_UNROLL_M 4)
|
||||
set(ZGEMM3M_UNROLL_N 4)
|
||||
elseif ("${TCORE}" STREQUAL "ARMV7")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_DATA_SIZE\t65536\n"
|
||||
"#define L1_DATA_LINESIZE\t32\n"
|
||||
@@ -108,7 +154,11 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
|
||||
set(SGEMM_UNROLL_N 4)
|
||||
set(DGEMM_UNROLL_M 4)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
elseif ("${CORE}" STREQUAL "ARMV8")
|
||||
set(CGEMM_UNROLL_M 2)
|
||||
set(CGEMM_UNROLL_N 2)
|
||||
set(ZGEMM_UNROLL_M 2)
|
||||
set(ZGEMM_UNROLL_N 2)
|
||||
elseif ("${TCORE}" STREQUAL "ARMV8")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_DATA_SIZE\t32768\n"
|
||||
"#define L1_DATA_LINESIZE\t64\n"
|
||||
@@ -118,9 +168,16 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
|
||||
"#define DTB_SIZE\t4096\n"
|
||||
"#define L2_ASSOCIATIVE\t32\n"
|
||||
"#define ARMV8\n")
|
||||
set(SGEMM_UNROLL_M 4)
|
||||
set(SGEMM_UNROLL_M 16)
|
||||
set(SGEMM_UNROLL_N 4)
|
||||
elseif ("${CORE}" STREQUAL "CORTEXA57" OR "${CORE}" STREQUAL "CORTEXA53")
|
||||
set(DGEMM_UNROLL_M 8)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 8)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 4)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "CORTEXA57" OR "${TCORE}" STREQUAL "CORTEXA53")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_CODE_SIZE\t32768\n"
|
||||
"#define L1_CODE_LINESIZE\t64\n"
|
||||
@@ -138,15 +195,21 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
|
||||
"#define HAVE_VFP\n"
|
||||
"#define HAVE_NEON\n"
|
||||
"#define ARMV8\n")
|
||||
if ("${TCORE}" STREQUAL "CORTEXA57")
|
||||
set(SGEMM_UNROLL_M 16)
|
||||
set(SGEMM_UNROLL_N 4)
|
||||
else ()
|
||||
set(SGEMM_UNROLL_M 8)
|
||||
set(SGEMM_UNROLL_N 8)
|
||||
endif ()
|
||||
set(DGEMM_UNROLL_M 8)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 8)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 8)
|
||||
set(ZGEMM_UNROLL_M 4)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
elseif ("${CORE}" STREQUAL "CORTEXA72" OR "${CORE}" STREQUAL "CORTEXA73")
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "CORTEXA72" OR "${TCORE}" STREQUAL "CORTEXA73")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_CODE_SIZE\t49152\n"
|
||||
"#define L1_CODE_LINESIZE\t64\n"
|
||||
@@ -170,9 +233,37 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 8)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 8)
|
||||
set(ZGEMM_UNROLL_M 4)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
elseif ("${CORE}" STREQUAL "FALKOR")
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "NEOVERSEN1")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_CODE_SIZE\t65536\n"
|
||||
"#define L1_CODE_LINESIZE\t64\n"
|
||||
"#define L1_CODE_ASSOCIATIVE\t4\n"
|
||||
"#define L1_DATA_SIZE\t65536\n"
|
||||
"#define L1_DATA_LINESIZE\t64\n"
|
||||
"#define L1_DATA_ASSOCIATIVE\t2\n"
|
||||
"#define L2_SIZE\t1048576\n\n"
|
||||
"#define L2_LINESIZE\t64\n"
|
||||
"#define L2_ASSOCIATIVE\t16\n"
|
||||
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||
"#define DTB_SIZE\t4096\n"
|
||||
"#define HAVE_VFPV4\n"
|
||||
"#define HAVE_VFPV3\n"
|
||||
"#define HAVE_VFP\n"
|
||||
"#define HAVE_NEON\n"
|
||||
"#define ARMV8\n")
|
||||
set(SGEMM_UNROLL_M 16)
|
||||
set(SGEMM_UNROLL_N 4)
|
||||
set(DGEMM_UNROLL_M 8)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 8)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 4)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "FALKOR")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_CODE_SIZE\t65536\n"
|
||||
"#define L1_CODE_LINESIZE\t64\n"
|
||||
@@ -196,9 +287,10 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 8)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 8)
|
||||
set(ZGEMM_UNROLL_M 4)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
elseif ("${CORE}" STREQUAL "THUNDERX)
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "THUNDERX")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_CODE_SIZE\t32768\n"
|
||||
"#define L1_CODE_LINESIZE\t64\n"
|
||||
@@ -224,7 +316,8 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
|
||||
set(CGEMM_UNROLL_N 2)
|
||||
set(ZGEMM_UNROLL_M 2)
|
||||
set(ZGEMM_UNROLL_N 2)
|
||||
elseif ("${CORE}" STREQUAL "THUNDERX2T99)
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "THUNDERX2T99")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_CODE_SIZE\t32768\n"
|
||||
"#define L1_CODE_LINESIZE\t64\n"
|
||||
@@ -240,7 +333,7 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
|
||||
"#define L3_ASSOCIATIVE\t32\n"
|
||||
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||
"#define DTB_SIZE\t4096\n"
|
||||
"#define VULCAN\n")
|
||||
"#define ARMV8\n")
|
||||
set(SGEMM_UNROLL_M 16)
|
||||
set(SGEMM_UNROLL_N 4)
|
||||
set(DGEMM_UNROLL_M 8)
|
||||
@@ -249,7 +342,137 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 4)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "THUNDERX3T110")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define THUNDERX3T110\n"
|
||||
"#define L1_CODE_SIZE\t65536\n"
|
||||
"#define L1_CODE_LINESIZE\t64\n"
|
||||
"#define L1_CODE_ASSOCIATIVE\t8\n"
|
||||
"#define L1_DATA_SIZE\t65536\n"
|
||||
"#define L1_DATA_LINESIZE\t64\n"
|
||||
"#define L1_DATA_ASSOCIATIVE\t8\n"
|
||||
"#define L2_SIZE\t524288\n"
|
||||
"#define L2_LINESIZE\t64\n"
|
||||
"#define L2_ASSOCIATIVE\t8\n"
|
||||
"#define L3_SIZE\t94371840\n"
|
||||
"#define L3_LINESIZE\t64\n"
|
||||
"#define L3_ASSOCIATIVE\t32\n"
|
||||
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||
"#define DTB_SIZE\t4096\n"
|
||||
"#define ARMV8\n")
|
||||
set(SGEMM_UNROLL_M 16)
|
||||
set(SGEMM_UNROLL_N 4)
|
||||
set(DGEMM_UNROLL_M 8)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 8)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 4)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "TSV110")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define ARMV8\n"
|
||||
"#define L1_CODE_SIZE\t65536\n"
|
||||
"#define L1_CODE_LINESIZE\t64\n"
|
||||
"#define L1_CODE_ASSOCIATIVE\t4\n"
|
||||
"#define L1_DATA_SIZE\t65536\n"
|
||||
"#define L1_DATA_LINESIZE\t64\n"
|
||||
"#define L1_DATA_ASSOCIATIVE\t4\n"
|
||||
"#define L2_SIZE\t524288\n"
|
||||
"#define L2_LINESIZE\t64\n"
|
||||
"#define L2_ASSOCIATIVE\t8\n"
|
||||
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||
"#define DTB_SIZE\t4096\n")
|
||||
set(SGEMM_UNROLL_M 16)
|
||||
set(SGEMM_UNROLL_N 4)
|
||||
set(DGEMM_UNROLL_M 8)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 8)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 4)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "EMAG8180")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define ARMV8\n"
|
||||
"#define L1_CODE_SIZE\t32768\n"
|
||||
"#define L1_CODE_LINESIZE\t64\n"
|
||||
"#define L1_CODE_ASSOCIATIVE\t4\n"
|
||||
"#define L1_DATA_SIZE\t32768\n"
|
||||
"#define L1_DATA_LINESIZE\t64\n"
|
||||
"#define L1_DATA_ASSOCIATIVE\t4\n"
|
||||
"#define L2_SIZE\t5262144\n"
|
||||
"#define L2_LINESIZE\t64\n"
|
||||
"#define L2_ASSOCIATIVE\t8\n"
|
||||
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||
"#define DTB_SIZE\t4096\n")
|
||||
set(SGEMM_UNROLL_M 16)
|
||||
set(SGEMM_UNROLL_N 4)
|
||||
set(DGEMM_UNROLL_M 8)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 8)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 4)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "POWER6")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_DATA_SIZE 32768\n"
|
||||
"#define L1_DATA_LINESIZE 128\n"
|
||||
"#define L2_SIZE 524288\n"
|
||||
"#define L2_LINESIZE 128 \n"
|
||||
"#define DTB_DEFAULT_ENTRIES 128\n"
|
||||
"#define DTB_SIZE 4096\n"
|
||||
"#define L2_ASSOCIATIVE 8\n")
|
||||
set(SGEMM_UNROLL_M 4)
|
||||
set(SGEMM_UNROLL_N 4)
|
||||
set(DGEMM_UNROLL_M 4)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 2)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 2)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(SYMV_P 8)
|
||||
elseif ("${TCORE}" STREQUAL "POWER8")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_DATA_SIZE 32768\n"
|
||||
"#define L1_DATA_LINESIZE 128\n"
|
||||
"#define L2_SIZE 524288\n"
|
||||
"#define L2_LINESIZE 128 \n"
|
||||
"#define DTB_DEFAULT_ENTRIES 128\n"
|
||||
"#define DTB_SIZE 4096\n"
|
||||
"#define L2_ASSOCIATIVE 8\n")
|
||||
set(SGEMM_UNROLL_M 16)
|
||||
set(SGEMM_UNROLL_N 8)
|
||||
set(DGEMM_UNROLL_M 16)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 8)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 8)
|
||||
set(ZGEMM_UNROLL_N 2)
|
||||
set(SYMV_P 8)
|
||||
elseif ("${TCORE}" STREQUAL "POWER9" OR "${TCORE}" STREQUAL "POWER10")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_DATA_SIZE 32768\n"
|
||||
"#define L1_DATA_LINESIZE 128\n"
|
||||
"#define L2_SIZE 524288\n"
|
||||
"#define L2_LINESIZE 128 \n"
|
||||
"#define DTB_DEFAULT_ENTRIES 128\n"
|
||||
"#define DTB_SIZE 4096\n"
|
||||
"#define L2_ASSOCIATIVE 8\n")
|
||||
set(SGEMM_UNROLL_M 16)
|
||||
set(SGEMM_UNROLL_N 8)
|
||||
set(DGEMM_UNROLL_M 16)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 8)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 8)
|
||||
set(ZGEMM_UNROLL_N 2)
|
||||
set(SYMV_P 8)
|
||||
endif()
|
||||
set(SHGEMM_UNROLL_M 8)
|
||||
set(SHGEMM_UNROLL_N 4)
|
||||
|
||||
# Or should this actually be NUM_CORES?
|
||||
if (${NUM_THREADS} GREATER 0)
|
||||
@@ -284,6 +507,9 @@ else(NOT CMAKE_CROSSCOMPILING)
|
||||
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
|
||||
else()
|
||||
list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S)
|
||||
if (DEFINED TARGET_CORE)
|
||||
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_${TARGET_CORE})
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
@@ -298,7 +524,7 @@ else(NOT CMAKE_CROSSCOMPILING)
|
||||
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
try_compile(GETARCH_RESULT ${GETARCH_DIR}
|
||||
SOURCES ${GETARCH_SRC}
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${GETARCH_DIR} -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I"${GETARCH_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
|
||||
OUTPUT_VARIABLE GETARCH_LOG
|
||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
|
||||
)
|
||||
@@ -326,7 +552,7 @@ execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH_BIN}" 1 OUTPUT_VARIABLE
|
||||
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
|
||||
SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${GETARCH2_DIR} -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I"${GETARCH2_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
|
||||
OUTPUT_VARIABLE GETARCH2_LOG
|
||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
|
||||
)
|
||||
|
||||
@@ -33,15 +33,30 @@ endif ()
|
||||
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
|
||||
message(STATUS "Compiling a ${BINARY}-bit binary.")
|
||||
set(NO_AVX 1)
|
||||
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX")
|
||||
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX" OR ${TARGET} STREQUAL "COOPERLAKE")
|
||||
set(TARGET "NEHALEM")
|
||||
endif ()
|
||||
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN")
|
||||
set(TARGET "BARCELONA")
|
||||
endif ()
|
||||
if (${TARGET} STREQUAL "ARMV8" OR ${TARGET} STREQUAL "CORTEXA57" OR ${TARGET} STREQUAL "CORTEXA53")
|
||||
set(TARGET "ARMV7")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (DEFINED TARGET)
|
||||
if (${TARGET} STREQUAL "COOPERLAKE" AND NOT NO_AVX512)
|
||||
# if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
|
||||
else()
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||
endif()
|
||||
# elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
|
||||
# set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
||||
# endif()
|
||||
endif()
|
||||
if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||
endif()
|
||||
@@ -62,6 +77,18 @@ if (DEFINED TARGET)
|
||||
set(GETARCH_FLAGS "-DFORCE_${TARGET}")
|
||||
endif ()
|
||||
|
||||
# On x86_64 build getarch with march=native. This is required to detect AVX512 support in getarch.
|
||||
if (X86_64 AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "PGI")
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -march=native")
|
||||
endif ()
|
||||
|
||||
# On x86 no AVX support is available
|
||||
if (X86 OR X86_64)
|
||||
if ((DEFINED BINARY AND BINARY EQUAL 32) OR ("$CMAKE_SIZEOF_VOID_P}" EQUAL "4"))
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX -DNO_AVX2 -DNO_AVX512")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (INTERFACE64)
|
||||
message(STATUS "Using 64-bit integers.")
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DUSE64BITINT")
|
||||
@@ -133,10 +160,16 @@ endif ()
|
||||
|
||||
if (USE_THREAD)
|
||||
message(STATUS "Multi-threading enabled with ${NUM_THREADS} threads.")
|
||||
else()
|
||||
if (${USE_LOCKING})
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_LOCKING")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
|
||||
|
||||
if (DEFINED BINARY)
|
||||
message(STATUS "Compiling a ${BINARY}-bit binary.")
|
||||
endif ()
|
||||
if (NOT DEFINED NEED_PIC)
|
||||
set(NEED_PIC 1)
|
||||
endif ()
|
||||
@@ -153,6 +186,9 @@ include("${PROJECT_SOURCE_DIR}/cmake/cc.cmake")
|
||||
if (NOT NOFORTRAN)
|
||||
# Fortran Compiler dependent settings
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/fc.cmake")
|
||||
else ()
|
||||
set(NO_LAPACK 1)
|
||||
set(NO_LAPACKE 1)
|
||||
endif ()
|
||||
|
||||
if (BINARY64)
|
||||
@@ -178,12 +214,24 @@ if (NEED_PIC)
|
||||
endif ()
|
||||
|
||||
if (DYNAMIC_ARCH)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
|
||||
if (DYNAMIC_OLDER)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_OLDER")
|
||||
if (X86 OR X86_64 OR ARM64 OR PPC)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
|
||||
if (DYNAMIC_OLDER)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_OLDER")
|
||||
endif ()
|
||||
else ()
|
||||
unset (DYNAMIC_ARCH)
|
||||
message (STATUS "DYNAMIC_ARCH is not supported on the target architecture, removing")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (DYNAMIC_LIST)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_LIST")
|
||||
foreach(DCORE ${DYNAMIC_LIST})
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYN_${DCORE}")
|
||||
endforeach ()
|
||||
endif ()
|
||||
|
||||
if (NO_LAPACK)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACK")
|
||||
#Disable LAPACK C interface
|
||||
@@ -253,10 +301,24 @@ set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}")
|
||||
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_PARALLEL_NUMBER=${NUM_PARALLEL}")
|
||||
|
||||
if (BUFFERSIZE)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DBUFFERSIZE=${BUFFERSIZE}")
|
||||
endif ()
|
||||
|
||||
if (USE_SIMPLE_THREADED_LEVEL3)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3")
|
||||
endif ()
|
||||
|
||||
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
|
||||
if (DEFINED MAX_STACK_ALLOC)
|
||||
if (NOT ${MAX_STACK_ALLOC} EQUAL 0)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_STACK_ALLOC=${MAX_STACK_ALLOC}")
|
||||
endif ()
|
||||
else ()
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_STACK_ALLOC=2048")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (DEFINED LIBNAMESUFFIX)
|
||||
set(LIBPREFIX "libopenblas_${LIBNAMESUFFIX}")
|
||||
else ()
|
||||
@@ -273,7 +335,7 @@ endif ()
|
||||
|
||||
set(KERNELDIR "${PROJECT_SOURCE_DIR}/kernel/${ARCH}")
|
||||
|
||||
# TODO: nead to convert these Makefiles
|
||||
# TODO: need to convert these Makefiles
|
||||
# include ${PROJECT_SOURCE_DIR}/cmake/${ARCH}.cmake
|
||||
|
||||
if (${CORE} STREQUAL "PPC440")
|
||||
@@ -367,6 +429,14 @@ if (${CMAKE_C_COMPILER} STREQUAL "LSB" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Windows
|
||||
set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_COMPLEX_STRUCTURE")
|
||||
endif ()
|
||||
|
||||
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
|
||||
if ("${F_COMPILER}" STREQUAL "FLANG")
|
||||
if (${CMAKE_Fortran_COMPILER_VERSION} VERSION_LESS_EQUAL 3)
|
||||
set(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -fno-unroll-loops")
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED SUFFIX)
|
||||
set(SUFFIX o)
|
||||
endif ()
|
||||
@@ -490,6 +560,8 @@ endif ()
|
||||
#export FUNCTION_PROFILE
|
||||
#export TARGET_CORE
|
||||
#
|
||||
#export SHGEMM_UNROLL_M
|
||||
#export SHGEMM_UNROLL_N
|
||||
#export SGEMM_UNROLL_M
|
||||
#export SGEMM_UNROLL_N
|
||||
#export DGEMM_UNROLL_M
|
||||
|
||||
@@ -15,7 +15,7 @@ if (${HOST_OS} STREQUAL "LINUX")
|
||||
EXECUTE_PROCESS( COMMAND uname -o COMMAND tr -d '\n' OUTPUT_VARIABLE OPERATING_SYSTEM)
|
||||
if(${OPERATING_SYSTEM} MATCHES "Android")
|
||||
set(HOST_OS ANDROID)
|
||||
endif(${OPERATING_SYSTEM} MATCHES "Android")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
@@ -39,13 +39,45 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc.*|power.*|Power.*")
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "mips64.*")
|
||||
set(MIPS64 1)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
|
||||
set(X86_64 1)
|
||||
if (NOT BINARY)
|
||||
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
|
||||
set(X86_64 1)
|
||||
else()
|
||||
set(X86 1)
|
||||
endif()
|
||||
else()
|
||||
if (${BINARY} EQUAL "64")
|
||||
set(X86_64 1)
|
||||
else ()
|
||||
set(X86 1)
|
||||
endif()
|
||||
endif()
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*")
|
||||
set(X86 1)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
|
||||
set(ARM 1)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
|
||||
set(ARM64 1)
|
||||
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
|
||||
set(ARM64 1)
|
||||
else()
|
||||
set(ARM 1)
|
||||
endif()
|
||||
elseif (${CMAKE_CROSSCOMPILING})
|
||||
if (${TARGET} STREQUAL "CORE2")
|
||||
if (NOT BINARY)
|
||||
set(X86 1)
|
||||
elseif (${BINARY} EQUAL "64")
|
||||
set(X86_64 1)
|
||||
else ()
|
||||
set(X86 1)
|
||||
endif()
|
||||
elseif (${TARGET} STREQUAL "ARMV7")
|
||||
set(ARM 1)
|
||||
else()
|
||||
set(ARM64 1)
|
||||
endif ()
|
||||
else ()
|
||||
message(WARNING "Target ARCH could not be determined, got \"${CMAKE_SYSTEM_PROCESSOR}\"")
|
||||
endif()
|
||||
|
||||
if (X86_64)
|
||||
@@ -77,11 +109,17 @@ else()
|
||||
endif()
|
||||
|
||||
if (X86_64 OR X86)
|
||||
file(WRITE ${PROJECT_BINARY_DIR}/avx512.tmp "#include <immintrin.h>\n\nint main(void){ __asm__ volatile(\"vbroadcastss -4 * 4(%rsi), %zmm2\"); }")
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -march=skylake-avx512 -v -o ${PROJECT_BINARY_DIR}/avx512.o -x c ${PROJECT_BINARY_DIR}/avx512.tmp OUTPUT_QUIET ERROR_QUIET RESULT_VARIABLE NO_AVX512)
|
||||
file(WRITE ${PROJECT_BINARY_DIR}/avx512.c "#include <immintrin.h>\n\nint main(void){ __asm__ volatile(\"vbroadcastss -4 * 4(%rsi), %zmm2\"); }")
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -march=skylake-avx512 -c -v -o ${PROJECT_BINARY_DIR}/avx512.o ${PROJECT_BINARY_DIR}/avx512.c OUTPUT_QUIET ERROR_QUIET RESULT_VARIABLE NO_AVX512)
|
||||
if (NO_AVX512 EQUAL 1)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512")
|
||||
endif()
|
||||
file(REMOVE "avx512.tmp" "avx512.o")
|
||||
file(REMOVE "avx512.c" "avx512.o")
|
||||
endif()
|
||||
|
||||
include(CheckIncludeFile)
|
||||
CHECK_INCLUDE_FILE("stdatomic.h" HAVE_C11)
|
||||
if (HAVE_C11 EQUAL 1)
|
||||
message (STATUS found stdatomic.h)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -DHAVE_C11")
|
||||
endif()
|
||||
|
||||
@@ -15,12 +15,36 @@ endfunction ()
|
||||
# Reads a Makefile into CMake vars.
|
||||
macro(ParseMakefileVars MAKEFILE_IN)
|
||||
message(STATUS "Reading vars from ${MAKEFILE_IN}...")
|
||||
set (IfElse 0)
|
||||
set (ElseSeen 0)
|
||||
file(STRINGS ${MAKEFILE_IN} makefile_contents)
|
||||
foreach (makefile_line ${makefile_contents})
|
||||
#message(STATUS "parsing ${makefile_line}")
|
||||
if (${IfElse} GREATER 0)
|
||||
string(REGEX MATCH "endif[ \t]*" line_match "${makefile_line}")
|
||||
if (NOT "${line_match}" STREQUAL "")
|
||||
# message(STATUS "ENDIF ${makefile_line}")
|
||||
set (IfElse 0)
|
||||
set (ElseSeen 0)
|
||||
continue ()
|
||||
endif ()
|
||||
string(REGEX MATCH "else[ \t]*" line_match "${makefile_line}")
|
||||
if (NOT "${line_match}" STREQUAL "")
|
||||
# message(STATUS "ELSE ${makefile_line}")
|
||||
set (ElseSeen 1)
|
||||
continue ()
|
||||
endif()
|
||||
if ( (${IfElse} EQUAL 2 AND ${ElseSeen} EQUAL 0) OR ( ${IfElse} EQUAL 1 AND ${ElseSeen} EQUAL 1))
|
||||
# message(STATUS "skipping ${makefile_line}")
|
||||
continue ()
|
||||
endif ()
|
||||
endif ()
|
||||
string(REGEX MATCH "([0-9_a-zA-Z]+)[ \t]*=[ \t]*(.+)$" line_match "${makefile_line}")
|
||||
if (NOT "${line_match}" STREQUAL "")
|
||||
#message(STATUS "match on ${line_match}")
|
||||
set(var_name ${CMAKE_MATCH_1})
|
||||
set(var_value ${CMAKE_MATCH_2})
|
||||
# set(var_value ${CMAKE_MATCH_2})
|
||||
string(STRIP ${CMAKE_MATCH_2} var_value)
|
||||
# check for Makefile variables in the string, e.g. $(TSUFFIX)
|
||||
string(REGEX MATCHALL "\\$\\(([0-9_a-zA-Z]+)\\)" make_var_matches ${var_value})
|
||||
foreach (make_var ${make_var_matches})
|
||||
@@ -33,7 +57,31 @@ macro(ParseMakefileVars MAKEFILE_IN)
|
||||
else ()
|
||||
string(REGEX MATCH "include \\$\\(KERNELDIR\\)/(.+)$" line_match "${makefile_line}")
|
||||
if (NOT "${line_match}" STREQUAL "")
|
||||
#message(STATUS "match on include ${line_match}")
|
||||
ParseMakefileVars(${KERNELDIR}/${CMAKE_MATCH_1})
|
||||
else ()
|
||||
# message(STATUS "unmatched line ${line_match}")
|
||||
string(REGEX MATCH "ifeq \\(\\$\\(([_A-Z]+)\\),[ \t]*([0-9_A-Z]+)\\)" line_match "${makefile_line}")
|
||||
if (NOT "${line_match}" STREQUAL "")
|
||||
# message(STATUS "IFEQ: ${line_match} first: ${CMAKE_MATCH_1} second: ${CMAKE_MATCH_2}")
|
||||
if (DEFINED ${${CMAKE_MATCH_1}} AND ${${CMAKE_MATCH_1}} STREQUAL ${CMAKE_MATCH_2})
|
||||
# message (STATUS "condition is true")
|
||||
set (IfElse 1)
|
||||
else ()
|
||||
set (IfElse 2)
|
||||
endif ()
|
||||
else ()
|
||||
string(REGEX MATCH "ifneq \\(\\$\\(([_A-Z]+)\\),[ \t]*([0-9_A-Z]+)\\)" line_match "${makefile_line}")
|
||||
if (NOT "${line_match}" STREQUAL "")
|
||||
# message(STATUS "IFNEQ: ${line_match} first: ${CMAKE_MATCH_1} second: ${CMAKE_MATCH_2}")
|
||||
if (NOT ( ${${CMAKE_MATCH_1}} STREQUAL ${CMAKE_MATCH_2}))
|
||||
# message (STATUS "condition is true")
|
||||
set (IfElse 1)
|
||||
else ()
|
||||
set (IfElse 2)
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
endforeach ()
|
||||
@@ -89,7 +137,7 @@ function(AllCombinations list_in absent_codes_in)
|
||||
set(CODES_OUT ${CODES_OUT} PARENT_SCOPE)
|
||||
endfunction ()
|
||||
|
||||
# generates object files for each of the sources, using the BLAS naming scheme to pass the funciton name as a preprocessor definition
|
||||
# generates object files for each of the sources, using the BLAS naming scheme to pass the function name as a preprocessor definition
|
||||
# @param sources_in the source files to build from
|
||||
# @param defines_in (optional) preprocessor definitions that will be applied to all objects
|
||||
# @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended.
|
||||
@@ -163,6 +211,7 @@ function(GenerateNamedObjects sources_in)
|
||||
if (complex_only)
|
||||
list(REMOVE_ITEM float_list "SINGLE")
|
||||
list(REMOVE_ITEM float_list "DOUBLE")
|
||||
list(REMOVE_ITEM float_list "HALF")
|
||||
elseif (real_only)
|
||||
list(REMOVE_ITEM float_list "COMPLEX")
|
||||
list(REMOVE_ITEM float_list "ZCOMPLEX")
|
||||
@@ -176,6 +225,9 @@ function(GenerateNamedObjects sources_in)
|
||||
if (NOT no_float_type)
|
||||
string(SUBSTRING ${float_type} 0 1 float_char)
|
||||
string(TOLOWER ${float_char} float_char)
|
||||
if (${float_type} STREQUAL "HALF")
|
||||
set (float_char "sh")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NOT name_in)
|
||||
@@ -210,6 +262,9 @@ function(GenerateNamedObjects sources_in)
|
||||
if (${float_type} STREQUAL "DOUBLE" OR ${float_type} STREQUAL "ZCOMPLEX")
|
||||
list(APPEND obj_defines "DOUBLE")
|
||||
endif ()
|
||||
if (${float_type} STREQUAL "HALF")
|
||||
list(APPEND obj_defines "HALF")
|
||||
endif ()
|
||||
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
|
||||
list(APPEND obj_defines "COMPLEX")
|
||||
if (mangle_complex_sources)
|
||||
|
||||
32
common.h
32
common.h
@@ -85,6 +85,8 @@ extern "C" {
|
||||
|
||||
#if !defined(_MSC_VER)
|
||||
#include <unistd.h>
|
||||
#elif _MSC_VER < 1900
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
#include <time.h>
|
||||
|
||||
@@ -129,7 +131,7 @@ extern "C" {
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
#ifdef SMP
|
||||
#if defined(SMP) || defined(USE_LOCKING)
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
#endif
|
||||
@@ -198,7 +200,7 @@ extern "C" {
|
||||
#error "You can't specify both LOCK operation!"
|
||||
#endif
|
||||
|
||||
#ifdef SMP
|
||||
#if defined(SMP) || defined(USE_LOCKING)
|
||||
#define USE_PTHREAD_LOCK
|
||||
#undef USE_PTHREAD_SPINLOCK
|
||||
#endif
|
||||
@@ -255,6 +257,11 @@ typedef long BLASLONG;
|
||||
typedef unsigned long BLASULONG;
|
||||
#endif
|
||||
|
||||
#ifndef BFLOAT16
|
||||
typedef unsigned short bfloat16;
|
||||
#define HALFCONVERSION 1
|
||||
#endif
|
||||
|
||||
#ifdef USE64BITINT
|
||||
typedef BLASLONG blasint;
|
||||
#if defined(OS_WINDOWS) && defined(__64BIT__)
|
||||
@@ -295,6 +302,13 @@ typedef int blasint;
|
||||
#define SIZE 8
|
||||
#define BASE_SHIFT 3
|
||||
#define ZBASE_SHIFT 4
|
||||
#elif defined(HALF)
|
||||
#define IFLOAT bfloat16
|
||||
#define XFLOAT IFLOAT
|
||||
#define FLOAT float
|
||||
#define SIZE 2
|
||||
#define BASE_SHIFT 1
|
||||
#define ZBASE_SHIFT 2
|
||||
#else
|
||||
#define FLOAT float
|
||||
#define SIZE 4
|
||||
@@ -306,6 +320,10 @@ typedef int blasint;
|
||||
#define XFLOAT FLOAT
|
||||
#endif
|
||||
|
||||
#ifndef IFLOAT
|
||||
#define IFLOAT FLOAT
|
||||
#endif
|
||||
|
||||
#ifndef COMPLEX
|
||||
#define COMPSIZE 1
|
||||
#else
|
||||
@@ -342,13 +360,13 @@ typedef int blasint;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef POWER8
|
||||
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
#ifdef PILEDRIVER
|
||||
#ifndef YIELDING
|
||||
@@ -439,7 +457,7 @@ please https://github.com/xianyi/OpenBLAS/issues/246
|
||||
typedef char env_var_t[MAX_PATH];
|
||||
#define readenv(p, n) 0
|
||||
#else
|
||||
#ifdef OS_WINDOWS
|
||||
#if defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT)
|
||||
typedef char env_var_t[MAX_PATH];
|
||||
#define readenv(p, n) GetEnvironmentVariable((LPCTSTR)(n), (LPTSTR)(p), sizeof(p))
|
||||
#else
|
||||
@@ -650,6 +668,8 @@ void gotoblas_dynamic_init(void);
|
||||
void gotoblas_dynamic_quit(void);
|
||||
void gotoblas_profile_init(void);
|
||||
void gotoblas_profile_quit(void);
|
||||
|
||||
int support_avx512(void);
|
||||
|
||||
#ifdef USE_OPENMP
|
||||
|
||||
@@ -661,7 +681,7 @@ __declspec(dllimport) int __cdecl omp_in_parallel(void);
|
||||
__declspec(dllimport) int __cdecl omp_get_num_procs(void);
|
||||
#endif
|
||||
|
||||
#if (__STDC_VERSION__ >= 201112L)
|
||||
#ifdef HAVE_C11
|
||||
#if defined(C_GCC) && ( __GNUC__ < 7)
|
||||
// workaround for GCC bug 65467
|
||||
#ifndef _Atomic
|
||||
|
||||
@@ -43,6 +43,7 @@
|
||||
|
||||
#define MB asm("mb")
|
||||
#define WMB asm("wmb")
|
||||
#define RMB asm("rmb")
|
||||
|
||||
static void __inline blas_lock(unsigned long *address){
|
||||
#ifndef __DECC
|
||||
|
||||
@@ -37,11 +37,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#define MB
|
||||
#define WMB
|
||||
#define RMB
|
||||
|
||||
#else
|
||||
|
||||
#define MB __asm__ __volatile__ ("dmb ish" : : : "memory")
|
||||
#define WMB __asm__ __volatile__ ("dmb ishst" : : : "memory")
|
||||
#define RMB __asm__ __volatile__ ("dmb ish" : : : "memory")
|
||||
|
||||
#endif
|
||||
|
||||
@@ -121,7 +123,7 @@ REALNAME:
|
||||
#endif
|
||||
#define HUGE_PAGESIZE ( 4 << 20)
|
||||
|
||||
#define BUFFER_SIZE (16 << 20)
|
||||
#define BUFFER_SIZE (32 << 20)
|
||||
|
||||
|
||||
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
|
||||
|
||||
@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#define MB __asm__ __volatile__ ("dmb ish" : : : "memory")
|
||||
#define WMB __asm__ __volatile__ ("dmb ishst" : : : "memory")
|
||||
|
||||
#define RMB __asm__ __volatile__ ("dmb ishld" : : : "memory")
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
@@ -53,16 +53,16 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||
BLASULONG ret;
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
__asm__ __volatile__(
|
||||
"mov x4, #1 \n\t"
|
||||
"sevl \n\t"
|
||||
"1: \n\t"
|
||||
"wfe \n\t"
|
||||
"2: \n\t"
|
||||
"ldaxr x2, [%1] \n\t"
|
||||
"cbnz x2, 1b \n\t"
|
||||
"2: \n\t"
|
||||
"stxr w3, x4, [%1] \n\t"
|
||||
"cbnz w3, 1b \n\t"
|
||||
"cbnz w3, 2b \n\t"
|
||||
"mov %0, #0 \n\t"
|
||||
: "=r"(ret), "=r"(address)
|
||||
: "1"(address)
|
||||
@@ -78,7 +78,20 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
#define BLAS_LOCK_DEFINED
|
||||
|
||||
#if !defined(OS_DARWIN) && !defined (OS_ANDROID)
|
||||
static __inline BLASULONG rpcc(void){
|
||||
BLASULONG ret = 0;
|
||||
blasint shift;
|
||||
|
||||
__asm__ __volatile__ ("isb; mrs %0,cntvct_el0":"=r"(ret));
|
||||
__asm__ __volatile__ ("mrs %0,cntfrq_el0; clz %w0, %w0":"=&r"(shift));
|
||||
|
||||
return ret << shift;
|
||||
}
|
||||
|
||||
#define RPCC_DEFINED
|
||||
#define RPCC64BIT
|
||||
#endif
|
||||
|
||||
static inline int blas_quickdivide(blasint x, blasint y){
|
||||
return x / y;
|
||||
@@ -103,12 +116,16 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||
|
||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||
|
||||
#define PROLOGUE \
|
||||
.text ;\
|
||||
.align 4 ;\
|
||||
.global REALNAME ;\
|
||||
.type REALNAME, %function ;\
|
||||
.macro PROLOGUE
|
||||
.text ;
|
||||
.p2align 2 ;
|
||||
.global REALNAME ;
|
||||
#ifndef __APPLE__
|
||||
.type REALNAME, %function ;
|
||||
#endif
|
||||
REALNAME:
|
||||
.endm
|
||||
|
||||
|
||||
#define EPILOGUE
|
||||
|
||||
@@ -124,12 +141,17 @@ REALNAME:
|
||||
#endif
|
||||
#define HUGE_PAGESIZE ( 4 << 20)
|
||||
|
||||
#ifndef BUFFERSIZE
|
||||
#if defined(CORTEXA57)
|
||||
#define BUFFER_SIZE (20 << 20)
|
||||
#elif defined(TSV110) || defined(EMAG8180)
|
||||
#define BUFFER_SIZE (32 << 20)
|
||||
#else
|
||||
#define BUFFER_SIZE (16 << 20)
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define BUFFER_SIZE (32 << BUFFERSIZE)
|
||||
#endif
|
||||
|
||||
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
|
||||
|
||||
|
||||
42
common_c.h
42
common_c.h
@@ -19,6 +19,7 @@
|
||||
#define CDOTC_K cdotc_k
|
||||
#define CNRM2_K cnrm2_k
|
||||
#define CSCAL_K cscal_k
|
||||
#define CSUM_K csum_k
|
||||
#define CSWAP_K cswap_k
|
||||
#define CROT_K csrot_k
|
||||
|
||||
@@ -231,6 +232,46 @@
|
||||
|
||||
#define CGEADD_K cgeadd_k
|
||||
|
||||
#define CGEMM_SMALL_KERNEL_NN cgemm_small_kernel_nn
|
||||
#define CGEMM_SMALL_KERNEL_NT cgemm_small_kernel_nt
|
||||
#define CGEMM_SMALL_KERNEL_NR cgemm_small_kernel_nr
|
||||
#define CGEMM_SMALL_KERNEL_NC cgemm_small_kernel_nc
|
||||
|
||||
#define CGEMM_SMALL_KERNEL_TN cgemm_small_kernel_tn
|
||||
#define CGEMM_SMALL_KERNEL_TT cgemm_small_kernel_tt
|
||||
#define CGEMM_SMALL_KERNEL_TR cgemm_small_kernel_tr
|
||||
#define CGEMM_SMALL_KERNEL_TC cgemm_small_kernel_tc
|
||||
|
||||
#define CGEMM_SMALL_KERNEL_RN cgemm_small_kernel_rn
|
||||
#define CGEMM_SMALL_KERNEL_RT cgemm_small_kernel_rt
|
||||
#define CGEMM_SMALL_KERNEL_RR cgemm_small_kernel_rr
|
||||
#define CGEMM_SMALL_KERNEL_RC cgemm_small_kernel_rc
|
||||
|
||||
#define CGEMM_SMALL_KERNEL_CN cgemm_small_kernel_cn
|
||||
#define CGEMM_SMALL_KERNEL_CT cgemm_small_kernel_ct
|
||||
#define CGEMM_SMALL_KERNEL_CR cgemm_small_kernel_cr
|
||||
#define CGEMM_SMALL_KERNEL_CC cgemm_small_kernel_cc
|
||||
|
||||
#define CGEMM_SMALL_KERNEL_B0_NN cgemm_small_kernel_b0_nn
|
||||
#define CGEMM_SMALL_KERNEL_B0_NT cgemm_small_kernel_b0_nt
|
||||
#define CGEMM_SMALL_KERNEL_B0_NR cgemm_small_kernel_b0_nr
|
||||
#define CGEMM_SMALL_KERNEL_B0_NC cgemm_small_kernel_b0_nc
|
||||
|
||||
#define CGEMM_SMALL_KERNEL_B0_TN cgemm_small_kernel_b0_tn
|
||||
#define CGEMM_SMALL_KERNEL_B0_TT cgemm_small_kernel_b0_tt
|
||||
#define CGEMM_SMALL_KERNEL_B0_TR cgemm_small_kernel_b0_tr
|
||||
#define CGEMM_SMALL_KERNEL_B0_TC cgemm_small_kernel_b0_tc
|
||||
|
||||
#define CGEMM_SMALL_KERNEL_B0_RN cgemm_small_kernel_b0_rn
|
||||
#define CGEMM_SMALL_KERNEL_B0_RT cgemm_small_kernel_b0_rt
|
||||
#define CGEMM_SMALL_KERNEL_B0_RR cgemm_small_kernel_b0_rr
|
||||
#define CGEMM_SMALL_KERNEL_B0_RC cgemm_small_kernel_b0_rc
|
||||
|
||||
#define CGEMM_SMALL_KERNEL_B0_CN cgemm_small_kernel_b0_cn
|
||||
#define CGEMM_SMALL_KERNEL_B0_CT cgemm_small_kernel_b0_ct
|
||||
#define CGEMM_SMALL_KERNEL_B0_CR cgemm_small_kernel_b0_cr
|
||||
#define CGEMM_SMALL_KERNEL_B0_CC cgemm_small_kernel_b0_cc
|
||||
|
||||
#else
|
||||
|
||||
#define CAMAX_K gotoblas -> camax_k
|
||||
@@ -249,6 +290,7 @@
|
||||
#define CDOTC_K gotoblas -> cdotc_k
|
||||
#define CNRM2_K gotoblas -> cnrm2_k
|
||||
#define CSCAL_K gotoblas -> cscal_k
|
||||
#define CSUM_K gotoblas -> csum_k
|
||||
#define CSWAP_K gotoblas -> cswap_k
|
||||
#define CROT_K gotoblas -> csrot_k
|
||||
|
||||
|
||||
13
common_d.h
13
common_d.h
@@ -19,6 +19,7 @@
|
||||
#define DDOTC_K ddot_k
|
||||
#define DNRM2_K dnrm2_k
|
||||
#define DSCAL_K dscal_k
|
||||
#define DSUM_K dsum_k
|
||||
#define DSWAP_K dswap_k
|
||||
#define DROT_K drot_k
|
||||
|
||||
@@ -156,6 +157,17 @@
|
||||
#define DIMATCOPY_K_RT dimatcopy_k_rt
|
||||
#define DGEADD_K dgeadd_k
|
||||
|
||||
|
||||
#define DGEMM_SMALL_KERNEL_NN dgemm_small_kernel_nn
|
||||
#define DGEMM_SMALL_KERNEL_NT dgemm_small_kernel_nt
|
||||
#define DGEMM_SMALL_KERNEL_TN dgemm_small_kernel_tn
|
||||
#define DGEMM_SMALL_KERNEL_TT dgemm_small_kernel_tt
|
||||
|
||||
#define DGEMM_SMALL_KERNEL_B0_NN dgemm_small_kernel_b0_nn
|
||||
#define DGEMM_SMALL_KERNEL_B0_NT dgemm_small_kernel_b0_nt
|
||||
#define DGEMM_SMALL_KERNEL_B0_TN dgemm_small_kernel_b0_tn
|
||||
#define DGEMM_SMALL_KERNEL_B0_TT dgemm_small_kernel_b0_tt
|
||||
|
||||
#else
|
||||
|
||||
#define DAMAX_K gotoblas -> damax_k
|
||||
@@ -174,6 +186,7 @@
|
||||
#define DDOTC_K gotoblas -> ddot_k
|
||||
#define DNRM2_K gotoblas -> dnrm2_k
|
||||
#define DSCAL_K gotoblas -> dscal_k
|
||||
#define DSUM_K gotoblas -> dsum_k
|
||||
#define DSWAP_K gotoblas -> dswap_k
|
||||
#define DROT_K gotoblas -> drot_k
|
||||
|
||||
|
||||
@@ -47,6 +47,7 @@
|
||||
|
||||
#define MB
|
||||
#define WMB
|
||||
#define RMB
|
||||
|
||||
#ifdef __ECC
|
||||
#include <ia64intrin.h>
|
||||
|
||||
@@ -122,6 +122,13 @@ xdouble BLASFUNC(qasum) (blasint *, xdouble *, blasint *);
|
||||
double BLASFUNC(dzasum)(blasint *, double *, blasint *);
|
||||
xdouble BLASFUNC(qxasum)(blasint *, xdouble *, blasint *);
|
||||
|
||||
FLOATRET BLASFUNC(ssum) (blasint *, float *, blasint *);
|
||||
FLOATRET BLASFUNC(scsum)(blasint *, float *, blasint *);
|
||||
double BLASFUNC(dsum) (blasint *, double *, blasint *);
|
||||
xdouble BLASFUNC(qsum) (blasint *, xdouble *, blasint *);
|
||||
double BLASFUNC(dzsum)(blasint *, double *, blasint *);
|
||||
xdouble BLASFUNC(qxsum)(blasint *, xdouble *, blasint *);
|
||||
|
||||
blasint BLASFUNC(isamax)(blasint *, float *, blasint *);
|
||||
blasint BLASFUNC(idamax)(blasint *, double *, blasint *);
|
||||
blasint BLASFUNC(iqamax)(blasint *, xdouble *, blasint *);
|
||||
@@ -462,6 +469,8 @@ void BLASFUNC(xhbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint
|
||||
|
||||
/* Level 3 routines */
|
||||
|
||||
void BLASFUNC(shgemm)(char *, char *, blasint *, blasint *, blasint *, float *,
|
||||
bfloat16 *, blasint *, bfloat16 *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(sgemm)(char *, char *, blasint *, blasint *, blasint *, float *,
|
||||
float *, blasint *, float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(dgemm)(char *, char *, blasint *, blasint *, blasint *, double *,
|
||||
|
||||
146
common_lapack.h
146
common_lapack.h
@@ -293,4 +293,150 @@ blasint zlarf_R(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLO
|
||||
blasint xlarf_L(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xlarf_R(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
|
||||
blasint strtrs_UNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint strtrs_UNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint strtrs_UTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint strtrs_UTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint strtrs_LNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint strtrs_LNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint strtrs_LTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint strtrs_LTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint dtrtrs_UNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint dtrtrs_UNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint dtrtrs_UTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint dtrtrs_UTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint dtrtrs_LNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint dtrtrs_LNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint dtrtrs_LTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint dtrtrs_LTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint qtrtrs_UNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint qtrtrs_UNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint qtrtrs_UTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint qtrtrs_UTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint qtrtrs_LNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint qtrtrs_LNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint qtrtrs_LTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint qtrtrs_LTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint ctrtrs_UNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_UNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_UTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_UTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_URU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_URN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_UCU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_UCN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_LNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_LNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_LTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_LTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_LRU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_LRN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_LCU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_LCN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ztrtrs_UNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_UNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_UTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_UTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_URU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_URN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_UCU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_UCN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_LNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_LNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_LTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_LTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_LRU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_LRN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_LCU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_LCN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint xtrtrs_UNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_UNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_UTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_UTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_URU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_URN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_UCU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_UCN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_LNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_LNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_LTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_LTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_LRU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_LRN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_LCU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_LCN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
|
||||
blasint strtrs_UNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint strtrs_UNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint strtrs_UTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint strtrs_UTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint strtrs_LNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint strtrs_LNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint strtrs_LTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint strtrs_LTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint dtrtrs_UNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint dtrtrs_UNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint dtrtrs_UTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint dtrtrs_UTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint dtrtrs_LNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint dtrtrs_LNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint dtrtrs_LTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint dtrtrs_LTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint qtrtrs_UNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint qtrtrs_UNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint qtrtrs_UTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint qtrtrs_UTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint qtrtrs_LNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint qtrtrs_LNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint qtrtrs_LTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint qtrtrs_LTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint ctrtrs_UNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_UNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_UTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_UTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_URU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_URN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_UCU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_UCN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_LNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_LNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_LTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_LTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_LRU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_LRN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_LCU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ctrtrs_LCN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
blasint ztrtrs_UNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_UNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_UTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_UTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_URU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_URN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_UCU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_UCN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_LNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_LNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_LTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_LTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_LRU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_LRN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_LCU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint ztrtrs_LCN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
|
||||
blasint xtrtrs_UNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_UNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_UTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_UTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_URU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_URN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_UCU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_UCN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_LNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_LNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_LTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_LTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_LRU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_LRN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_LCU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
blasint xtrtrs_LCN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -100,6 +100,13 @@ float casum_k (BLASLONG, float *, BLASLONG);
|
||||
double zasum_k (BLASLONG, double *, BLASLONG);
|
||||
xdouble xasum_k (BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
float ssum_k (BLASLONG, float *, BLASLONG);
|
||||
double dsum_k (BLASLONG, double *, BLASLONG);
|
||||
xdouble qsum_k (BLASLONG, xdouble *, BLASLONG);
|
||||
float csum_k (BLASLONG, float *, BLASLONG);
|
||||
double zsum_k (BLASLONG, double *, BLASLONG);
|
||||
xdouble xsum_k (BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
float samax_k (BLASLONG, float *, BLASLONG);
|
||||
double damax_k (BLASLONG, double *, BLASLONG);
|
||||
xdouble qamax_k (BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user