Compare commits
1135 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5f998efd7b | ||
|
|
88a35ff457 | ||
|
|
5dde4e65d3 | ||
|
|
27a9df6477 | ||
|
|
7224022473 | ||
|
|
468ac3df9e | ||
|
|
376048156b | ||
|
|
d1c5b8f913 | ||
|
|
91bde7d315 | ||
|
|
80373ea039 | ||
|
|
d12b75a6c4 | ||
|
|
7294fb1d9d | ||
|
|
31e086d6a6 | ||
|
|
cbb47736af | ||
|
|
2a7c6930ac | ||
|
|
c4ec882020 | ||
|
|
d33fc32cf3 | ||
|
|
529bfc36ec | ||
|
|
88249ca5f7 | ||
|
|
731c518cff | ||
|
|
29fc429d9a | ||
|
|
e2d3b1561a | ||
|
|
4a012c3d20 | ||
|
|
d5ef0dee9a | ||
|
|
a590e6135c | ||
|
|
4239dd65ce | ||
|
|
3db2adf872 | ||
|
|
ad2462811a | ||
|
|
c1cf62d2c0 | ||
|
|
bfe1656b8b | ||
|
|
f02d535fde | ||
|
|
49e62c0e77 | ||
|
|
3381f23709 | ||
|
|
fa6a920caa | ||
|
|
a6515bb858 | ||
|
|
c66b842d66 | ||
|
|
df2dfe65d6 | ||
|
|
2c8d634619 | ||
|
|
37efb5bc1d | ||
|
|
97d671eb61 | ||
|
|
305cd2e8b4 | ||
|
|
09bc6ebe5b | ||
|
|
872a11a2bf | ||
|
|
eda9e8632a | ||
|
|
8f83d3f961 | ||
|
|
e5e47cfdb5 | ||
|
|
ebf9e9dabe | ||
|
|
83bd547517 | ||
|
|
e25f4c01d6 | ||
|
|
54915ce343 | ||
|
|
0150fabdb6 | ||
|
|
4f0773f07d | ||
|
|
aa5edebc80 | ||
|
|
89924b3d5b | ||
|
|
da7f0ff425 | ||
|
|
0d5c8e5386 | ||
|
|
912410f214 | ||
|
|
b122413fb0 | ||
|
|
9b7b5f7fdc | ||
|
|
34513be726 | ||
|
|
482015f8d6 | ||
|
|
639000e34f | ||
|
|
5de7727cc7 | ||
|
|
96df4b9b17 | ||
|
|
29dc8e0c61 | ||
|
|
65e56cb29d | ||
|
|
bd831a03a8 | ||
|
|
edc97918f8 | ||
|
|
e0034de22d | ||
|
|
32c7fe6bff | ||
|
|
19bdf9d52b | ||
|
|
4f09030fdc | ||
|
|
6f4eca5ea4 | ||
|
|
be55f96cbd | ||
|
|
96dd0ef4f7 | ||
|
|
8f0d6c06a9 | ||
|
|
410a07cbec | ||
|
|
72f95a0acc | ||
|
|
e545b81e76 | ||
|
|
d7afdf9137 | ||
|
|
4f4daaa42a | ||
|
|
42bbe74791 | ||
|
|
c8322c65e4 | ||
|
|
87dde1fde6 | ||
|
|
42466e54fa | ||
|
|
3b0624d50f | ||
|
|
fd4e68128e | ||
|
|
6464d1723a | ||
|
|
59c97cfee4 | ||
|
|
de7875ca5d | ||
|
|
67836c2ab4 | ||
|
|
5fecfe0f42 | ||
|
|
bba6676803 | ||
|
|
5649b2c53a | ||
|
|
6e972994b2 | ||
|
|
5b04cf7ab4 | ||
|
|
d5ea8fd823 | ||
|
|
4beffaaa4b | ||
|
|
fb28e4adc9 | ||
|
|
26faa3ca47 | ||
|
|
4f75989634 | ||
|
|
1e06b49854 | ||
|
|
7f546f54fa | ||
|
|
a809431e34 | ||
|
|
5ee1cf0223 | ||
|
|
9aea7a0d9a | ||
|
|
da0987507c | ||
|
|
81fed55782 | ||
|
|
35387edb8d | ||
|
|
9c884986ad | ||
|
|
f2f0e98bb5 | ||
|
|
166d64eb7c | ||
|
|
e078339e8d | ||
|
|
832a272784 | ||
|
|
356606314c | ||
|
|
ed79a29d87 | ||
|
|
77d16ffc69 | ||
|
|
56762d5e4c | ||
|
|
90dd190a6d | ||
|
|
ab9ec4ab4e | ||
|
|
0cbd2d34e4 | ||
|
|
62979fd104 | ||
|
|
20a413e154 | ||
|
|
dc40bc7368 | ||
|
|
1acfc78c8f | ||
|
|
b4071d0d16 | ||
|
|
7908efafc8 | ||
|
|
66dc10b019 | ||
|
|
b5c96fcfcd | ||
|
|
c9ff735da6 | ||
|
|
99880f7906 | ||
|
|
cd135e2b59 | ||
|
|
ad124a5e8b | ||
|
|
211d2eceb5 | ||
|
|
5813ed095b | ||
|
|
e44b028fe5 | ||
|
|
a6efabf155 | ||
|
|
ea26b00c06 | ||
|
|
08786c4b95 | ||
|
|
12e476f7a2 | ||
|
|
8de40955ad | ||
|
|
9b24688eed | ||
|
|
43224f7273 | ||
|
|
9254a701f3 | ||
|
|
26a614fdd1 | ||
|
|
7ae64f4f9c | ||
|
|
90e02ccf68 | ||
|
|
503dcbfde6 | ||
|
|
82e80fa82b | ||
|
|
4227049c7d | ||
|
|
688267edf3 | ||
|
|
d1fe040d9b | ||
|
|
411982715c | ||
|
|
e831d6924e | ||
|
|
ffc1d6c468 | ||
|
|
a86474c6f7 | ||
|
|
67473d09dd | ||
|
|
19ba133383 | ||
|
|
f09a9afa03 | ||
|
|
0d96b0e2a7 | ||
|
|
848cb27b1e | ||
|
|
dc34a0da96 | ||
|
|
a3935f0dfb | ||
|
|
47e9fe0bb4 | ||
|
|
c7bc0ee823 | ||
|
|
6bdee6d50a | ||
|
|
009c0d2e5a | ||
|
|
4d88e1a4ad | ||
|
|
0958b49811 | ||
|
|
09b240f1ef | ||
|
|
69f4e8b86c | ||
|
|
e072e68aa0 | ||
|
|
738628e9a8 | ||
|
|
e527dbffaa | ||
|
|
eeaee46e86 | ||
|
|
040672ecf6 | ||
|
|
c8ce9e4377 | ||
|
|
ab3ffab96a | ||
|
|
f036be9ce2 | ||
|
|
39eecfd20c | ||
|
|
5088523786 | ||
|
|
3f7720ec4b | ||
|
|
faba876fda | ||
|
|
172a62d73e | ||
|
|
e545a66a5b | ||
|
|
228c75a69c | ||
|
|
9e2f316ede | ||
|
|
e2489c9a92 | ||
|
|
c4ea9eea67 | ||
|
|
cd8f80634f | ||
|
|
faf06f0d8b | ||
|
|
c6fa4aef0c | ||
|
|
1029dcd60d | ||
|
|
d12c8bbcbb | ||
|
|
15f0d65010 | ||
|
|
7d831af1ba | ||
|
|
ee3e87cf46 | ||
|
|
8772c00bb0 | ||
|
|
0a4a7e18f6 | ||
|
|
357ef3cd8c | ||
|
|
002e646476 | ||
|
|
3dad87bbb5 | ||
|
|
bdd51cdabc | ||
|
|
1d8ab99e09 | ||
|
|
04b2b06665 | ||
|
|
8a83daf4bf | ||
|
|
39abb079fb | ||
|
|
76c6e33e54 | ||
|
|
a9594e8072 | ||
|
|
8e89668f62 | ||
|
|
f63deae9de | ||
|
|
4c2b713ce5 | ||
|
|
cdc954675c | ||
|
|
60eea75409 | ||
|
|
071a830e8b | ||
|
|
d09f88192c | ||
|
|
e58233460a | ||
|
|
3918d17025 | ||
|
|
99bd2892bf | ||
|
|
ff6f572f2e | ||
|
|
e0dc5f58c5 | ||
|
|
2757b49767 | ||
|
|
ff41e13385 | ||
|
|
1de6fa0f50 | ||
|
|
efda640723 | ||
|
|
1530e78cfe | ||
|
|
907e286eb6 | ||
|
|
cde3aee08b | ||
|
|
ee6ea7e988 | ||
|
|
ca0b36b012 | ||
|
|
01e1d85339 | ||
|
|
d0a79ca6e0 | ||
|
|
0c07003ccf | ||
|
|
f33fcedb30 | ||
|
|
0f1d6e8b39 | ||
|
|
981064acc6 | ||
|
|
ab2033f2db | ||
|
|
a4d97d980f | ||
|
|
f279ff4789 | ||
|
|
759f37feba | ||
|
|
e8d0e66982 | ||
|
|
331fd51260 | ||
|
|
0863a0d4b4 | ||
|
|
2e5f906f41 | ||
|
|
d1a97bad39 | ||
|
|
28e2fab33e | ||
|
|
752fdc6f82 | ||
|
|
c1c5a63d3c | ||
|
|
209b63197e | ||
|
|
4b55fae337 | ||
|
|
738d622feb | ||
|
|
95649dee28 | ||
|
|
3a8c5180b9 | ||
|
|
7611a41f40 | ||
|
|
1a39b92b1d | ||
|
|
dd6212e684 | ||
|
|
9bcf50872b | ||
|
|
c81dc6322f | ||
|
|
8fdb0655e9 | ||
|
|
fb200c7245 | ||
|
|
0b8e876d89 | ||
|
|
4713e7c47f | ||
|
|
6085386b10 | ||
|
|
002b41f024 | ||
|
|
84b8170bfb | ||
|
|
1aa1e6cb54 | ||
|
|
cbd2bf1f6e | ||
|
|
9f5cfd43dc | ||
|
|
1480f3df71 | ||
|
|
88afb3bc94 | ||
|
|
2ffbbb54f6 | ||
|
|
3e1bbd6b5f | ||
|
|
b678471d65 | ||
|
|
864e202afd | ||
|
|
b9bb009236 | ||
|
|
a2672d5589 | ||
|
|
c2496d8f48 | ||
|
|
fb0afdaf99 | ||
|
|
51aa157e64 | ||
|
|
87c7d10b34 | ||
|
|
d0035b857d | ||
|
|
c61a7cd293 | ||
|
|
a8bb5003de | ||
|
|
9a48adff3f | ||
|
|
823a40a110 | ||
|
|
0bd706ac8d | ||
|
|
8379550076 | ||
|
|
fc148b7e4d | ||
|
|
5bb2b91a03 | ||
|
|
abc3304587 | ||
|
|
a836fe8ec1 | ||
|
|
1153e3ac39 | ||
|
|
7c2c488c23 | ||
|
|
ae4ac6f984 | ||
|
|
4494d03a21 | ||
|
|
d35baf30cf | ||
|
|
24efbbd339 | ||
|
|
8cd46acebb | ||
|
|
9e4b6971e2 | ||
|
|
0ef7841473 | ||
|
|
7f2a959e3e | ||
|
|
6418667818 | ||
|
|
104ad066af | ||
|
|
a9bf8a781a | ||
|
|
8f9975e013 | ||
|
|
e7bd736802 | ||
|
|
14571d8b08 | ||
|
|
82d829c670 | ||
|
|
32ca9a9f68 | ||
|
|
c732f1a066 | ||
|
|
5f93aa5f87 | ||
|
|
9db451acd0 | ||
|
|
3eaff85191 | ||
|
|
db3efb2e14 | ||
|
|
19934bea8c | ||
|
|
433edc74ab | ||
|
|
99a6289d3c | ||
|
|
e7732d9941 | ||
|
|
fa216717bf | ||
|
|
f8a6e6cce4 | ||
|
|
00abce3b93 | ||
|
|
a16ace68f5 | ||
|
|
d4da3fbe9f | ||
|
|
b590cd45fc | ||
|
|
10a62cb595 | ||
|
|
9dbdc7b2cf | ||
|
|
596ead0f8d | ||
|
|
60816c9259 | ||
|
|
aaba3e483f | ||
|
|
c295d61e82 | ||
|
|
570bc9afbd | ||
|
|
f00baf32a6 | ||
|
|
8e7f2809af | ||
|
|
81ac8aab81 | ||
|
|
ce372062d6 | ||
|
|
6c7b9b74f6 | ||
|
|
f5b028eb37 | ||
|
|
cebcca9987 | ||
|
|
084e4573c1 | ||
|
|
becf8bc7a0 | ||
|
|
4e54d8ab7f | ||
|
|
6f58271190 | ||
|
|
311e0a912c | ||
|
|
429dfd83ee | ||
|
|
5af06c764a | ||
|
|
7cd26f7e38 | ||
|
|
77006cc2a3 | ||
|
|
57eee3fa43 | ||
|
|
92a858e69e | ||
|
|
13d40e7591 | ||
|
|
4c29d20108 | ||
|
|
1357b8d93b | ||
|
|
fb7057babe | ||
|
|
358ee318ed | ||
|
|
4c024b85e4 | ||
|
|
e9ccbe738c | ||
|
|
e54b6ddaa0 | ||
|
|
bcfc298c38 | ||
|
|
ce7c6c6b2d | ||
|
|
f3419e634c | ||
|
|
7472c79ea6 | ||
|
|
66c9a9b33d | ||
|
|
3705f5675a | ||
|
|
bce2b34f7a | ||
|
|
da83ec94d1 | ||
|
|
3409bccb21 | ||
|
|
8a8f3932eb | ||
|
|
90e2321ac3 | ||
|
|
4998e19869 | ||
|
|
ff1da01476 | ||
|
|
ef52a9266b | ||
|
|
4f38ae3199 | ||
|
|
4baf0c7cfc | ||
|
|
595a0224e4 | ||
|
|
f124ffab47 | ||
|
|
91610f3835 | ||
|
|
6e22ecf102 | ||
|
|
6221d6df5f | ||
|
|
117d3371d4 | ||
|
|
16446d1d23 | ||
|
|
a6e9e0b94b | ||
|
|
3178e4fea0 | ||
|
|
95c245ddb0 | ||
|
|
4b1b27347f | ||
|
|
161c927071 | ||
|
|
662f89f059 | ||
|
|
54747fe24a | ||
|
|
157ee498ac | ||
|
|
b09cc3b9bb | ||
|
|
6c0862a94f | ||
|
|
842d842751 | ||
|
|
85636ff1a0 | ||
|
|
821affb9a0 | ||
|
|
515bc56ea9 | ||
|
|
ae70b916f4 | ||
|
|
9ea0144482 | ||
|
|
1f217a6175 | ||
|
|
78348a2853 | ||
|
|
9687437928 | ||
|
|
d1c6469283 | ||
|
|
b544be914d | ||
|
|
c54a29bb48 | ||
|
|
ff4c5deafa | ||
|
|
22b9c2747d | ||
|
|
27b5211ccd | ||
|
|
beb1d076a4 | ||
|
|
9e44f3ddd0 | ||
|
|
eece9fd889 | ||
|
|
5dfa0712c3 | ||
|
|
8a592ee386 | ||
|
|
7f2409a8e1 | ||
|
|
7f28cd1f88 | ||
|
|
154729908e | ||
|
|
97bd1e42c8 | ||
|
|
7de829f713 | ||
|
|
9b69d8a8e5 | ||
|
|
0a5ff9f9f9 | ||
|
|
8a40f1355e | ||
|
|
78782485b6 | ||
|
|
8d86d14d3f | ||
|
|
925d4e1dc6 | ||
|
|
57df7956ee | ||
|
|
437c7d64f2 | ||
|
|
ca5c25c870 | ||
|
|
4a30a2584a | ||
|
|
098d8ec5d6 | ||
|
|
a94f2b7848 | ||
|
|
d346c533b1 | ||
|
|
f04af36ad0 | ||
|
|
41000c8443 | ||
|
|
011431b9d7 | ||
|
|
c8a7860eb3 | ||
|
|
2daad2bcb5 | ||
|
|
bac478d17e | ||
|
|
053044ae4d | ||
|
|
fca66262c4 | ||
|
|
412bcd187a | ||
|
|
bd06b246cc | ||
|
|
8b140220c8 | ||
|
|
318cad9c37 | ||
|
|
8fb5a1aaff | ||
|
|
7d0358475d | ||
|
|
b46f680f01 | ||
|
|
ad9f317870 | ||
|
|
a8fcd89d6d | ||
|
|
232335fd49 | ||
|
|
e12cff87b8 | ||
|
|
c4ba40e308 | ||
|
|
7a19065369 | ||
|
|
8a149e6294 | ||
|
|
956be69e1d | ||
|
|
6a2bde7a2d | ||
|
|
d7cbc7ac13 | ||
|
|
8bf71e9e06 | ||
|
|
40af513669 | ||
|
|
88011f625d | ||
|
|
8310d4d3f7 | ||
|
|
5faffc123f | ||
|
|
81794ccb9a | ||
|
|
edb5980c13 | ||
|
|
573d9218f2 | ||
|
|
7e549d5f37 | ||
|
|
085cf236c2 | ||
|
|
0d1c695508 | ||
|
|
efaf30d536 | ||
|
|
b7b3d8ec8e | ||
|
|
2df60f7315 | ||
|
|
cd7af5260a | ||
|
|
3a2e8c3537 | ||
|
|
56948dbf0f | ||
|
|
0fb380c966 | ||
|
|
c95f5008fe | ||
|
|
708dec5bb7 | ||
|
|
20b0ed1da5 | ||
|
|
6f43310de5 | ||
|
|
782f75ba94 | ||
|
|
0d0c6f7d7d | ||
|
|
0551e571dd | ||
|
|
7e253607c2 | ||
|
|
b5e98e4dda | ||
|
|
3d50ccdc0d | ||
|
|
6abec09eb4 | ||
|
|
40ac64ae4f | ||
|
|
298b13bba4 | ||
|
|
78b05f6476 | ||
|
|
2b967590a0 | ||
|
|
91b4233e06 | ||
|
|
089aad57f7 | ||
|
|
a3da10662f | ||
|
|
d46f07bb4e | ||
|
|
a670e8061e | ||
|
|
879a51165f | ||
|
|
2c3dfe2bf3 | ||
|
|
ef30e52c8f | ||
|
|
dd2b897795 | ||
|
|
9276c9012f | ||
|
|
391584af85 | ||
|
|
6fbca2a4a1 | ||
|
|
0001260f4b | ||
|
|
3c6294ca3d | ||
|
|
dd43661cfd | ||
|
|
9253dadaa7 | ||
|
|
12ab1804b6 | ||
|
|
1e03a62b67 | ||
|
|
faa73690e4 | ||
|
|
f24d5307cf | ||
|
|
8037d78eed | ||
|
|
1ca750471a | ||
|
|
0a4276bc2f | ||
|
|
08bddde3f3 | ||
|
|
e173c51c04 | ||
|
|
9c42f0374a | ||
|
|
d4380c1fe4 | ||
|
|
a51102e9b7 | ||
|
|
7282419525 | ||
|
|
c5b1fbcb2e | ||
|
|
e1cdd15b30 | ||
|
|
d4c0330967 | ||
|
|
12540cedb5 | ||
|
|
99adc8b062 | ||
|
|
6a9bbfc227 | ||
|
|
3349e9debd | ||
|
|
dd7612358d | ||
|
|
e5a6ef3808 | ||
|
|
7aac0aff8e | ||
|
|
26d7f06206 | ||
|
|
68a69c5b50 | ||
|
|
a571359afd | ||
|
|
c2464a7c4a | ||
|
|
294f933869 | ||
|
|
f59c9bd6ef | ||
|
|
c53be46d78 | ||
|
|
bbb2d73d73 | ||
|
|
659ed16591 | ||
|
|
35c98a3556 | ||
|
|
f1a5dd06c5 | ||
|
|
e125a3dc33 | ||
|
|
35f1f21a7f | ||
|
|
7b4b7179ba | ||
|
|
7a92c1538e | ||
|
|
5727268141 | ||
|
|
3d9a50e841 | ||
|
|
828c849b44 | ||
|
|
ecc0bc9813 | ||
|
|
12f209b7b0 | ||
|
|
7316a87930 | ||
|
|
0bff057a87 | ||
|
|
7ee1d29dd4 | ||
|
|
1e6cf9808c | ||
|
|
278511ad2d | ||
|
|
3b5ffb49d3 | ||
|
|
8519e4ed9f | ||
|
|
55eda3813b | ||
|
|
53bfc83c26 | ||
|
|
13ca89f6f0 | ||
|
|
461cf9ea38 | ||
|
|
0664ba4c97 | ||
|
|
aa744dfa59 | ||
|
|
61cf8f74d9 | ||
|
|
de202fa375 | ||
|
|
6f93b53590 | ||
|
|
11c44dede1 | ||
|
|
f00d642592 | ||
|
|
9e4584d069 | ||
|
|
2a5679da5f | ||
|
|
a71e8c82f6 | ||
|
|
9b987badb0 | ||
|
|
1619b2f3c8 | ||
|
|
4f3153395a | ||
|
|
d7a1a7ff2a | ||
|
|
308e6195b7 | ||
|
|
7a3d7b1f52 | ||
|
|
74cc2d6623 | ||
|
|
fc3a558515 | ||
|
|
cd9fafc054 | ||
|
|
84b92e6373 | ||
|
|
c279a53ed8 | ||
|
|
e1df5a6e23 | ||
|
|
5c658f8746 | ||
|
|
ec4390a967 | ||
|
|
fced5744fb | ||
|
|
8c0fb1258d | ||
|
|
aae581d004 | ||
|
|
e17303933a | ||
|
|
f9226275f4 | ||
|
|
cf8c7e28b3 | ||
|
|
5ac02f6dc7 | ||
|
|
7aa1ad4923 | ||
|
|
dcd15b546c | ||
|
|
96284ab295 | ||
|
|
d5e1255ca7 | ||
|
|
587455868e | ||
|
|
323c237e7b | ||
|
|
faa5e2e5e3 | ||
|
|
551fdf53e8 | ||
|
|
fdf291be30 | ||
|
|
68eb4fa329 | ||
|
|
05196a8497 | ||
|
|
db9b611b12 | ||
|
|
2e6333f74e | ||
|
|
c99cc41cbd | ||
|
|
711ecb8bd5 | ||
|
|
10c2ebdfc5 | ||
|
|
26b3b3a3e6 | ||
|
|
acdff55a6a | ||
|
|
7d6b68eb4a | ||
|
|
0bbca5e803 | ||
|
|
cd5241d0cf | ||
|
|
8d652f11e7 | ||
|
|
6c86570e1f | ||
|
|
53ba1a77c8 | ||
|
|
d23c7c713c | ||
|
|
8c43d7fa5f | ||
|
|
085f215257 | ||
|
|
8f758eeff9 | ||
|
|
0afc76fd65 | ||
|
|
91e1c5080c | ||
|
|
73f04c2c72 | ||
|
|
3e633152c6 | ||
|
|
d5130ce7e3 | ||
|
|
4824b88fcb | ||
|
|
cc26d888b8 | ||
|
|
8577be2a95 | ||
|
|
1edf30b790 | ||
|
|
b752858d6c | ||
|
|
4fc8c937d4 | ||
|
|
efa4f5c936 | ||
|
|
17d655fa64 | ||
|
|
f68141cf1d | ||
|
|
aa90518201 | ||
|
|
6b85dbb6dc | ||
|
|
a0debd4293 | ||
|
|
937493bfeb | ||
|
|
74b0672223 | ||
|
|
6e7be06e07 | ||
|
|
a04d0555ba | ||
|
|
3761c30ba4 | ||
|
|
38593cd3a3 | ||
|
|
e3b7781c2b | ||
|
|
5e6965ea47 | ||
|
|
5cc0301fc3 | ||
|
|
19a6dedfd6 | ||
|
|
0e2b92e216 | ||
|
|
d06b92906a | ||
|
|
8e98478ff3 | ||
|
|
fb8968fb83 | ||
|
|
dae6b82a71 | ||
|
|
d73244b825 | ||
|
|
233c6b959f | ||
|
|
16ec5323c9 | ||
|
|
0ad02ef2d6 | ||
|
|
73397faf68 | ||
|
|
5fc2203d8a | ||
|
|
78dcf5c3d5 | ||
|
|
32f793195f | ||
|
|
be4e5fcd20 | ||
|
|
855e0cb700 | ||
|
|
7f7d04dcd2 | ||
|
|
4e1b521e27 | ||
|
|
a1a96589aa | ||
|
|
0e68beb89f | ||
|
|
926ba8b7ca | ||
|
|
9f080c47e1 | ||
|
|
52eba814ce | ||
|
|
935356c34f | ||
|
|
ff9388d625 | ||
|
|
4f05c23673 | ||
|
|
4a1263f609 | ||
|
|
962376664d | ||
|
|
5fef0d1b75 | ||
|
|
578f471808 | ||
|
|
5a8447e97e | ||
|
|
be95bdaf47 | ||
|
|
c44ff4d648 | ||
|
|
e003a1294c | ||
|
|
44062517eb | ||
|
|
13f0f8c10e | ||
|
|
f5df444ceb | ||
|
|
e382713423 | ||
|
|
aaa8551c57 | ||
|
|
0d87c1ffb6 | ||
|
|
0b194426f8 | ||
|
|
63a7d7fb24 | ||
|
|
b4ede558a5 | ||
|
|
de3e2d4349 | ||
|
|
a0e51e96f1 | ||
|
|
d6afac9624 | ||
|
|
c2891330bc | ||
|
|
ceaa931e48 | ||
|
|
eaa63165df | ||
|
|
c65357c566 | ||
|
|
e63e9f9f26 | ||
|
|
1fe3aab047 | ||
|
|
aafd3ab60e | ||
|
|
1a1935507b | ||
|
|
d2f84c9c8a | ||
|
|
ca32253f32 | ||
|
|
9066d1f982 | ||
|
|
8d85be770d | ||
|
|
7ba1d9b9ca | ||
|
|
31aff441ce | ||
|
|
e737e32fd1 | ||
|
|
8635d425c1 | ||
|
|
97cd4b8aee | ||
|
|
72390e3ffb | ||
|
|
b07d733a71 | ||
|
|
fa3018c30e | ||
|
|
6caa40302e | ||
|
|
a48b247e9e | ||
|
|
b1b115ecd6 | ||
|
|
07bba933ff | ||
|
|
e85f8af519 | ||
|
|
adfa0ab878 | ||
|
|
cbb6649e97 | ||
|
|
77abc9b280 | ||
|
|
81e8690763 | ||
|
|
dd04a8ac22 | ||
|
|
cb554b3a9c | ||
|
|
1153459d1b | ||
|
|
1a73390ffe | ||
|
|
8b981e41a1 | ||
|
|
c10b1f555d | ||
|
|
14db1ca508 | ||
|
|
66eafb16cf | ||
|
|
3ae30cd6b9 | ||
|
|
692d9c881c | ||
|
|
055b481386 | ||
|
|
ce2b1edd4e | ||
|
|
8cf3657fb6 | ||
|
|
44222a7fe0 | ||
|
|
3ac153180c | ||
|
|
96b486acee | ||
|
|
3602a2cd1f | ||
|
|
b65de4947a | ||
|
|
04ad946fc8 | ||
|
|
f704b8d32f | ||
|
|
708ad330ac | ||
|
|
c6a27bbe64 | ||
|
|
f16b4f10b6 | ||
|
|
87a2ccc37c | ||
|
|
e3e20e2242 | ||
|
|
594b9f4c73 | ||
|
|
c96c6a26fd | ||
|
|
c8f2c5d636 | ||
|
|
5f2fa15e04 | ||
|
|
7d144aaabc | ||
|
|
f9890a6452 | ||
|
|
2c7143459f | ||
|
|
3857581adf | ||
|
|
e9754e6250 | ||
|
|
76398c3233 | ||
|
|
ba024fcfc0 | ||
|
|
b9b52c295d | ||
|
|
285d042b10 | ||
|
|
01db7908b8 | ||
|
|
5f75df40d5 | ||
|
|
b3f100dc25 | ||
|
|
2f65aad626 | ||
|
|
25116788ef | ||
|
|
958f0de65e | ||
|
|
5d212f66a7 | ||
|
|
f88ee18409 | ||
|
|
d22917a58a | ||
|
|
640cccc2b1 | ||
|
|
fba6532502 | ||
|
|
da7f69e8f4 | ||
|
|
044fb91ea5 | ||
|
|
b4380acf77 | ||
|
|
d1dd4e302e | ||
|
|
318f0949c3 | ||
|
|
299cdcdc29 | ||
|
|
a8516c5b47 | ||
|
|
c40538eaeb | ||
|
|
33e37d01b3 | ||
|
|
64db4576e6 | ||
|
|
0d22551a6b | ||
|
|
1d121852c1 | ||
|
|
98965da2e8 | ||
|
|
39937d15cd | ||
|
|
a9d7eee0dc | ||
|
|
1367a64d09 | ||
|
|
e31948ceb0 | ||
|
|
233ec2a1cc | ||
|
|
a4c6a88a65 | ||
|
|
faf0811483 | ||
|
|
4e4a3e783f | ||
|
|
d00ada378f | ||
|
|
41407acc19 | ||
|
|
67874468a6 | ||
|
|
c99c43d51e | ||
|
|
1397b47197 | ||
|
|
45f78963ac | ||
|
|
402443bf9c | ||
|
|
19fdbee291 | ||
|
|
3b0cdfab1e | ||
|
|
46efa6a1da | ||
|
|
ea1465cdf8 | ||
|
|
fb4be3b3eb | ||
|
|
6c2f4ddbcd | ||
|
|
870c4d49c0 | ||
|
|
cd7684097c | ||
|
|
2690b71b1f | ||
|
|
3e4acedf0e | ||
|
|
2610752dbb | ||
|
|
dbb213655e | ||
|
|
9742dba595 | ||
|
|
f2f8a0fe8b | ||
|
|
55a0b27c01 | ||
|
|
fbc21266e6 | ||
|
|
c053559ed9 | ||
|
|
55e4332f00 | ||
|
|
a550431ee6 | ||
|
|
3e8d6ea74f | ||
|
|
839395fc25 | ||
|
|
1331642f24 | ||
|
|
1e0bbea868 | ||
|
|
ccf41ebf78 | ||
|
|
53e849f4fc | ||
|
|
8447498b50 | ||
|
|
b6519159f5 | ||
|
|
63c56d3da9 | ||
|
|
718d0f18e3 | ||
|
|
6040858b22 | ||
|
|
70642fe4ed | ||
|
|
79d4a62e10 | ||
|
|
1ac8c32f1d | ||
|
|
0b2ad98e48 | ||
|
|
69363622a8 | ||
|
|
e6d754fddc | ||
|
|
f74ff6da38 | ||
|
|
2feef49fa8 | ||
|
|
53b6023a6c | ||
|
|
309875de3c | ||
|
|
b809f99cee | ||
|
|
5a291606ad | ||
|
|
1ce054fcb3 | ||
|
|
8fade093aa | ||
|
|
96f0bbe067 | ||
|
|
d8392c1245 | ||
|
|
aca7d7e953 | ||
|
|
94b125255f | ||
|
|
3684706a12 | ||
|
|
90aa8e24b9 | ||
|
|
11ac4665c8 | ||
|
|
c666158b79 | ||
|
|
ccf581f94d | ||
|
|
e9493f69eb | ||
|
|
88bef3bffc | ||
|
|
f27942a68a | ||
|
|
857899526f | ||
|
|
0cc2b3de0b | ||
|
|
b9534bbd76 | ||
|
|
45c8b5e756 | ||
|
|
a96a4cb012 | ||
|
|
baec8f5cac | ||
|
|
d6e8459f20 | ||
|
|
dfe1eef33b | ||
|
|
cc7cab8a45 | ||
|
|
61ae47eb99 | ||
|
|
22353b1727 | ||
|
|
efffd28739 | ||
|
|
62cabef857 | ||
|
|
711ca33bc6 | ||
|
|
40a3fed6b8 | ||
|
|
2297a2d989 | ||
|
|
5408074941 | ||
|
|
bbcdf63bb4 | ||
|
|
43eabab62f | ||
|
|
50901943fd | ||
|
|
7df0820160 | ||
|
|
17ee2237c3 | ||
|
|
4b7381b7a4 | ||
|
|
abade3f896 | ||
|
|
d1349e7a11 | ||
|
|
3efeaed0d8 | ||
|
|
d38a1ddc7a | ||
|
|
6b92204a7c | ||
|
|
f2ac1a5cee | ||
|
|
e12cf1123e | ||
|
|
d3e2f0a1af | ||
|
|
c2323dd4d2 | ||
|
|
f8eba3d548 | ||
|
|
40ab5cfc50 | ||
|
|
b7a8f9ad47 | ||
|
|
f874465bb8 | ||
|
|
bb6e050509 | ||
|
|
87336b9acf | ||
|
|
19664f3ef4 | ||
|
|
c50661e5b7 | ||
|
|
b8d64a856a | ||
|
|
898fc7552a | ||
|
|
ab0a0a75fc | ||
|
|
1cf2b10224 | ||
|
|
7ac7e147d4 | ||
|
|
7ba4fe5afb | ||
|
|
a55377e9a4 | ||
|
|
dcd5ba4443 | ||
|
|
3f1b57668e | ||
|
|
d8f18d32c3 | ||
|
|
bdb5c842fc | ||
|
|
e7c969e164 | ||
|
|
9bd962f655 | ||
|
|
4f5691e5c0 | ||
|
|
29293160a4 | ||
|
|
3e33afef2e | ||
|
|
8614057ea9 | ||
|
|
7f375f9e8f | ||
|
|
69c5169e7d | ||
|
|
e19948baa1 | ||
|
|
a2eaf234fc | ||
|
|
6a13a94e71 | ||
|
|
eff43d3289 | ||
|
|
9c4817d07b | ||
|
|
319f3a0451 | ||
|
|
02c7766f68 | ||
|
|
f38cb67ca8 | ||
|
|
eea2e30b74 | ||
|
|
19b8fd2aed | ||
|
|
0cc5212741 | ||
|
|
c47c8e8cf5 | ||
|
|
a11555c715 | ||
|
|
897d03518e | ||
|
|
23fbc5728e | ||
|
|
6d40fa587f | ||
|
|
22dcd79959 | ||
|
|
ea4df0aad3 | ||
|
|
e127fb8fd8 | ||
|
|
7fb718a7d8 | ||
|
|
24f58c8bb1 | ||
|
|
95b1faf667 | ||
|
|
2d9e406050 | ||
|
|
59083e3ce1 | ||
|
|
685be40339 | ||
|
|
31c9e399e9 | ||
|
|
7de6bb9889 | ||
|
|
d63034303b | ||
|
|
51ff17d46e | ||
|
|
905534942a | ||
|
|
18e90ee2e3 | ||
|
|
e00cccc41e | ||
|
|
73f09bf64f | ||
|
|
02e772c7e4 | ||
|
|
7aee913991 | ||
|
|
e50a933037 | ||
|
|
5f9011d6ef | ||
|
|
ebb9eba987 | ||
|
|
8e5a1083bb | ||
|
|
6743beb748 | ||
|
|
bcabf72c08 | ||
|
|
cda29f183b | ||
|
|
e52d36450a | ||
|
|
f8f2e261fe | ||
|
|
be3c843700 | ||
|
|
e6f57db846 | ||
|
|
9bfd267d51 | ||
|
|
924bc5372e | ||
|
|
2b83a69650 | ||
|
|
133c11a156 | ||
|
|
30f52d53df | ||
|
|
a124637329 | ||
|
|
642aaba2e0 | ||
|
|
4c616173e4 | ||
|
|
5e83d80725 | ||
|
|
b2e1797dc6 | ||
|
|
e216f686cb | ||
|
|
e42652f772 | ||
|
|
e77db2af31 | ||
|
|
37b00841ac | ||
|
|
fc0e0391f3 | ||
|
|
da0f27b9ac | ||
|
|
c22068c406 | ||
|
|
dee100d0e4 | ||
|
|
0273966abb | ||
|
|
3a67daa954 | ||
|
|
ab567d8443 | ||
|
|
3c09cea4b2 | ||
|
|
b4f2153dcd | ||
|
|
1c4b0eeae3 | ||
|
|
406d9d64e9 | ||
|
|
1bec9abb9a | ||
|
|
3814bf60d3 | ||
|
|
847e19c04e | ||
|
|
46c7b4d5c8 | ||
|
|
8e05d291b5 | ||
|
|
9da555e5f7 | ||
|
|
6d0db0151f | ||
|
|
37b9033c90 | ||
|
|
59e7a518c6 | ||
|
|
13889515b3 | ||
|
|
248c9340c3 | ||
|
|
e9f33b4ca7 | ||
|
|
f5d847122a | ||
|
|
a4c96eca67 | ||
|
|
fb02cb0a41 | ||
|
|
baa0363ea2 | ||
|
|
34ba66606a | ||
|
|
f615dc7603 | ||
|
|
331c417637 | ||
|
|
6c3a0b5d46 | ||
|
|
fd9fd42936 | ||
|
|
9798481979 | ||
|
|
d7a17ad85d | ||
|
|
d35f6c63c2 | ||
|
|
166d76e864 | ||
|
|
f9f127d838 | ||
|
|
62231ab337 | ||
|
|
3119def9a7 | ||
|
|
33b332372a | ||
|
|
fd838c75bc | ||
|
|
b57a60dac8 | ||
|
|
5c51163972 | ||
|
|
9299d8cfd6 | ||
|
|
0a3d3b945d | ||
|
|
4f680a7d61 | ||
|
|
ba926e807c | ||
|
|
60c6dec6e6 | ||
|
|
47898cca35 | ||
|
|
53bb924287 | ||
|
|
1e80b8b0d3 | ||
|
|
a901b065d3 | ||
|
|
3937e2a0a0 | ||
|
|
9707d608d5 | ||
|
|
701b9d7556 | ||
|
|
8977b3f235 | ||
|
|
f6426395ea | ||
|
|
0ac787eefe | ||
|
|
e5b96e55a7 | ||
|
|
1d183dcda8 | ||
|
|
e19bf3a28b | ||
|
|
3649cfbd7b | ||
|
|
5ae8993752 | ||
|
|
84d90d6ed8 | ||
|
|
518e2424a8 | ||
|
|
00e373aea6 | ||
|
|
9eaea02f33 | ||
|
|
ab7043373f | ||
|
|
504cdb10ed | ||
|
|
a8002b0c5f | ||
|
|
0553476fba | ||
|
|
2416d9dbac | ||
|
|
0d8e227ea7 | ||
|
|
12d1fb2e40 | ||
|
|
1b7f427401 | ||
|
|
b2284647a3 | ||
|
|
a6116e5859 | ||
|
|
fb5d5bb971 | ||
|
|
371071d461 | ||
|
|
8a143516e3 | ||
|
|
e5897ecb9b | ||
|
|
714638c187 | ||
|
|
e27c372e53 | ||
|
|
f3f2b3d768 | ||
|
|
9492298048 | ||
|
|
43725b82c5 | ||
|
|
14fd3d35de | ||
|
|
cebc07cebd | ||
|
|
33c5e8db7f | ||
|
|
67e39bd8fb | ||
|
|
9eb1499095 | ||
|
|
4662a0b13a | ||
|
|
e74462a3f5 | ||
|
|
056ba26755 | ||
|
|
a0d9a7fd83 | ||
|
|
5d3fc092e9 | ||
|
|
c94fe71278 | ||
|
|
d60b49e5c5 | ||
|
|
64b5a0ef84 | ||
|
|
162791e30e | ||
|
|
8743093bd7 | ||
|
|
96cf6779ca | ||
|
|
3b20b62423 | ||
|
|
6ddbfea700 | ||
|
|
c0624a26be | ||
|
|
4bfaf1ce66 | ||
|
|
e8c39138c6 | ||
|
|
f992799226 | ||
|
|
4c65afcce1 | ||
|
|
7fa5c4e2fd | ||
|
|
fa0e6a6c93 | ||
|
|
2f59135eb6 | ||
|
|
38681fb1c6 | ||
|
|
6b5d26e07b | ||
|
|
13d2d48e67 | ||
|
|
189fadfde0 | ||
|
|
627d5e7401 | ||
|
|
943fa2fb58 | ||
|
|
1b62a4f3c9 | ||
|
|
461e691127 | ||
|
|
cfaf1c678f | ||
|
|
0d7bad1f35 | ||
|
|
373a1bdadb | ||
|
|
2828f6630c | ||
|
|
58cff2fed8 | ||
|
|
5690cf3f0e | ||
|
|
a0aeda6187 | ||
|
|
84b3d760c4 | ||
|
|
0beea3a5a5 | ||
|
|
560c96a9a7 | ||
|
|
0ccfa60a53 | ||
|
|
30be551502 | ||
|
|
be1ce38f24 | ||
|
|
e818ace11a | ||
|
|
e4bfbd8258 | ||
|
|
2d5b442f5b | ||
|
|
af11aff309 | ||
|
|
e66aa5f3b7 | ||
|
|
31cf22cb4b | ||
|
|
20e593a44a | ||
|
|
7194424fef | ||
|
|
d11bde60d0 | ||
|
|
9e154aba58 | ||
|
|
5057a4b4df | ||
|
|
3e8ea7a351 | ||
|
|
d3dcdddf75 | ||
|
|
e5e7595bf9 | ||
|
|
7693887d61 | ||
|
|
8d9b196e0d | ||
|
|
a6cf8aafc0 | ||
|
|
dbdca7bf0c | ||
|
|
dabaecb2bc | ||
|
|
8c23965da3 | ||
|
|
61f21b5d03 | ||
|
|
8ede4a8da4 | ||
|
|
1c5b6bb4f7 | ||
|
|
c5f5c7a076 | ||
|
|
9a508abdc7 | ||
|
|
5eefe18ae4 | ||
|
|
1e8bb0e0e0 | ||
|
|
864b8b31de | ||
|
|
d2d15e522f | ||
|
|
f4d1e7a265 | ||
|
|
0f6bec0a32 | ||
|
|
92cdac5f87 | ||
|
|
1a41022e3e | ||
|
|
e5c47e44f6 |
22
.gitignore
vendored
22
.gitignore
vendored
@@ -14,7 +14,23 @@ lapack-3.4.2.tgz
|
||||
lapack-netlib/make.inc
|
||||
lapack-netlib/lapacke/include/lapacke_mangling.h
|
||||
lapack-netlib/TESTING/testing_results.txt
|
||||
lapack-netlib/INSTALL/test*
|
||||
lapack-netlib/TESTING/xeigtstc
|
||||
lapack-netlib/TESTING/xeigtstd
|
||||
lapack-netlib/TESTING/xeigtsts
|
||||
lapack-netlib/TESTING/xeigtstz
|
||||
lapack-netlib/TESTING/xlintstc
|
||||
lapack-netlib/TESTING/xlintstd
|
||||
lapack-netlib/TESTING/xlintstds
|
||||
lapack-netlib/TESTING/xlintstrfc
|
||||
lapack-netlib/TESTING/xlintstrfd
|
||||
lapack-netlib/TESTING/xlintstrfs
|
||||
lapack-netlib/TESTING/xlintstrfz
|
||||
lapack-netlib/TESTING/xlintsts
|
||||
lapack-netlib/TESTING/xlintstz
|
||||
lapack-netlib/TESTING/xlintstzc
|
||||
*.so
|
||||
*.so.*
|
||||
*.a
|
||||
.svn
|
||||
*~
|
||||
@@ -65,3 +81,9 @@ test/sblat3
|
||||
test/zblat1
|
||||
test/zblat2
|
||||
test/zblat3
|
||||
build
|
||||
build.*
|
||||
*.swp
|
||||
benchmark/*.goto
|
||||
benchmark/smallscaling
|
||||
|
||||
|
||||
16
.travis.yml
16
.travis.yml
@@ -1,4 +1,13 @@
|
||||
language: c
|
||||
|
||||
notifications:
|
||||
webhooks:
|
||||
urls:
|
||||
- https://webhooks.gitter.im/e/8a6e4470a0cebd090344
|
||||
on_success: change # options: [always|never|change] default: always
|
||||
on_failure: always # options: [always|never|change] default: always
|
||||
on_start: never # options: [always|never|change] default: always
|
||||
|
||||
compiler:
|
||||
- gcc
|
||||
|
||||
@@ -15,7 +24,12 @@ before_install:
|
||||
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
|
||||
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi
|
||||
|
||||
script: make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
|
||||
script:
|
||||
- set -e
|
||||
- make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
|
||||
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C test DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
|
||||
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C ctest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
|
||||
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C utest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
|
||||
|
||||
# whitelist
|
||||
branches:
|
||||
|
||||
293
CMakeLists.txt
Normal file
293
CMakeLists.txt
Normal file
@@ -0,0 +1,293 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
##
|
||||
|
||||
cmake_minimum_required(VERSION 2.8.5)
|
||||
project(OpenBLAS)
|
||||
set(OpenBLAS_MAJOR_VERSION 0)
|
||||
set(OpenBLAS_MINOR_VERSION 2)
|
||||
set(OpenBLAS_PATCH_VERSION 20)
|
||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||
|
||||
enable_language(ASM)
|
||||
enable_language(C)
|
||||
|
||||
# Adhere to GNU filesystem layout conventions
|
||||
include(GNUInstallDirs)
|
||||
|
||||
if(MSVC)
|
||||
set(OpenBLAS_LIBNAME libopenblas)
|
||||
else()
|
||||
set(OpenBLAS_LIBNAME openblas)
|
||||
endif()
|
||||
|
||||
#######
|
||||
if(MSVC)
|
||||
option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
|
||||
endif()
|
||||
option(BUILD_WITHOUT_CBLAS "Without CBLAS" OFF)
|
||||
option(BUILD_DEBUG "Build Debug Version" OFF)
|
||||
#######
|
||||
if(BUILD_WITHOUT_LAPACK)
|
||||
set(NO_LAPACK 1)
|
||||
set(NO_LAPACKE 1)
|
||||
endif()
|
||||
|
||||
if(CMAKE_CONFIGURATION_TYPES) # multiconfig generator?
|
||||
set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE)
|
||||
set(CMAKE_BUILD_TYPE
|
||||
Debug Debug
|
||||
Release Release
|
||||
)
|
||||
else()
|
||||
if( NOT CMAKE_BUILD_TYPE )
|
||||
if(BUILD_DEBUG)
|
||||
set(CMAKE_BUILD_TYPE Debug)
|
||||
else()
|
||||
set(CMAKE_BUILD_TYPE Release)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(BUILD_WITHOUT_CBLAS)
|
||||
set(NO_CBLAS 1)
|
||||
endif()
|
||||
|
||||
#######
|
||||
|
||||
|
||||
message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.")
|
||||
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake")
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/system.cmake")
|
||||
|
||||
set(BLASDIRS interface driver/level2 driver/level3 driver/others)
|
||||
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
list(APPEND BLASDIRS kernel)
|
||||
endif ()
|
||||
|
||||
if (DEFINED SANITY_CHECK)
|
||||
list(APPEND BLASDIRS reference)
|
||||
endif ()
|
||||
|
||||
set(SUBDIRS ${BLASDIRS})
|
||||
if (NOT NO_LAPACK)
|
||||
list(APPEND SUBDIRS lapack)
|
||||
endif ()
|
||||
|
||||
# set which float types we want to build for
|
||||
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
|
||||
# if none are defined, build for all
|
||||
set(BUILD_SINGLE true)
|
||||
set(BUILD_DOUBLE true)
|
||||
set(BUILD_COMPLEX true)
|
||||
set(BUILD_COMPLEX16 true)
|
||||
endif ()
|
||||
|
||||
set(FLOAT_TYPES "")
|
||||
if (BUILD_SINGLE)
|
||||
message(STATUS "Building Single Precision")
|
||||
list(APPEND FLOAT_TYPES "SINGLE") # defines nothing
|
||||
endif ()
|
||||
|
||||
if (BUILD_DOUBLE)
|
||||
message(STATUS "Building Double Precision")
|
||||
list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE
|
||||
endif ()
|
||||
|
||||
if (BUILD_COMPLEX)
|
||||
message(STATUS "Building Complex Precision")
|
||||
list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX
|
||||
endif ()
|
||||
|
||||
if (BUILD_COMPLEX16)
|
||||
message(STATUS "Building Double Complex Precision")
|
||||
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
|
||||
endif ()
|
||||
|
||||
set(SUBDIRS_ALL ${SUBDIRS} test ctest utest exports benchmark ../laswp ../bench)
|
||||
|
||||
# all :: libs netlib tests shared
|
||||
|
||||
# libs :
|
||||
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
|
||||
message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
|
||||
endif ()
|
||||
|
||||
if (${NO_STATIC} AND ${NO_SHARED})
|
||||
message(FATAL_ERROR "Neither static nor shared are enabled.")
|
||||
endif ()
|
||||
|
||||
#Set default output directory
|
||||
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
|
||||
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
|
||||
if(MSVC)
|
||||
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib/Debug)
|
||||
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib/Release)
|
||||
endif ()
|
||||
# get obj vars into format that add_library likes: $<TARGET_OBJECTS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
|
||||
set(TARGET_OBJS "")
|
||||
foreach (SUBDIR ${SUBDIRS})
|
||||
add_subdirectory(${SUBDIR})
|
||||
string(REPLACE "/" "_" subdir_obj ${SUBDIR})
|
||||
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:${subdir_obj}>")
|
||||
endforeach ()
|
||||
|
||||
# netlib:
|
||||
|
||||
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
|
||||
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
|
||||
if (NOT NOFORTRAN AND NOT NO_LAPACK)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/lapack.cmake")
|
||||
if (NOT NO_LAPACKE)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# Only generate .def for dll on MSVC and always produce pdb files for debug and release
|
||||
if(MSVC)
|
||||
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi")
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
|
||||
endif()
|
||||
|
||||
# add objects to the openblas lib
|
||||
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
|
||||
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/export.cmake")
|
||||
|
||||
# Set output for libopenblas
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_NAME_DEBUG "${OpenBLAS_LIBNAME}_d")
|
||||
|
||||
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
|
||||
string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG )
|
||||
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
|
||||
endforeach()
|
||||
|
||||
enable_testing()
|
||||
add_subdirectory(utest)
|
||||
|
||||
if (NOT MSVC)
|
||||
#only build shared library for MSVC
|
||||
|
||||
add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
|
||||
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
|
||||
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
|
||||
|
||||
if(SMP)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME} pthread)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
|
||||
endif()
|
||||
|
||||
#build test and ctest
|
||||
add_subdirectory(test)
|
||||
if(NOT NO_CBLAS)
|
||||
add_subdirectory(ctest)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
|
||||
VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}
|
||||
SOVERSION ${OpenBLAS_MAJOR_VERSION}
|
||||
)
|
||||
|
||||
|
||||
# TODO: Why is the config saved here? Is this necessary with CMake?
|
||||
#Save the config files for installation
|
||||
# @cp Makefile.conf Makefile.conf_last
|
||||
# @cp config.h config_last.h
|
||||
#ifdef QUAD_PRECISION
|
||||
# @echo "#define QUAD_PRECISION">> config_last.h
|
||||
#endif
|
||||
#ifeq ($(EXPRECISION), 1)
|
||||
# @echo "#define EXPRECISION">> config_last.h
|
||||
#endif
|
||||
###
|
||||
#ifeq ($(DYNAMIC_ARCH), 1)
|
||||
# @$(MAKE) -C kernel commonlibs || exit 1
|
||||
# @for d in $(DYNAMIC_CORE) ; \
|
||||
# do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
|
||||
# done
|
||||
# @echo DYNAMIC_ARCH=1 >> Makefile.conf_last
|
||||
#endif
|
||||
#ifdef USE_THREAD
|
||||
# @echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
|
||||
#endif
|
||||
# @touch lib.grd
|
||||
|
||||
# Install project
|
||||
|
||||
# Install libraries
|
||||
install(TARGETS ${OpenBLAS_LIBNAME}
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
|
||||
|
||||
# Install include files
|
||||
set (GENCONFIG_BIN ${CMAKE_BINARY_DIR}/gen_config_h${CMAKE_EXECUTABLE_SUFFIX})
|
||||
ADD_CUSTOM_COMMAND(
|
||||
OUTPUT ${CMAKE_BINARY_DIR}/openblas_config.h
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/config.h
|
||||
COMMAND ${GENCONFIG_BIN} ${CMAKE_CURRENT_SOURCE_DIR}/config.h ${CMAKE_CURRENT_SOURCE_DIR}/openblas_config_template.h > ${CMAKE_BINARY_DIR}/openblas_config.h
|
||||
)
|
||||
|
||||
ADD_CUSTOM_TARGET(genconfig
|
||||
ALL
|
||||
DEPENDS openblas_config.h
|
||||
)
|
||||
add_dependencies(genconfig ${OpenBLAS_LIBNAME})
|
||||
|
||||
install (FILES ${CMAKE_BINARY_DIR}/openblas_config.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
message(STATUS "Generating f77blas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
|
||||
ADD_CUSTOM_TARGET(genf77blas
|
||||
ALL
|
||||
COMMAND ${AWK} 'BEGIN{print \"\#ifndef OPENBLAS_F77BLAS_H\" \; print \"\#define OPENBLAS_F77BLAS_H\" \; print \"\#include \\"openblas_config.h\\" \"}; NF {print}; END{print \"\#endif\"}' ${CMAKE_CURRENT_SOURCE_DIR}/common_interface.h > ${CMAKE_BINARY_DIR}/f77blas.h
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/config.h
|
||||
)
|
||||
add_dependencies(genf77blas ${OpenBLAS_LIBNAME})
|
||||
|
||||
install (FILES ${CMAKE_BINARY_DIR}/f77blas.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
if(NOT NO_CBLAS)
|
||||
message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
|
||||
ADD_CUSTOM_TARGET(gencblas
|
||||
ALL
|
||||
COMMAND ${SED} 's/common/openblas_config/g' ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h > "${CMAKE_BINARY_DIR}/cblas.tmp"
|
||||
COMMAND cp "${CMAKE_BINARY_DIR}/cblas.tmp" "${CMAKE_BINARY_DIR}/cblas.h"
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h
|
||||
)
|
||||
add_dependencies(gencblas ${OpenBLAS_LIBNAME})
|
||||
|
||||
install (FILES ${CMAKE_BINARY_DIR}/cblas.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
endif()
|
||||
|
||||
if(NOT NO_LAPACKE)
|
||||
message (STATUS "Copying LAPACKE header files to ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
add_dependencies( ${OpenBLAS_LIBNAME} genlapacke)
|
||||
FILE(GLOB_RECURSE INCLUDE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/*.h")
|
||||
install (FILES ${INCLUDE_FILES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
ADD_CUSTOM_TARGET(genlapacke
|
||||
COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h"
|
||||
)
|
||||
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
endif()
|
||||
|
||||
if(NOT MSVC)
|
||||
install (TARGETS ${OpenBLAS_LIBNAME}_static DESTINATION ${CMAKE_INSTALL_LIBDIR})
|
||||
endif()
|
||||
|
||||
include(FindPkgConfig QUIET)
|
||||
if(PKG_CONFIG_FOUND)
|
||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas.pc @ONLY)
|
||||
install (FILES ${PROJECT_BINARY_DIR}/openblas.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
|
||||
endif()
|
||||
@@ -121,11 +121,50 @@ In chronological order:
|
||||
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
|
||||
ARMv8 support.
|
||||
|
||||
* Jerome Robert <jeromerobert@gmx.com>
|
||||
* [2015-01-01] Speed-up small `ger` and `gemv` using stack allocation (bug #478)
|
||||
* [2015-12-23] `stack_check` in `gemv.c` (bug #722)
|
||||
* [2015-12-28] Allow forcing the number of parallel make jobs
|
||||
* [2015-12-28] Fix detection of AMD E2-3200
|
||||
* [2015-12-31] Let `make MAX_STACK_ALLOC=0` do what is expected
|
||||
* [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731)
|
||||
* [2016-01-24] Use `GEMM_MULTITHREAD_THRESHOLD` as a number of ops (bug #742)
|
||||
* [2016-01-26] Let `openblas_get_num_threads` return the number of active threads (bug #760)
|
||||
* [2016-01-30] Speed-up small `zger`, `zgemv`, `ztrmv` using stack allocation (bug #727)
|
||||
|
||||
* Dan Kortschak
|
||||
* [2015-01-07] Added test for drotmg bug #484.
|
||||
|
||||
* Ton van den Heuvel <https://github.com/ton>
|
||||
* [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity().
|
||||
|
||||
* [Your name or handle] <[email or website]>
|
||||
* [Date] [Brief summary of your changes]
|
||||
* Martin Koehler <https://github.com/grisuthedragon/>
|
||||
* [2015-09-07] Improved imatcopy
|
||||
|
||||
* Ashwin Sekhar T K <https://github.com/ashwinyes/>
|
||||
* [2015-11-09] Assembly kernels for Cortex-A57 (ARMv8)
|
||||
* [2015-11-20] lapack-test fixes for Cortex-A57
|
||||
* [2016-03-14] Additional functional Assembly Kernels for Cortex-A57
|
||||
* [2016-03-14] Optimize Dgemm 4x4 for Cortex-A57
|
||||
|
||||
* theoractice <https://github.com/theoractice/>
|
||||
* [2016-03-20] Fix compiler error in VisualStudio with CMake
|
||||
* [2016-03-22] Fix access violation on Windows while static linking
|
||||
|
||||
* Paul Mustière <https://github.com/buffer51/>
|
||||
* [2016-02-04] Fix Android build on ARMV7
|
||||
* [2016-04-26] Android build with LAPACK for ARMV7 & ARMV8
|
||||
|
||||
* Shivraj Patil <https://github.com/sva-img/>
|
||||
* [2016-05-03] DGEMM optimization for MIPS P5600 and I6400 using MSA
|
||||
|
||||
* Kaustubh Raste <https://github.com/ksraste/>
|
||||
* [2016-05-09] DTRSM optimization for MIPS P5600 and I6400 using MSA
|
||||
* [2016-05-20] STRSM optimization for MIPS P5600 and I6400 using MSA
|
||||
|
||||
* Abdelrauf <https://github.com/quickwritereader>
|
||||
* [2017-01-01] dgemm and dtrmm kernels for IBM z13
|
||||
* [2017-02-26] ztrmm kernel for IBM z13
|
||||
* [2017-03-13] strmm and ctrmm kernel for IBM z13
|
||||
|
||||
|
||||
|
||||
189
Changelog.txt
189
Changelog.txt
@@ -1,4 +1,193 @@
|
||||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.2.20
|
||||
24-Jul-2017
|
||||
|
||||
common:
|
||||
* Improved CMake support
|
||||
* Fixed several thread race and locking bugs
|
||||
* Fixed default LAPACK optimization level
|
||||
* Updated LAPACK to 3.7.0
|
||||
* Added ReLAPACK (https://github.com/HPAC/ReLAPACK, make BUILD_RELAPACK=1)
|
||||
|
||||
POWER:
|
||||
* Optimizations for Power9
|
||||
* Fixed several Power8 assembly bugs
|
||||
|
||||
ARM:
|
||||
* New optimized Vulcan and ThunderX2T99 targets
|
||||
* Support for ARMV7 SOFT_FP ABI (make ARM_SOFTFP_ABI=1)
|
||||
* Detect all cpu cores including offline ones
|
||||
* Fix compilation with CLANG
|
||||
* Support building a shared library for Android
|
||||
|
||||
MIPS:
|
||||
* Fixed several threading issues
|
||||
* Fix compilation with CLANG
|
||||
|
||||
x86_64:
|
||||
* Detect Intel Bay Trail and Apollo Lake
|
||||
* Detect Intel Sky Lake and Kaby Lake
|
||||
* Detect Intel Knights Landing
|
||||
* Detect AMD A8, A10, A12 and Ryzen
|
||||
* Support 64bit builds with Visual Studio
|
||||
* Fix building with Intel and PGI compilers
|
||||
* Fix building with MINGW and TDM-GCC
|
||||
* Fix cmake builds for Haswell and related cpus
|
||||
* Fix building for Sandybridge with CLANG 3.9
|
||||
* Add support for the FLANG compiler
|
||||
|
||||
IBM Z:
|
||||
* New target z13 with BLAS3 optimizations
|
||||
|
||||
====================================================================
|
||||
Version 0.2.19
|
||||
1-Sep-2016
|
||||
common:
|
||||
* Improved cross compiling.
|
||||
* Fix the bug on musl libc.
|
||||
|
||||
POWER:
|
||||
* Optimize BLAS on Power8
|
||||
* Fixed Julia+OpenBLAS bugs on Power8
|
||||
|
||||
MIPS:
|
||||
* Optimize BLAS on MIPS P5600 and I6400 (Thanks, Shivraj Patil, Kaustubh Raste)
|
||||
|
||||
ARM:
|
||||
* Improved on ARM Cortex-A57. (Thanks, Ashwin Sekhar T K)
|
||||
|
||||
|
||||
====================================================================
|
||||
Version 0.2.18
|
||||
12-Apr-2016
|
||||
common:
|
||||
* If you set the MAKE_NB_JOBS flag to a value less than or equal to zero,
|
||||
make will be without -j.
|
||||
|
||||
x86/x86_64:
|
||||
* Support building Visual Studio static library. (#813, Thanks, theoractice)
|
||||
* Fix bugs to pass buildbot CI tests (http://build.openblas.net)
|
||||
|
||||
ARM:
|
||||
* Provide DGEMM 8x4 kernel for Cortex-A57 (Thanks, Ashwin Sekhar T K)
|
||||
|
||||
POWER:
|
||||
* Optimize S and C BLAS3 on Power8
|
||||
* Optimize BLAS2/1 on Power8
|
||||
|
||||
====================================================================
|
||||
Version 0.2.17
|
||||
20-Mar-2016
|
||||
common:
|
||||
* Enable BUILD_LAPACK_DEPRECATED=1 by default.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.16
|
||||
15-Mar-2016
|
||||
common:
|
||||
* Avoid potential getenv segfault. (#716)
|
||||
* Import LAPACK svn bugfix #142-#147,#150-#155
|
||||
|
||||
x86/x86_64:
|
||||
* Optimize c/zgemv for AMD Bulldozer, Piledriver, Steamroller
|
||||
* Fix bug with scipy linalg test.
|
||||
|
||||
ARM:
|
||||
* Improve DGEMM for ARM Cortex-A57. (Thanks, Ashwin Sekhar T K)
|
||||
|
||||
POWER:
|
||||
* Optimize D and Z BLAS3 functions for Power8.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.16.rc1
|
||||
23-Feb-2016
|
||||
common:
|
||||
* Upgrade LAPACK to 3.6.0 version.
|
||||
Add BUILD_LAPACK_DEPRECATED option in Makefile.rule to build
|
||||
LAPACK deprecated functions.
|
||||
* Add MAKE_NB_JOBS option in Makefile.
|
||||
Force number of make jobs. This is particularly
|
||||
useful when using distcc. (#735. Thanks, Jerome Robert.)
|
||||
* Redesign unit test. Run unit/regression test at every build (Travis-CI and Appveyor).
|
||||
* Disable multi-threading for small size swap and ger. (#744. Thanks, Jerome Robert)
|
||||
* Improve small zger, zgemv, ztrmv using stack allocation (#727. Thanks, Jerome Robert)
|
||||
* Let openblas_get_num_threads return the number of active threads.
|
||||
(#760. Thanks, Jerome Robert)
|
||||
* Support illumos(OmniOS). (#749. Thanks, Lauri Tirkkonen)
|
||||
* Fix LAPACK Dormbr, Dormlq bug. (#711, #713. Thanks, Brendan Tracey)
|
||||
* Update scipy benchmark script. (#745. Thanks, John Kirkham)
|
||||
|
||||
x86/x86_64:
|
||||
* Optimize trsm kernels for AMD Bulldozer, Piledriver, Steamroller.
|
||||
* Detect Intel Avoton.
|
||||
* Detect AMD Trinity, Richland, E2-3200.
|
||||
* Fix gemv performance bug on Mac OSX Intel Haswell.
|
||||
* Fix some bugs with CMake and Visual Studio
|
||||
|
||||
ARM:
|
||||
* Support and optimize Cortex-A57 AArch64.
|
||||
(#686. Thanks, Ashwin Sekhar TK)
|
||||
* Fix Android build on ARMV7 (#778. Thanks, Paul Mustiere)
|
||||
* Update ARMV6 kernels.
|
||||
|
||||
POWER:
|
||||
* Fix detection of POWER architecture
|
||||
(#684. Thanks, Sebastien Villemot)
|
||||
|
||||
====================================================================
|
||||
Version 0.2.15
|
||||
27-Oct-2015
|
||||
common:
|
||||
* Support cmake on x86/x86-64. Natively compiling on MS Visual Studio.
|
||||
(experimental. Thank Hank Anderson for the initial cmake porting work.)
|
||||
|
||||
On Linux and Mac OSX, OpenBLAS cmake supports assembly kernels.
|
||||
e.g. cmake .
|
||||
make
|
||||
make test (Optional)
|
||||
|
||||
On Windows MS Visual Studio, OpenBLAS cmake only supports C kernels.
|
||||
(OpenBLAS uses AT&T style assembly, which is not supported by MSVC.)
|
||||
e.g. cmake -G "Visual Studio 12 Win64" .
|
||||
Open OpenBLAS.sln and build.
|
||||
|
||||
* Enable MAX_STACK_ALLOC flags by default.
|
||||
Improve ger and gemv for small matrices.
|
||||
* Improve gemv parallel with small m and large n case.
|
||||
* Improve ?imatcopy when lda==ldb (#633. Thanks, Martin Koehler)
|
||||
* Add vecLib benchmarks (#565. Thanks, Andreas Noack.)
|
||||
* Fix LAPACK lantr for row major matrices (#634. Thanks, Dan Kortschak)
|
||||
* Fix LAPACKE lansy (#640. Thanks, Dan Kortschak)
|
||||
* Import bug fixes for LAPACKE s/dormlq, c/zunmlq
|
||||
* Raise the signal when pthread_create fails (#668. Thanks, James K. Lowden)
|
||||
* Remove g77 from compiler list.
|
||||
* Enable AppVeyor Windows CI.
|
||||
|
||||
x86/x86-64:
|
||||
* Support pure C generic kernels for x86/x86-64.
|
||||
* Support Intel Broadwell and Skylake by Haswell kernels.
|
||||
* Support AMD Excavator by Steamroller kernels.
|
||||
* Optimize s/d/c/zdot for Intel SandyBridge and Haswell.
|
||||
* Optimize s/d/c/zdot for AMD Piledriver and Steamroller.
|
||||
* Optimize s/d/c/zaxpy for Intel SandyBridge and Haswell.
|
||||
* Optimize s/d/c/zaxpy for AMD Piledriver and Steamroller.
|
||||
* Optimize d/c/zscal for Intel Haswell, dscal for Intel SandyBridge.
|
||||
* Optimize d/c/zscal for AMD Bulldozer, Piledriver and Steamroller.
|
||||
* Optimize s/dger for Intel SandyBridge.
|
||||
* Optimize s/dsymv for Intel SandyBridge.
|
||||
* Optimize ssymv for Intel Haswell.
|
||||
* Optimize dgemv for Intel Nehalem and Haswell.
|
||||
* Optimize dtrmm for Intel Haswell.
|
||||
|
||||
ARM:
|
||||
* Support Android NDK armeabi-v7a-hard ABI (-mfloat-abi=hard)
|
||||
e.g. make HOSTCC=gcc CC=arm-linux-androideabi-gcc NO_LAPACK=1 TARGET=ARMV7
|
||||
* Fix lock, rpcc bugs (#616, #617. Thanks, Grazvydas Ignotas)
|
||||
POWER:
|
||||
* Support ppc64le platform (ELF ABI v2. #612. Thanks, Matthew Brandyberry.)
|
||||
* Support POWER7/8 by POWER6 kernels. (#612. Thanks, Fábio Perez.)
|
||||
|
||||
====================================================================
|
||||
Version 0.2.14
|
||||
24-Mar-2015
|
||||
|
||||
73
Makefile
73
Makefile
@@ -7,10 +7,6 @@ ifneq ($(DYNAMIC_ARCH), 1)
|
||||
BLASDIRS += kernel
|
||||
endif
|
||||
|
||||
ifdef UTEST_CHECK
|
||||
SANITY_CHECK = 1
|
||||
endif
|
||||
|
||||
ifdef SANITY_CHECK
|
||||
BLASDIRS += reference
|
||||
endif
|
||||
@@ -20,12 +16,19 @@ ifneq ($(NO_LAPACK), 1)
|
||||
SUBDIRS += lapack
|
||||
endif
|
||||
|
||||
RELA =
|
||||
ifeq ($(BUILD_RELAPACK), 1)
|
||||
RELA = re_lapack
|
||||
endif
|
||||
|
||||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast,$(LAPACK_FFLAGS))
|
||||
|
||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
|
||||
|
||||
.PHONY : all libs netlib test ctest shared install
|
||||
.NOTPARALLEL : all libs prof lapack-test install blas-test
|
||||
.PHONY : all libs netlib $(RELA) test ctest shared install
|
||||
.NOTPARALLEL : all libs $(RELA) prof lapack-test install blas-test
|
||||
|
||||
all :: libs netlib tests shared
|
||||
all :: libs netlib $(RELA) tests shared
|
||||
@echo
|
||||
@echo " OpenBLAS build complete. ($(LIB_COMPONENTS))"
|
||||
@echo
|
||||
@@ -83,22 +86,22 @@ endif
|
||||
|
||||
shared :
|
||||
ifndef NO_SHARED
|
||||
ifeq ($(OSNAME), Linux)
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android))
|
||||
@$(MAKE) -C exports so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
@$(MAKE) -C exports so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
@$(MAKE) -C exports so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@$(MAKE) -C exports dyn
|
||||
@-ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
@$(MAKE) -C exports dll
|
||||
@@ -110,28 +113,22 @@ endif
|
||||
|
||||
tests :
|
||||
ifndef NOFORTRAN
|
||||
ifndef TARGET
|
||||
ifndef CROSS
|
||||
touch $(LIBNAME)
|
||||
ifndef NO_FBLAS
|
||||
$(MAKE) -C test all
|
||||
ifdef UTEST_CHECK
|
||||
$(MAKE) -C utest all
|
||||
endif
|
||||
endif
|
||||
ifndef NO_CBLAS
|
||||
$(MAKE) -C ctest all
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
libs :
|
||||
ifeq ($(CORE), UNKOWN)
|
||||
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
|
||||
endif
|
||||
ifeq ($(NOFORTRAN), 1)
|
||||
$(error OpenBLAS: Detecting fortran compiler failed. Please install fortran compiler, e.g. gfortran, ifort, openf90.)
|
||||
$(info OpenBLAS: Detecting fortran compiler failed. Cannot compile LAPACK. Only compile BLAS.)
|
||||
endif
|
||||
ifeq ($(NO_STATIC), 1)
|
||||
ifeq ($(NO_SHARED), 1)
|
||||
@@ -223,6 +220,14 @@ ifndef NO_LAPACKE
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(NO_LAPACK), 1)
|
||||
re_lapack :
|
||||
|
||||
else
|
||||
re_lapack :
|
||||
@$(MAKE) -C relapack
|
||||
endif
|
||||
|
||||
prof_lapack : lapack_prebuild
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapack_prof
|
||||
|
||||
@@ -231,7 +236,7 @@ ifndef NOFORTRAN
|
||||
-@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "NOOPT = $(LAPACK_FFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
@@ -247,16 +252,23 @@ ifndef NOFORTRAN
|
||||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifeq ($(FC), gfortran)
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifdef SMP
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else
|
||||
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
else
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
else
|
||||
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
ifeq ($(BUILD_LAPACK_DEPRECATED), 1)
|
||||
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
@@ -279,18 +291,28 @@ lapack-timing : large.tgz timing.tgz
|
||||
ifndef NOFORTRAN
|
||||
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
|
||||
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )
|
||||
make -C $(NETLIB_LAPACK_DIR)/TIMING
|
||||
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TIMING
|
||||
endif
|
||||
|
||||
|
||||
lapack-test :
|
||||
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
|
||||
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||
ifneq ($(CROSS), 1)
|
||||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
|
||||
./testsecond; ./testdsecnd; ./testieee; ./testversion )
|
||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||
endif
|
||||
|
||||
lapack-runtest:
|
||||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
|
||||
./testsecond; ./testdsecnd; ./testieee; ./testversion )
|
||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||
|
||||
|
||||
blas-test:
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
|
||||
make -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out)
|
||||
|
||||
|
||||
@@ -317,6 +339,7 @@ endif
|
||||
@touch $(NETLIB_LAPACK_DIR)/make.inc
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) clean
|
||||
@rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h
|
||||
@$(MAKE) -C relapack clean
|
||||
@rm -f *.grd Makefile.conf_last config_last.h
|
||||
@(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out testing_results.txt)
|
||||
@echo Done.
|
||||
|
||||
26
Makefile.arm
26
Makefile.arm
@@ -1,23 +1,19 @@
|
||||
# ifeq logical or
|
||||
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
|
||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
ifeq ($(CORE), $(filter $(CORE),ARMV7 CORTEXA9 CORTEXA15))
|
||||
ifeq ($(OSNAME), Android)
|
||||
CCOMMON_OPT += -mfpu=neon -march=armv7-a
|
||||
FCOMMON_OPT += -mfpu=neon -march=armv7-a
|
||||
else
|
||||
CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
||||
FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV7)
|
||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV6)
|
||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
CCOMMON_OPT += -mfpu=vfp -march=armv6
|
||||
FCOMMON_OPT += -mfpu=vfp -march=armv6
|
||||
endif
|
||||
|
||||
|
||||
ifeq ($(CORE), ARMV5)
|
||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
CCOMMON_OPT += -march=armv5
|
||||
FCOMMON_OPT += -march=armv5
|
||||
endif
|
||||
|
||||
|
||||
|
||||
@@ -4,4 +4,22 @@ CCOMMON_OPT += -march=armv8-a
|
||||
FCOMMON_OPT += -march=armv8-a
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA57)
|
||||
CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
|
||||
FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), VULCAN)
|
||||
CCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
|
||||
FCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), THUNDERX)
|
||||
CCOMMON_OPT += -mtune=thunderx -mcpu=thunderx
|
||||
FCOMMON_OPT += -mtune=thunderx -mcpu=thunderx
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), THUNDERX2T99)
|
||||
CCOMMON_OPT += -mtune=thunderx2t99 -mcpu=thunderx2t99
|
||||
FCOMMON_OPT += -mtune=thunderx2t99 -mcpu=thunderx2t99
|
||||
endif
|
||||
|
||||
110
Makefile.install
110
Makefile.install
@@ -11,6 +11,8 @@ OPENBLAS_BINARY_DIR := $(PREFIX)/bin
|
||||
OPENBLAS_BUILD_DIR := $(CURDIR)
|
||||
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
|
||||
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
|
||||
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
|
||||
OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig
|
||||
|
||||
.PHONY : install
|
||||
.NOTPARALLEL : install
|
||||
@@ -19,98 +21,122 @@ lib.grd :
|
||||
$(error OpenBLAS: Please run "make" firstly)
|
||||
|
||||
install : lib.grd
|
||||
@-mkdir -p $(DESTDIR)$(PREFIX)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_BINARY_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||
@-mkdir -p "$(DESTDIR)$(PREFIX)"
|
||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)"
|
||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
|
||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)"
|
||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
|
||||
@echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
#for inc
|
||||
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@awk 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#ifndef OPENBLAS_CONFIG_H > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||
@echo \#define OPENBLAS_CONFIG_H >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||
@$(AWK) 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||
@cat openblas_config_template.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||
|
||||
@echo Generating f77blas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@echo \#ifndef OPENBLAS_F77BLAS_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#define OPENBLAS_F77BLAS_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#include \"openblas_config.h\" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@cat common_interface.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#endif >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#ifndef OPENBLAS_F77BLAS_H > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||
@echo \#define OPENBLAS_F77BLAS_H >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||
@echo \#include \"openblas_config.h\" >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||
@cat common_interface.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||
@echo \#endif >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||
|
||||
ifndef NO_CBLAS
|
||||
@echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@sed 's/common/openblas_config/g' cblas.h > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h
|
||||
@sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
|
||||
endif
|
||||
|
||||
ifndef NO_LAPACKE
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
|
||||
endif
|
||||
|
||||
#for install static library
|
||||
ifndef NO_STATIC
|
||||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@install -pm644 $(LIBNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
@install -pm644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
endif
|
||||
#for install shared library
|
||||
ifndef NO_SHARED
|
||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
ifeq ($(OSNAME), Linux)
|
||||
@install -pm755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android))
|
||||
@install -pm755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@-cp $(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-install_name_tool -id $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
@-cp $(LIBDYNNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@-install_name_tool -id "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)" "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
||||
@-cp $(LIBDLLNAME).a $(OPENBLAS_LIBRARY_DIR)
|
||||
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
|
||||
@-cp $(LIBDLLNAME).a "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
||||
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
#Generating openblas.pc
|
||||
@echo Generating openblas.pc in $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)
|
||||
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
|
||||
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
|
||||
@echo 'version='$(VERSION) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
|
||||
@echo 'extralib='$(EXTRALIB) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
|
||||
@cat openblas.pc.in >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
|
||||
|
||||
|
||||
#Generating OpenBLASConfig.cmake
|
||||
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
|
||||
ifndef NO_SHARED
|
||||
#ifeq logical or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
endif
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
endif
|
||||
else
|
||||
#only static
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
endif
|
||||
#Generating OpenBLASConfigVersion.cmake
|
||||
@echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||
@echo "set (PACKAGE_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo " set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo "else ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo " set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo " if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo " set (PACKAGE_VERSION_EXACT TRUE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo " endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo "endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo Install OK!
|
||||
|
||||
|
||||
3
Makefile.mips
Normal file
3
Makefile.mips
Normal file
@@ -0,0 +1,3 @@
|
||||
ifdef BINARY64
|
||||
else
|
||||
endif
|
||||
@@ -1,4 +1,26 @@
|
||||
# CCOMMON_OPT += -DALLOC_SHM
|
||||
|
||||
ifdef USE_THREAD
|
||||
ifeq ($(USE_THREAD), 0)
|
||||
USE_OPENMP = 0
|
||||
else
|
||||
USE_OPENMP = 1
|
||||
endif
|
||||
else
|
||||
USE_OPENMP = 1
|
||||
endif
|
||||
|
||||
|
||||
|
||||
ifeq ($(CORE), POWER8)
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||
else
|
||||
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
FLAMEPATH = $(HOME)/flame/lib
|
||||
|
||||
@@ -16,6 +38,16 @@ else
|
||||
endif
|
||||
endif
|
||||
|
||||
#Either uncomment below line or run make with `USE_MASS=1` to enable support of MASS library
|
||||
#USE_MASS = 1
|
||||
|
||||
ifeq ($(USE_MASS), 1)
|
||||
# Path to MASS libs, change it if the libs are installed at any other location
|
||||
MASSPATH = /opt/ibm/xlmass/8.1.5/lib
|
||||
COMMON_OPT += -mveclibabi=mass -ftree-vectorize -funsafe-math-optimizations -DUSE_MASS
|
||||
EXTRALIB += -L$(MASSPATH) -lmass -lmassvp8 -lmass_simdp8
|
||||
endif
|
||||
|
||||
ifdef BINARY64
|
||||
|
||||
|
||||
|
||||
@@ -17,14 +17,26 @@ ifdef CPUIDEMU
|
||||
EXFLAGS = -DCPUIDEMU -DVENDOR=99
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), P5600)
|
||||
TARGET_FLAGS = -mips32r5
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), I6400)
|
||||
TARGET_FLAGS = -mips64r6
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), P6600)
|
||||
TARGET_FLAGS = -mips64r6
|
||||
endif
|
||||
|
||||
all: getarch_2nd
|
||||
./getarch_2nd 0 >> $(TARGET_MAKE)
|
||||
./getarch_2nd 1 >> $(TARGET_CONF)
|
||||
|
||||
config.h : c_check f_check getarch
|
||||
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC)
|
||||
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS)
|
||||
ifneq ($(ONLY_CBLAS), 1)
|
||||
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC)
|
||||
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS)
|
||||
else
|
||||
#When we only build CBLAS, we set NOFORTRAN=2
|
||||
echo "NOFORTRAN=2" >> $(TARGET_MAKE)
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.2.14
|
||||
VERSION = 0.2.20
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
@@ -52,6 +52,7 @@ VERSION = 0.2.14
|
||||
# USE_THREAD = 0
|
||||
|
||||
# If you're going to use this library with OpenMP, please comment it in.
|
||||
# This flag is always set for POWER8. Don't modify the flag
|
||||
# USE_OPENMP = 1
|
||||
|
||||
# You can define maximum number of threads. Basically it should be
|
||||
@@ -79,6 +80,12 @@ VERSION = 0.2.14
|
||||
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
|
||||
# NO_LAPACKE = 1
|
||||
|
||||
# Build LAPACK Deprecated functions since LAPACK 3.6.0
|
||||
BUILD_LAPACK_DEPRECATED = 1
|
||||
|
||||
# Build RecursiveLAPACK on top of LAPACK
|
||||
# BUILD_RELAPACK = 1
|
||||
|
||||
# If you want to use legacy threaded Level 3 implementation.
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
|
||||
@@ -93,7 +100,7 @@ VERSION = 0.2.14
|
||||
NO_WARMUP = 1
|
||||
|
||||
# If you want to disable CPU/Memory affinity on Linux.
|
||||
NO_AFFINITY = 1
|
||||
#NO_AFFINITY = 1
|
||||
|
||||
# if you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus
|
||||
# BIGNUMA = 1
|
||||
@@ -108,6 +115,13 @@ NO_AFFINITY = 1
|
||||
# Don't use parallel make.
|
||||
# NO_PARALLEL_MAKE = 1
|
||||
|
||||
# Force number of make jobs. The default is the number of logical CPU of the host.
|
||||
# This is particularly useful when using distcc.
|
||||
# A negative value will disable adding a -j flag to make, allowing to use a parent
|
||||
# make -j value. This is useful to call OpenBLAS make from another project
|
||||
# makefile
|
||||
# MAKE_NB_JOBS = 2
|
||||
|
||||
# If you would like to know minute performance report of GotoBLAS.
|
||||
# FUNCTION_PROFILE = 1
|
||||
|
||||
@@ -138,19 +152,17 @@ NO_AFFINITY = 1
|
||||
# slow (Not implemented yet).
|
||||
# SANITY_CHECK = 1
|
||||
|
||||
# Run testcases in utest/ . When you enable UTEST_CHECK, it would enable
|
||||
# SANITY_CHECK to compare the result with reference BLAS.
|
||||
# UTEST_CHECK = 1
|
||||
|
||||
# The installation directory.
|
||||
# PREFIX = /opt/OpenBLAS
|
||||
|
||||
# Common Optimization Flag;
|
||||
# The default -O2 is enough.
|
||||
# Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT
|
||||
# COMMON_OPT = -O2
|
||||
|
||||
# gfortran option for LAPACK
|
||||
# enable this flag only on 64bit Linux and if you need a thread safe lapack library
|
||||
# Flags for POWER8 are defined in Makefile.power. Don't modify FCOMMON_OPT
|
||||
# FCOMMON_OPT = -frecursive
|
||||
|
||||
# Profiling flags
|
||||
@@ -159,16 +171,20 @@ COMMON_PROF = -pg
|
||||
# Build Debug version
|
||||
# DEBUG = 1
|
||||
|
||||
# Improve GEMV and GER for small matrices by stack allocation.
|
||||
# For details, https://github.com/xianyi/OpenBLAS/pull/482
|
||||
# Set maximum stack allocation.
|
||||
# The default value is 2048. 0 disables stack allocation and may reduce GER and GEMV
|
||||
# performance. For details, https://github.com/xianyi/OpenBLAS/pull/482
|
||||
#
|
||||
# MAX_STACK_ALLOC=2048
|
||||
# MAX_STACK_ALLOC = 0
|
||||
|
||||
# Add a prefix or suffix to all exported symbol names in the shared library.
|
||||
# Avoid conflicts with other BLAS libraries, especially when using
|
||||
# 64 bit integer interfaces in OpenBLAS.
|
||||
# For details, https://github.com/xianyi/OpenBLAS/pull/459
|
||||
#
|
||||
# The same prefix and suffix are also added to the library name,
|
||||
# i.e. you get lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) rather than libopenblas
|
||||
#
|
||||
# SYMBOLPREFIX=
|
||||
# SYMBOLSUFFIX=
|
||||
|
||||
|
||||
190
Makefile.system
190
Makefile.system
@@ -23,6 +23,7 @@ CC = gcc
|
||||
UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
CC = clang
|
||||
# EXTRALIB += -Wl,-no_compact_unwind
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -64,6 +65,12 @@ endif
|
||||
ifeq ($(TARGET), STEAMROLLER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET), EXCAVATOR)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET), ZEN)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
@@ -91,6 +98,12 @@ endif
|
||||
ifeq ($(TARGET_CORE), STEAMROLLER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), EXCAVATOR)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), ZEN)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
@@ -132,6 +145,10 @@ NO_PARALLEL_MAKE=0
|
||||
endif
|
||||
GETARCH_FLAGS += -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE)
|
||||
|
||||
ifdef MAKE_NB_JOBS
|
||||
GETARCH_FLAGS += -DMAKE_NB_JOBS=$(MAKE_NB_JOBS)
|
||||
endif
|
||||
|
||||
ifeq ($(HOSTCC), loongcc)
|
||||
GETARCH_FLAGS += -static
|
||||
endif
|
||||
@@ -148,7 +165,7 @@ ifndef GOTOBLAS_MAKEFILE
|
||||
export GOTOBLAS_MAKEFILE = 1
|
||||
|
||||
# Generating Makefile.conf and config.h
|
||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) all)
|
||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all)
|
||||
|
||||
ifndef TARGET_CORE
|
||||
include $(TOPDIR)/Makefile.conf
|
||||
@@ -195,12 +212,20 @@ DLLWRAP = $(CROSS_SUFFIX)dllwrap
|
||||
OBJCOPY = $(CROSS_SUFFIX)objcopy
|
||||
OBJCONV = $(CROSS_SUFFIX)objconv
|
||||
|
||||
|
||||
# If Fortran compiler detection failed, build only BLAS.
|
||||
ifeq ($(NOFORTRAN), 1)
|
||||
NO_LAPACK = 1
|
||||
endif
|
||||
|
||||
#
|
||||
# OS dependent settings
|
||||
#
|
||||
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.2
|
||||
ifndef MACOSX_DEPLOYMENT_TARGET
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.6
|
||||
endif
|
||||
MD5SUM = md5 -r
|
||||
endif
|
||||
|
||||
@@ -217,6 +242,10 @@ EXTRALIB += -lm
|
||||
NO_EXPRECISION = 1
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Android)
|
||||
EXTRALIB += -lm
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), AIX)
|
||||
EXTRALIB += -lm
|
||||
endif
|
||||
@@ -279,12 +308,14 @@ endif
|
||||
ifneq ($(OSNAME), WINNT)
|
||||
ifneq ($(OSNAME), CYGWIN_NT)
|
||||
ifneq ($(OSNAME), Interix)
|
||||
ifneq ($(OSNAME), Android)
|
||||
ifdef SMP
|
||||
EXTRALIB += -lpthread
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# ifeq logical or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix))
|
||||
@@ -311,7 +342,8 @@ ifdef SANITY_CHECK
|
||||
CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU)
|
||||
endif
|
||||
|
||||
ifdef MAX_STACK_ALLOC
|
||||
MAX_STACK_ALLOC ?= 2048
|
||||
ifneq ($(MAX_STACK_ALLOC), 0)
|
||||
CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC)
|
||||
endif
|
||||
|
||||
@@ -323,6 +355,11 @@ ifeq ($(ARCH), x86)
|
||||
ifndef BINARY
|
||||
NO_BINARY_MODE = 1
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), generic)
|
||||
NO_EXPRECISION = 1
|
||||
endif
|
||||
|
||||
ifndef NO_EXPRECISION
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
# ifeq logical or. GCC or LSB
|
||||
@@ -341,6 +378,11 @@ endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86_64)
|
||||
|
||||
ifeq ($(CORE), generic)
|
||||
NO_EXPRECISION = 1
|
||||
endif
|
||||
|
||||
ifndef NO_EXPRECISION
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
# ifeq logical or. GCC or LSB
|
||||
@@ -351,7 +393,7 @@ FCOMMON_OPT += -m128bit-long-double
|
||||
endif
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
EXPRECISION = 1
|
||||
CCOMMON_OPT += -DEXPRECISION
|
||||
CCOMMON_OPT += -DEXPRECISION
|
||||
FCOMMON_OPT += -m128bit-long-double
|
||||
endif
|
||||
endif
|
||||
@@ -365,7 +407,7 @@ endif
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
|
||||
#check
|
||||
#check
|
||||
ifeq ($(USE_THREAD), 0)
|
||||
$(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.)
|
||||
endif
|
||||
@@ -376,7 +418,6 @@ CCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
$(error OpenBLAS: Clang didn't support OpenMP yet.)
|
||||
CCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
|
||||
@@ -408,15 +449,16 @@ endif
|
||||
ifeq ($(ARCH), x86_64)
|
||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
ifneq ($(NO_AVX), 1)
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
|
||||
endif
|
||||
ifneq ($(NO_AVX2), 1)
|
||||
DYNAMIC_CORE += HASWELL
|
||||
DYNAMIC_CORE += HASWELL ZEN
|
||||
endif
|
||||
endif
|
||||
|
||||
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
|
||||
ifndef DYNAMIC_CORE
|
||||
DYNAMIC_ARCH =
|
||||
override DYNAMIC_ARCH=
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -432,7 +474,7 @@ endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
|
||||
NO_BINARY_MODE = 1
|
||||
endif
|
||||
|
||||
@@ -444,6 +486,23 @@ endif
|
||||
ifeq ($(ARCH), arm)
|
||||
NO_BINARY_MODE = 1
|
||||
BINARY_DEFINED = 1
|
||||
|
||||
CCOMMON_OPT += -marm
|
||||
FCOMMON_OPT += -marm
|
||||
|
||||
# If softfp abi is mentioned on the command line, force it.
|
||||
ifeq ($(ARM_SOFTFP_ABI), 1)
|
||||
CCOMMON_OPT += -mfloat-abi=softfp
|
||||
FCOMMON_OPT += -mfloat-abi=softfp
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Android)
|
||||
ifeq ($(ARM_SOFTFP_ABI), 1)
|
||||
EXTRALIB += -lm
|
||||
else
|
||||
EXTRALIB += -Wl,-lm_hard
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), arm64)
|
||||
@@ -472,13 +531,16 @@ endif
|
||||
|
||||
ifdef NO_BINARY_MODE
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifeq ($(ARCH), $(filter $(ARCH),mips64))
|
||||
ifdef BINARY64
|
||||
CCOMMON_OPT += -mabi=64
|
||||
else
|
||||
CCOMMON_OPT += -mabi=n32
|
||||
endif
|
||||
BINARY_DEFINED = 1
|
||||
else ifeq ($(ARCH), $(filter $(ARCH),mips))
|
||||
CCOMMON_OPT += -mabi=32
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
@@ -491,6 +553,21 @@ CCOMMON_OPT += -march=mips64
|
||||
FCOMMON_OPT += -march=mips64
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), P5600)
|
||||
CCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
|
||||
FCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), I6400)
|
||||
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS)
|
||||
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS)
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), P6600)
|
||||
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS)
|
||||
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS)
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), AIX)
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
@@ -527,6 +604,23 @@ endif
|
||||
# Fortran Compiler dependent settings
|
||||
#
|
||||
|
||||
ifeq ($(F_COMPILER), FLANG)
|
||||
CCOMMON_OPT += -DF_INTERFACE_FLANG
|
||||
ifdef BINARY64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
FCOMMON_OPT += -Wall
|
||||
else
|
||||
FCOMMON_OPT += -Wall
|
||||
endif
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER), G77)
|
||||
CCOMMON_OPT += -DF_INTERFACE_G77
|
||||
FCOMMON_OPT += -Wall
|
||||
@@ -559,12 +653,14 @@ ifneq ($(NO_LAPACK), 1)
|
||||
EXTRALIB += -lgfortran
|
||||
endif
|
||||
ifdef NO_BINARY_MODE
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifeq ($(ARCH), $(filter $(ARCH),mips64))
|
||||
ifdef BINARY64
|
||||
FCOMMON_OPT += -mabi=64
|
||||
else
|
||||
FCOMMON_OPT += -mabi=n32
|
||||
endif
|
||||
else ifeq ($(ARCH), $(filter $(ARCH),mips))
|
||||
FCOMMON_OPT += -mabi=32
|
||||
endif
|
||||
else
|
||||
ifdef BINARY64
|
||||
@@ -578,7 +674,7 @@ else
|
||||
FCOMMON_OPT += -m32
|
||||
endif
|
||||
endif
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
endif
|
||||
@@ -590,14 +686,14 @@ ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER), FUJITSU)
|
||||
CCOMMON_OPT += -DF_INTERFACE_FUJITSU
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
endif
|
||||
@@ -615,7 +711,7 @@ endif
|
||||
else
|
||||
FCOMMON_OPT += -q32
|
||||
endif
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
endif
|
||||
@@ -633,7 +729,7 @@ FCOMMON_OPT += -tp p7-64
|
||||
else
|
||||
FCOMMON_OPT += -tp p7
|
||||
endif
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -mp
|
||||
endif
|
||||
endif
|
||||
@@ -647,22 +743,8 @@ FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq ($(ARCH), mips64)
|
||||
ifndef BINARY64
|
||||
FCOMMON_OPT += -m32
|
||||
else
|
||||
FCOMMON_OPT += -m64
|
||||
endif
|
||||
else
|
||||
ifdef BINARY64
|
||||
FCOMMON_OPT += -mabi=64
|
||||
else
|
||||
FCOMMON_OPT += -mabi=n32
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef USE_OPENMP
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -mp
|
||||
endif
|
||||
endif
|
||||
@@ -677,7 +759,7 @@ endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
|
||||
ifndef BINARY64
|
||||
FCOMMON_OPT += -n32
|
||||
else
|
||||
@@ -699,7 +781,7 @@ FCOMMON_OPT += -m64
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FEXTRALIB += -lstdc++
|
||||
FCOMMON_OPT += -mp
|
||||
endif
|
||||
@@ -707,7 +789,7 @@ endif
|
||||
|
||||
ifeq ($(C_COMPILER), OPEN64)
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
|
||||
ifndef BINARY64
|
||||
CCOMMON_OPT += -n32
|
||||
else
|
||||
@@ -747,14 +829,14 @@ FCOMMON_OPT += -m32
|
||||
else
|
||||
FCOMMON_OPT += -m64
|
||||
endif
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -xopenmp=parallel
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER), COMPAQ)
|
||||
CCOMMON_OPT += -DF_INTERFACE_COMPAQ
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
endif
|
||||
@@ -857,12 +939,6 @@ ifdef USE_SIMPLE_THREADED_LEVEL3
|
||||
CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3
|
||||
endif
|
||||
|
||||
ifndef LIBNAMESUFFIX
|
||||
LIBPREFIX = libopenblas
|
||||
else
|
||||
LIBPREFIX = libopenblas_$(LIBNAMESUFFIX)
|
||||
endif
|
||||
|
||||
ifndef SYMBOLPREFIX
|
||||
SYMBOLPREFIX =
|
||||
endif
|
||||
@@ -871,6 +947,12 @@ ifndef SYMBOLSUFFIX
|
||||
SYMBOLSUFFIX =
|
||||
endif
|
||||
|
||||
ifndef LIBNAMESUFFIX
|
||||
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)
|
||||
else
|
||||
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX)
|
||||
endif
|
||||
|
||||
KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
|
||||
|
||||
include $(TOPDIR)/Makefile.$(ARCH)
|
||||
@@ -929,17 +1011,18 @@ ifeq ($(OSNAME), SunOS)
|
||||
TAR = gtar
|
||||
PATCH = gpatch
|
||||
GREP = ggrep
|
||||
AWK = nawk
|
||||
else
|
||||
TAR = tar
|
||||
PATCH = patch
|
||||
GREP = grep
|
||||
AWK = awk
|
||||
endif
|
||||
|
||||
ifndef MD5SUM
|
||||
MD5SUM = md5sum
|
||||
endif
|
||||
|
||||
AWK = awk
|
||||
|
||||
REVISION = -r$(VERSION)
|
||||
MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION)))
|
||||
@@ -948,16 +1031,25 @@ ifeq ($(DEBUG), 1)
|
||||
COMMON_OPT += -g
|
||||
endif
|
||||
|
||||
ifeq ($(DEBUG), 1)
|
||||
FCOMMON_OPT += -g
|
||||
endif
|
||||
|
||||
ifndef COMMON_OPT
|
||||
COMMON_OPT = -O2
|
||||
endif
|
||||
|
||||
ifndef FCOMMON_OPT
|
||||
FCOMMON_OPT = -O2 -frecursive
|
||||
endif
|
||||
|
||||
|
||||
|
||||
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
|
||||
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
|
||||
|
||||
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
|
||||
override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
|
||||
override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
|
||||
#MAKEOVERRIDES =
|
||||
|
||||
#For LAPACK Fortran codes.
|
||||
@@ -1037,6 +1129,9 @@ LIB_COMPONENTS += LAPACK
|
||||
ifneq ($(NO_LAPACKE), 1)
|
||||
LIB_COMPONENTS += LAPACKE
|
||||
endif
|
||||
ifeq ($(BUILD_RELAPACK), 1)
|
||||
LIB_COMPONENTS += ReLAPACK
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ONLY_CBLAS), 1)
|
||||
@@ -1086,6 +1181,8 @@ export HAVE_VFP
|
||||
export HAVE_VFPV3
|
||||
export HAVE_VFPV4
|
||||
export HAVE_NEON
|
||||
export HAVE_MSA
|
||||
export MSA_FLAGS
|
||||
export KERNELDIR
|
||||
export FUNCTION_PROFILE
|
||||
export TARGET_CORE
|
||||
@@ -1147,4 +1244,3 @@ SUNPATH = /opt/sunstudio12.1
|
||||
else
|
||||
SUNPATH = /opt/SUNWspro
|
||||
endif
|
||||
|
||||
|
||||
6
Makefile.zarch
Normal file
6
Makefile.zarch
Normal file
@@ -0,0 +1,6 @@
|
||||
|
||||
ifeq ($(CORE), Z13)
|
||||
CCOMMON_OPT += -march=z13 -mzvector
|
||||
FCOMMON_OPT += -march=z13 -mzvector
|
||||
endif
|
||||
|
||||
42
README.md
42
README.md
@@ -1,7 +1,10 @@
|
||||
# OpenBLAS
|
||||
|
||||
[](https://travis-ci.org/xianyi/OpenBLAS)
|
||||
[](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
|
||||
Travis CI: [Build Status](https://travis-ci.org/xianyi/OpenBLAS)
|
||||
|
||||
AppVeyor: [Build Status](https://ci.appveyor.com/project/xianyi/openblas/branch/develop)
|
||||
## Introduction
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
|
||||
@@ -40,6 +43,35 @@ On X86 box, compile this library for loongson3a CPU with loongcc (based on Open6
|
||||
|
||||
make DEBUG=1
|
||||
|
||||
### Compile with MASS Support on Power CPU (Optional dependency)
|
||||
|
||||
[IBM MASS](http://www-01.ibm.com/software/awdtools/mass/linux/mass-linux.html) library consists of a set of mathematical functions for C, C++, and
|
||||
Fortran-language applications that are tuned for optimum performance on POWER architectures. OpenBLAS with MASS requires 64-bit, little-endian OS on POWER.
|
||||
The library can be installed as below -
|
||||
|
||||
* On Ubuntu:
|
||||
|
||||
wget -q http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/public.gpg -O- | sudo apt-key add -</br>
|
||||
echo "deb http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/ trusty main" | sudo tee /etc/apt/sources.list.d/ibm-xl-compiler-eval.list</br>
|
||||
sudo apt-get update</br>
|
||||
sudo apt-get install libxlmass-devel.8.1.5</br>
|
||||
|
||||
* On RHEL/CentOS:
|
||||
|
||||
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/repodata/repomd.xml.key</br>
|
||||
sudo rpm --import repomd.xml.key</br>
|
||||
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/ibm-xl-compiler-eval.repo</br>
|
||||
sudo cp ibm-xl-compiler-eval.repo /etc/yum.repos.d/</br>
|
||||
sudo yum install libxlmass-devel.8.1.5</br>
|
||||
|
||||
After installing MASS library, compile openblas with USE_MASS=1.
|
||||
|
||||
Example:
|
||||
|
||||
Compiling on Power8 with MASS support -
|
||||
|
||||
make USE_MASS=1 TARGET=POWER8
|
||||
|
||||
### Install to the directory (optional)
|
||||
|
||||
Example:
|
||||
@@ -72,12 +104,18 @@ Please read GotoBLAS_01Readme.txt
|
||||
|
||||
#### ARM64:
|
||||
- **ARMV8**: Experimental
|
||||
- **ARM Cortex-A57**: Experimental
|
||||
|
||||
#### IBM zEnterprise System:
|
||||
- **Z13**: Optimized Level-3 BLAS
|
||||
|
||||
|
||||
### Support OS:
|
||||
- **GNU/Linux**
|
||||
- **MingWin/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
|
||||
- **MinGW or Visual Studio (CMake)/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
|
||||
- **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are beginners on Mac OS X.
|
||||
- **FreeBSD**: Supported by community. We didn't test the library on this OS.
|
||||
- **Android**: Supported by community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>.
|
||||
|
||||
## Usages
|
||||
Link with libopenblas.a or -lopenblas for shared library.
|
||||
|
||||
@@ -33,6 +33,8 @@ BOBCAT
|
||||
BULLDOZER
|
||||
PILEDRIVER
|
||||
STEAMROLLER
|
||||
EXCAVATOR
|
||||
ZEN
|
||||
|
||||
c)VIA CPU:
|
||||
SSE_GENERIC
|
||||
@@ -43,6 +45,8 @@ NANO
|
||||
POWER4
|
||||
POWER5
|
||||
POWER6
|
||||
POWER7
|
||||
POWER8
|
||||
PPCG4
|
||||
PPC970
|
||||
PPC970MP
|
||||
@@ -50,24 +54,34 @@ PPC440
|
||||
PPC440FP2
|
||||
CELL
|
||||
|
||||
3.MIPS64 CPU:
|
||||
3.MIPS CPU:
|
||||
P5600
|
||||
|
||||
4.MIPS64 CPU:
|
||||
SICORTEX
|
||||
LOONGSON3A
|
||||
LOONGSON3B
|
||||
I6400
|
||||
P6600
|
||||
|
||||
4.IA64 CPU:
|
||||
5.IA64 CPU:
|
||||
ITANIUM2
|
||||
|
||||
5.SPARC CPU:
|
||||
6.SPARC CPU:
|
||||
SPARC
|
||||
SPARCV7
|
||||
|
||||
6.ARM CPU:
|
||||
7.ARM CPU:
|
||||
CORTEXA15
|
||||
CORTEXA9
|
||||
ARMV7
|
||||
ARMV6
|
||||
ARMV5
|
||||
|
||||
7.ARM 64-bit CPU:
|
||||
8.ARM 64-bit CPU:
|
||||
ARMV8
|
||||
CORTEXA57
|
||||
VULCAN
|
||||
THUNDERX
|
||||
THUNDERX2T99
|
||||
|
||||
|
||||
199
USAGE.md
Normal file
199
USAGE.md
Normal file
@@ -0,0 +1,199 @@
|
||||
# Notes on OpenBLAS usage
|
||||
## Usage
|
||||
|
||||
#### Program is Terminated. Because you tried to allocate too many memory regions
|
||||
|
||||
In OpenBLAS, we manage a pool of memory buffers and allocate the number of
|
||||
buffers as the following.
|
||||
```
|
||||
#define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
|
||||
```
|
||||
This error indicates that the program exceeded the number of buffers.
|
||||
|
||||
Please build OpenBLAS with larger `NUM_THREADS`. For example, `make
|
||||
NUM_THREADS=32` or `make NUM_THREADS=64`. In `Makefile.system`, we will set
|
||||
`MAX_CPU_NUMBER=NUM_THREADS`.
|
||||
|
||||
#### How can I use OpenBLAS in multi-threaded applications?
|
||||
|
||||
If your application is already multi-threaded, it will conflict with OpenBLAS
|
||||
multi-threading. Thus, you must set OpenBLAS to use a single thread in any of the
|
||||
following ways:
|
||||
|
||||
* `export OPENBLAS_NUM_THREADS=1` in the environment variables.
|
||||
* Call `openblas_set_num_threads(1)` in the application at runtime.
|
||||
* Build OpenBLAS single thread version, e.g. `make USE_THREAD=0`
|
||||
|
||||
If the application is parallelized by OpenMP, please use OpenBLAS built with
|
||||
`USE_OPENMP=1`
|
||||
|
||||
#### How to choose TARGET manually at runtime when compiled with DYNAMIC_ARCH
|
||||
|
||||
The environment variable that controls the kernel selection is
|
||||
`OPENBLAS_CORETYPE` (see `driver/others/dynamic.c`) e.g. `export
|
||||
OPENBLAS_CORETYPE=Haswell` and the function `char* openblas_get_corename()`
|
||||
returns the used target.
|
||||
|
||||
#### How could I disable OpenBLAS threading affinity on runtime?
|
||||
|
||||
You can define the `OPENBLAS_MAIN_FREE` or `GOTOBLAS_MAIN_FREE` environment
|
||||
variable to disable threading affinity at runtime. For example, before
|
||||
running,
|
||||
```
|
||||
export OPENBLAS_MAIN_FREE=1
|
||||
```
|
||||
|
||||
Alternatively, you can disable the affinity feature by enabling `NO_AFFINITY=1`
|
||||
in `Makefile.rule`.
|
||||
|
||||
## Linking with the library
|
||||
|
||||
* Link with shared library
|
||||
|
||||
`gcc -o test test.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas`
|
||||
|
||||
If the library is multithreaded, please add `-lpthread`. If the library
|
||||
contains LAPACK functions, please add `-lgfortran` or other Fortran libs.
|
||||
|
||||
* Link with static library
|
||||
|
||||
`gcc -o test test.c /your/path/libopenblas.a`
|
||||
|
||||
You can download `test.c` from https://gist.github.com/xianyi/5780018
|
||||
|
||||
On Linux, if OpenBLAS was compiled with threading support (`USE_THREAD=1` by
|
||||
default), custom programs statically linked against `libopenblas.a` should also
|
||||
link with the pthread library e.g.:
|
||||
|
||||
```
|
||||
gcc -static -I/opt/OpenBLAS/include -L/opt/OpenBLAS/lib -o my_program my_program.c -lopenblas -lpthread
|
||||
```
|
||||
|
||||
Failing to add the `-lpthread` flag will cause errors such as:
|
||||
|
||||
```
|
||||
/opt/OpenBLAS/libopenblas.a(memory.o): In function `_touch_memory':
|
||||
memory.c:(.text+0x15): undefined reference to `pthread_mutex_lock'
|
||||
memory.c:(.text+0x41): undefined reference to `pthread_mutex_unlock'
|
||||
...
|
||||
```
|
||||
|
||||
## Code examples
|
||||
|
||||
#### Call CBLAS interface
|
||||
This example shows calling cblas_dgemm in C. https://gist.github.com/xianyi/6930656
|
||||
```
|
||||
#include <cblas.h>
|
||||
#include <stdio.h>
|
||||
|
||||
void main()
|
||||
{
|
||||
int i=0;
|
||||
double A[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
|
||||
double B[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
|
||||
double C[9] = {.5,.5,.5,.5,.5,.5,.5,.5,.5};
|
||||
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,3,3,2,1,A, 3, B, 3,2,C,3);
|
||||
|
||||
for(i=0; i<9; i++)
|
||||
printf("%lf ", C[i]);
|
||||
printf("\n");
|
||||
}
|
||||
```
|
||||
`gcc -o test_cblas_open test_cblas_dgemm.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas -lpthread -lgfortran`
|
||||
|
||||
#### Call BLAS Fortran interface
|
||||
|
||||
This example shows calling dgemm Fortran interface in C. https://gist.github.com/xianyi/5780018
|
||||
|
||||
```
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "sys/time.h"
|
||||
#include "time.h"
|
||||
|
||||
extern void dgemm_(char*, char*, int*, int*,int*, double*, double*, int*, double*, int*, double*, double*, int*);
|
||||
|
||||
/*
 * Times one call to the Fortran-interface dgemm_ and appends the result to
 * "timeDGEMM.txt".
 *
 * Usage: ./time_dgemm <m> <n> <k>
 *
 * A (m*k), B (k*n) and C (m*n) are filled with the repeating pattern 1,2,3.
 * Returns 0 on success and 1 on bad arguments, allocation failure or when
 * the output file cannot be opened.
 */
int main(int argc, char* argv[])
{
  int i;
  printf("test!\n");
  if(argc<4){
    printf("Input Error\n");
    return 1;
  }

  int m = atoi(argv[1]);
  int n = atoi(argv[2]);
  int k = atoi(argv[3]);

  /* atoi() yields 0 for non-numeric input; reject sizes that make no sense. */
  if(m<=0 || n<=0 || k<=0){
    printf("Input Error\n");
    return 1;
  }

  int sizeofa = m * k;
  int sizeofb = k * n;
  int sizeofc = m * n;
  char ta = 'N';
  char tb = 'N';
  double alpha = 1.2;
  double beta = 0.001;

  struct timeval start,finish;
  double duration;

  double* A = (double*)malloc(sizeof(double) * sizeofa);
  double* B = (double*)malloc(sizeof(double) * sizeofb);
  double* C = (double*)malloc(sizeof(double) * sizeofc);

  /* Do not hand NULL buffers to dgemm_. free(NULL) is a no-op. */
  if(A==NULL || B==NULL || C==NULL){
    printf("Out of Memory\n");
    free(A);
    free(B);
    free(C);
    return 1;
  }

  srand((unsigned)time(NULL));

  for (i=0; i<sizeofa; i++)
    A[i] = i%3+1;//(rand()%100)/10.0;

  for (i=0; i<sizeofb; i++)
    B[i] = i%3+1;//(rand()%100)/10.0;

  for (i=0; i<sizeofc; i++)
    C[i] = i%3+1;//(rand()%100)/10.0;

  printf("m=%d,n=%d,k=%d,alpha=%lf,beta=%lf,sizeofc=%d\n",m,n,k,alpha,beta,sizeofc);
  gettimeofday(&start, NULL);
  dgemm_(&ta, &tb, &m, &n, &k, &alpha, A, &m, B, &k, &beta, C, &m);
  gettimeofday(&finish, NULL);

  /* Elapsed wall-clock time in seconds. */
  duration = ((double)(finish.tv_sec-start.tv_sec)*1000000 + (double)(finish.tv_usec-start.tv_usec)) / 1000000;

  /* 2*m*n*k floating-point operations, scaled by 1e-6 to MFLOPS. */
  double mflops = 2.0 * m *n*k;
  mflops = mflops/duration*1.0e-6;

  int status = 0;
  FILE *fp;
  fp = fopen("timeDGEMM.txt", "a");
  if(fp==NULL){
    /* fprintf() on a NULL stream would crash; report the failure instead. */
    perror("timeDGEMM.txt");
    status = 1;
  }else{
    fprintf(fp, "%dx%dx%d\t%lf s\t%lf MFLOPS\n", m, n, k, duration, mflops);
    fclose(fp);
  }

  free(A);
  free(B);
  free(C);
  return status;
}
|
||||
```
|
||||
|
||||
` gcc -o time_dgemm time_dgemm.c /your/path/libopenblas.a`
|
||||
|
||||
` ./time_dgemm <m> <n> <k> `
|
||||
|
||||
## Troubleshooting
|
||||
* Please read the [FAQ](https://github.com/xianyi/OpenBLAS/wiki/Faq) first.
|
||||
* Please use gcc version 4.6 or above to compile Sandy Bridge AVX kernels on Linux/MinGW/BSD.
|
||||
* Please use Clang version 3.1 or above to compile the library on the Sandy Bridge microarchitecture. Clang 3.0 generates incorrect AVX binary code.
|
||||
* The number of CPUs/Cores should be less than or equal to 256. On Linux x86_64 (amd64), there is experimental support for up to 1024 CPUs/Cores and 128 NUMA nodes if you build the library with BIGNUMA=1.
|
||||
* OpenBLAS does not set processor affinity by default. On Linux, you can enable processor affinity by commenting out the line `NO_AFFINITY=1` in `Makefile.rule`. However, this may cause [a conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
|
||||
* On Loongson 3A, `make test` may fail because of a `pthread_create` error (error code `EAGAIN`). However, the same test case passes when it is run directly from the shell.
|
||||
|
||||
## BLAS reference manual
|
||||
If you want to understand every BLAS function and definition, please read
|
||||
[Intel MKL reference manual](https://software.intel.com/sites/products/documentation/doclib/iss/2013/mkl/mklman/GUID-F7ED9FB8-6663-4F44-A62B-61B63C4F0491.htm)
|
||||
or [netlib.org](http://netlib.org/blas/)
|
||||
|
||||
Here are [OpenBLAS extension functions](https://github.com/xianyi/OpenBLAS/wiki/OpenBLAS-Extensions)
|
||||
|
||||
## How to reference OpenBLAS.
|
||||
|
||||
You can reference our [papers](https://github.com/xianyi/OpenBLAS/wiki/publications).
|
||||
|
||||
Alternatively, you can cite the OpenBLAS homepage http://www.openblas.net directly.
|
||||
|
||||
44
appveyor.yml
Normal file
44
appveyor.yml
Normal file
@@ -0,0 +1,44 @@
|
||||
version: 0.2.19.{build}
|
||||
|
||||
#environment:
|
||||
|
||||
platform:
|
||||
- x64
|
||||
|
||||
configuration: Release
|
||||
|
||||
clone_folder: c:\projects\OpenBLAS
|
||||
|
||||
init:
|
||||
- git config --global core.autocrlf input
|
||||
|
||||
build:
|
||||
project: OpenBLAS.sln
|
||||
|
||||
clone_depth: 5
|
||||
|
||||
#branches to build
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- develop
|
||||
- cmake
|
||||
|
||||
skip_tags: true
|
||||
|
||||
matrix:
|
||||
fast_finish: true
|
||||
|
||||
skip_commits:
|
||||
# Add [av skip] to commit messages
|
||||
message: /\[av skip\]/
|
||||
|
||||
before_build:
|
||||
- echo Running cmake...
|
||||
- cd c:\projects\OpenBLAS
|
||||
- cmake -G "Visual Studio 12 Win64" .
|
||||
|
||||
test_script:
|
||||
- echo Running Test
|
||||
- cd c:\projects\OpenBLAS\utest
|
||||
- openblas_utest
|
||||
9
benchmark/Make_exe.sh
Executable file
9
benchmark/Make_exe.sh
Executable file
@@ -0,0 +1,9 @@
|
||||
#!/bin/bash
# Rename benchmark binaries so they end in ".exe": every '.' in the original
# name becomes '_' (for example "foo.goto" -> "foo_goto.exe").

for f in *.goto *.acml *.mkl *.atlas
do
	# A glob with no match stays as the literal pattern, so skip non-files.
	if [ -f "$f" ]; then
		# Quote the name so spaces or glob characters cannot split it, and
		# use "--" so a leading '-' is not parsed as an option.
		mv -- "$f" "${f//./_}.exe"
	fi
done
|
||||
|
||||
1219
benchmark/Makefile
1219
benchmark/Makefile
File diff suppressed because it is too large
Load Diff
196
benchmark/asum.c
Normal file
196
benchmark/asum.c
Normal file
@@ -0,0 +1,196 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef ASUM
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define ASUM BLASFUNC(dzasum)
|
||||
#else
|
||||
#define ASUM BLASFUNC(scasum)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define ASUM BLASFUNC(dasum)
|
||||
#else
|
||||
#define ASUM BLASFUNC(sasum)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate memory from a System V shared-memory segment created with
 * SHM_HUGETLB.
 *
 * The requested segment length is (size + HUGE_PAGESIZE) with the low bits
 * masked off by ~(HUGE_PAGESIZE - 1).
 * NOTE(review): that is a whole number of pages only if HUGE_PAGESIZE is a
 * power of two - confirm.
 *
 * If shmget() or shmat() fails, a message is printed to stdout and the
 * process exits with status 1. After a successful attach the segment is
 * marked for removal with IPC_RMID.
 *
 * Returns the attached address.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *mapped;
  int segment_id;

  segment_id = shmget(IPC_PRIVATE,
		      (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
		      SHM_HUGETLB | IPC_CREAT |0600);

  if (segment_id < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment_id, NULL, SHM_RND);

  /* shmat() signals failure with an all-ones pointer value */
  if ((BLASLONG)mapped == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(segment_id, IPC_RMID, 0);

  return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
FLOAT result;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
result = ASUM (&m, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
#ifdef COMPLEX
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg);
|
||||
#else
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -190,8 +190,8 @@ int main(int argc, char *argv[]){
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -71,8 +71,14 @@ double fabs(double);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
@@ -99,6 +105,7 @@ int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
static __inline double getmflops(int ratio, int m, double secs){
|
||||
|
||||
double mm = (double)m;
|
||||
|
||||
201
benchmark/copy.c
Normal file
201
benchmark/copy.c
Normal file
@@ -0,0 +1,201 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef COPY
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define COPY BLASFUNC(zcopy)
|
||||
#else
|
||||
#define COPY BLASFUNC(ccopy)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define COPY BLASFUNC(dcopy)
|
||||
#else
|
||||
#define COPY BLASFUNC(scopy)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate memory from a System V shared-memory segment created with
 * SHM_HUGETLB.
 *
 * The requested segment length is (size + HUGE_PAGESIZE) with the low bits
 * masked off by ~(HUGE_PAGESIZE - 1).
 * NOTE(review): that is a whole number of pages only if HUGE_PAGESIZE is a
 * power of two - confirm.
 *
 * If shmget() or shmat() fails, a message is printed to stdout and the
 * process exits with status 1. After a successful attach the segment is
 * marked for removal with IPC_RMID.
 *
 * Returns the attached address.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *mapped;
  int segment_id;

  segment_id = shmget(IPC_PRIVATE,
		      (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
		      SHM_HUGETLB | IPC_CREAT |0600);

  if (segment_id < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment_id, NULL, SHM_RND);

  /* shmat() signals failure with an all-ones pointer value */
  if ((BLASLONG)mapped == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(segment_id, IPC_RMID, 0);

  return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
COPY (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MBytes %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -184,8 +184,8 @@ int main(int argc, char *argv[]){
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -144,6 +144,7 @@ int main(int argc, char *argv[]){
|
||||
FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
|
||||
FLOAT wkopt[4];
|
||||
char job='V';
|
||||
char jobr='N';
|
||||
char *p;
|
||||
|
||||
blasint m, i, j, info,lwork;
|
||||
@@ -202,9 +203,9 @@ int main(int argc, char *argv[]){
|
||||
lwork = -1;
|
||||
m=to;
|
||||
#ifndef COMPLEX
|
||||
GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
|
||||
#else
|
||||
GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
|
||||
#endif
|
||||
|
||||
lwork = (blasint)wkopt[0];
|
||||
@@ -226,16 +227,16 @@ int main(int argc, char *argv[]){
|
||||
|
||||
lwork = -1;
|
||||
#ifndef COMPLEX
|
||||
GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
|
||||
#else
|
||||
GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
|
||||
#endif
|
||||
|
||||
lwork = (blasint)wkopt[0];
|
||||
#ifndef COMPLEX
|
||||
GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, work, &lwork, &info);
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, work, &lwork, &info);
|
||||
#else
|
||||
GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
|
||||
#endif
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
@@ -122,7 +122,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
FLOAT beta [] = {0.0, 0.0};
|
||||
char trans='N';
|
||||
blasint m, n, i, j;
|
||||
int loops = 1;
|
||||
@@ -168,12 +168,21 @@ int main(int argc, char *argv[]){
|
||||
has_param_n=1;
|
||||
}
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
for(j = 0; j < to; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
@@ -188,34 +197,23 @@ int main(int argc, char *argv[]){
|
||||
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m, (int)n);
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg /= loops;
|
||||
timeg = time1/loops;
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6);
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6, time1);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -221,7 +221,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
}
|
||||
@@ -258,7 +258,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,12 +35,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#undef GER
|
||||
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define GER BLASFUNC(zgeru)
|
||||
#else
|
||||
#define GER BLASFUNC(cgeru)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define GER BLASFUNC(dger)
|
||||
#else
|
||||
#define GER BLASFUNC(sger)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
218
benchmark/gesv.c
Normal file
218
benchmark/gesv.c
Normal file
@@ -0,0 +1,218 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
double fabs(double);
|
||||
|
||||
#undef GESV
|
||||
#undef GETRS
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
#define GESV BLASFUNC(qgesv)
|
||||
#elif defined(DOUBLE)
|
||||
#define GESV BLASFUNC(dgesv)
|
||||
#else
|
||||
#define GESV BLASFUNC(sgesv)
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
#define GESV BLASFUNC(xgesv)
|
||||
#elif defined(DOUBLE)
|
||||
#define GESV BLASFUNC(zgesv)
|
||||
#else
|
||||
#define GESV BLASFUNC(cgesv)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate memory from a System V shared-memory segment created with
 * SHM_HUGETLB.
 *
 * The requested segment length is (size + HUGE_PAGESIZE) with the low bits
 * masked off by ~(HUGE_PAGESIZE - 1).
 * NOTE(review): that is a whole number of pages only if HUGE_PAGESIZE is a
 * power of two - confirm.
 *
 * If shmget() or shmat() fails, a message is printed to stdout and the
 * process exits with status 1. After a successful attach the segment is
 * marked for removal with IPC_RMID.
 *
 * Returns the attached address.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *mapped;
  int segment_id;

  segment_id = shmget(IPC_PRIVATE,
		      (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
		      SHM_HUGETLB | IPC_CREAT |0600);

  if (segment_id < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment_id, NULL, SHM_RND);

  /* shmat() signals failure with an all-ones pointer value */
  if ((BLASLONG)mapped == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(segment_id, IPC_RMID, 0);

  return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
blasint *ipiv;
|
||||
|
||||
blasint m, i, j, info;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
|
||||
fprintf(stderr, " %dx%d : ", (int)m, (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
b[i + j * m * COMPSIZE] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (j = 0; j < m; ++j) {
|
||||
for (i = 0; i < m * COMPSIZE; ++i) {
|
||||
b[i] += a[i + j * m * COMPSIZE];
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GESV (&m, &m, a, &m, ipiv, b, &m, &info);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
|
||||
|
||||
fprintf(stderr,
|
||||
"%10.2f MFlops %10.6f s\n",
|
||||
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1);
|
||||
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
192
benchmark/iamax.c
Normal file
192
benchmark/iamax.c
Normal file
@@ -0,0 +1,192 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
/* Select the IAMAX entry point for the configured element type:
   COMPLEX picks the complex variants, DOUBLE the double-precision ones. */
#undef IAMAX

#if defined(COMPLEX) && defined(DOUBLE)
#define IAMAX BLASFUNC(izamax)
#elif defined(COMPLEX)
#define IAMAX BLASFUNC(icamax)
#elif defined(DOUBLE)
#define IAMAX BLASFUNC(idamax)
#else
#define IAMAX BLASFUNC(isamax)
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * gettimeofday() stand-in for native Windows builds.
 *
 * Reads the system time with GetSystemTimeAsFileTime(), converts it to
 * microseconds, rebases it by DELTA_EPOCH_IN_MICROSECS and stores the
 * result in *tv as seconds plus microseconds.
 *
 * tv  destination; when NULL nothing is written.
 * tz  accepted for signature compatibility only, never read.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres = 0;

  (void)tz;

  if (NULL != tv)
    {
      GetSystemTimeAsFileTime(&ft);

      /* Assemble the two 32-bit halves into one 64-bit tick count. */
      tmpres |= ft.dwHighDateTime;
      tmpres <<= 32;
      tmpres |= ft.dwLowDateTime;

      /* converting file time to unix epoch */
      tmpres /= 10;  /* convert into microseconds */
      tmpres -= DELTA_EPOCH_IN_MICROSECS;
      tv->tv_sec = (long)(tmpres / 1000000UL);
      tv->tv_usec = (long)(tmpres % 1000000UL);
    }

  return 0;
}

#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * malloc() replacement backed by a SysV shared-memory segment requested
 * with SHM_HUGETLB.  The whole section is compiled out by the trailing
 * "&& 0" in the guard above.
 *
 * size  number of bytes wanted; rounded up to a multiple of HUGE_PAGESIZE.
 *
 * Returns the attached address.  On shmget()/shmat() failure a message is
 * printed and the process exits with status 1.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* Round up with HUGE_PAGESIZE - 1 so that an already aligned size is
     not grown by a whole extra page. */
  if ((shmid =shmget(IPC_PRIVATE,
		     (size + HUGE_PAGESIZE - 1) & ~(HUGE_PAGESIZE - 1),
		     SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() reports failure as (void *)-1; compare pointers directly
     rather than through an integer cast. */
  if (address == (void *)-1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal now that it is attached. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}

#define malloc huge_malloc

#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
IAMAX (&m, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
193
benchmark/nrm2.c
Normal file
193
benchmark/nrm2.c
Normal file
@@ -0,0 +1,193 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
/* Select the NRM2 entry point for the configured element type:
   COMPLEX picks the complex variants, DOUBLE the double-precision ones. */
#undef NRM2

#if defined(COMPLEX) && defined(DOUBLE)
#define NRM2 BLASFUNC(dznrm2)
#elif defined(COMPLEX)
#define NRM2 BLASFUNC(scnrm2)
#elif defined(DOUBLE)
#define NRM2 BLASFUNC(dnrm2)
#else
#define NRM2 BLASFUNC(snrm2)
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * gettimeofday() stand-in for native Windows builds.
 *
 * Reads the system time with GetSystemTimeAsFileTime(), converts it to
 * microseconds, rebases it by DELTA_EPOCH_IN_MICROSECS and stores the
 * result in *tv as seconds plus microseconds.
 *
 * tv  destination; when NULL nothing is written.
 * tz  accepted for signature compatibility only, never read.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres = 0;

  (void)tz;

  if (NULL != tv)
    {
      GetSystemTimeAsFileTime(&ft);

      /* Assemble the two 32-bit halves into one 64-bit tick count. */
      tmpres |= ft.dwHighDateTime;
      tmpres <<= 32;
      tmpres |= ft.dwLowDateTime;

      /* converting file time to unix epoch */
      tmpres /= 10;  /* convert into microseconds */
      tmpres -= DELTA_EPOCH_IN_MICROSECS;
      tv->tv_sec = (long)(tmpres / 1000000UL);
      tv->tv_usec = (long)(tmpres % 1000000UL);
    }

  return 0;
}

#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * malloc() replacement backed by a SysV shared-memory segment requested
 * with SHM_HUGETLB.  The whole section is compiled out by the trailing
 * "&& 0" in the guard above.
 *
 * size  number of bytes wanted; rounded up to a multiple of HUGE_PAGESIZE.
 *
 * Returns the attached address.  On shmget()/shmat() failure a message is
 * printed and the process exits with status 1.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* Round up with HUGE_PAGESIZE - 1 so that an already aligned size is
     not grown by a whole extra page. */
  if ((shmid =shmget(IPC_PRIVATE,
		     (size + HUGE_PAGESIZE - 1) & ~(HUGE_PAGESIZE - 1),
		     SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() reports failure as (void *)-1; compare pointers directly
     rather than through an integer cast. */
  if (address == (void *)-1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal now that it is attached. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}

#define malloc huge_malloc

#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
NRM2 (&m, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -52,6 +52,11 @@ C)
|
||||
awk '/MFlops/ { print $3,int($9) }'|tail --lines=+2
|
||||
;;
|
||||
|
||||
B)
|
||||
# Copy Benchmark
|
||||
awk '/MBytes/ { print $1,int($3) }'|tail --lines=+2
|
||||
;;
|
||||
|
||||
|
||||
*)
|
||||
awk '/MFlops/ { print $1,int($3) }'|tail --lines=+2
|
||||
|
||||
@@ -88,6 +88,10 @@ double fabs(double);
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
|
||||
197
benchmark/rot.c
Normal file
197
benchmark/rot.c
Normal file
@@ -0,0 +1,197 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
/* Select the ROT entry point by precision.  Any earlier ROT definition is
   dropped first so the #define below cannot clash with it. */
#undef ROT

#ifdef DOUBLE
#define ROT BLASFUNC(drot)
#else
#define ROT BLASFUNC(srot)
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * gettimeofday() stand-in for native Windows builds.
 *
 * Reads the system time with GetSystemTimeAsFileTime(), converts it to
 * microseconds, rebases it by DELTA_EPOCH_IN_MICROSECS and stores the
 * result in *tv as seconds plus microseconds.
 *
 * tv  destination; when NULL nothing is written.
 * tz  accepted for signature compatibility only, never read.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres = 0;

  (void)tz;

  if (NULL != tv)
    {
      GetSystemTimeAsFileTime(&ft);

      /* Assemble the two 32-bit halves into one 64-bit tick count. */
      tmpres |= ft.dwHighDateTime;
      tmpres <<= 32;
      tmpres |= ft.dwLowDateTime;

      /* converting file time to unix epoch */
      tmpres /= 10;  /* convert into microseconds */
      tmpres -= DELTA_EPOCH_IN_MICROSECS;
      tv->tv_sec = (long)(tmpres / 1000000UL);
      tv->tv_usec = (long)(tmpres % 1000000UL);
    }

  return 0;
}

#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * malloc() replacement backed by a SysV shared-memory segment requested
 * with SHM_HUGETLB.  The whole section is compiled out by the trailing
 * "&& 0" in the guard above.
 *
 * size  number of bytes wanted; rounded up to a multiple of HUGE_PAGESIZE.
 *
 * Returns the attached address.  On shmget()/shmat() failure a message is
 * printed and the process exits with status 1.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* Round up with HUGE_PAGESIZE - 1 so that an already aligned size is
     not grown by a whole extra page. */
  if ((shmid =shmget(IPC_PRIVATE,
		     (size + HUGE_PAGESIZE - 1) & ~(HUGE_PAGESIZE - 1),
		     SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() reports failure as (void *)-1; compare pointers directly
     rather than through an integer cast. */
  if (address == (void *)-1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal now that it is attached. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}

#define malloc huge_malloc

#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
// FLOAT result;
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
FLOAT c[1] = { 2.0 };
|
||||
FLOAT s[1] = { 2.0 };
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
ROT (&m, x, &inc_x, y, &inc_y, c, s);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
202
benchmark/scal.c
Normal file
202
benchmark/scal.c
Normal file
@@ -0,0 +1,202 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
/* Select the SCAL entry point for the configured element type:
   COMPLEX picks the complex variants, DOUBLE the double-precision ones. */
#undef SCAL

#if defined(COMPLEX) && defined(DOUBLE)
#define SCAL BLASFUNC(zscal)
#elif defined(COMPLEX)
#define SCAL BLASFUNC(cscal)
#elif defined(DOUBLE)
#define SCAL BLASFUNC(dscal)
#else
#define SCAL BLASFUNC(sscal)
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * gettimeofday() stand-in for native Windows builds.
 *
 * Reads the system time with GetSystemTimeAsFileTime(), converts it to
 * microseconds, rebases it by DELTA_EPOCH_IN_MICROSECS and stores the
 * result in *tv as seconds plus microseconds.
 *
 * tv  destination; when NULL nothing is written.
 * tz  accepted for signature compatibility only, never read.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres = 0;

  (void)tz;

  if (NULL != tv)
    {
      GetSystemTimeAsFileTime(&ft);

      /* Assemble the two 32-bit halves into one 64-bit tick count. */
      tmpres |= ft.dwHighDateTime;
      tmpres <<= 32;
      tmpres |= ft.dwLowDateTime;

      /* converting file time to unix epoch */
      tmpres /= 10;  /* convert into microseconds */
      tmpres -= DELTA_EPOCH_IN_MICROSECS;
      tv->tv_sec = (long)(tmpres / 1000000UL);
      tv->tv_usec = (long)(tmpres % 1000000UL);
    }

  return 0;
}

#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * malloc() replacement backed by a SysV shared-memory segment requested
 * with SHM_HUGETLB.  The whole section is compiled out by the trailing
 * "&& 0" in the guard above.
 *
 * size  number of bytes wanted; rounded up to a multiple of HUGE_PAGESIZE.
 *
 * Returns the attached address.  On shmget()/shmat() failure a message is
 * printed and the process exits with status 1.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* Round up with HUGE_PAGESIZE - 1 so that an already aligned size is
     not grown by a whole extra page. */
  if ((shmid =shmget(IPC_PRIVATE,
		     (size + HUGE_PAGESIZE - 1) & ~(HUGE_PAGESIZE - 1),
		     SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() reports failure as (void *)-1; compare pointers directly
     rather than through an integer cast. */
  if (address == (void *)-1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal now that it is attached. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}

#define malloc huge_malloc

#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SCAL (&m, alpha, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
#ifdef COMPLEX
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 6. * (double)m / timeg * 1.e-6, timeg);
|
||||
#else
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
56
benchmark/scripts/NUMPY/cgemm.py
Executable file
56
benchmark/scripts/NUMPY/cgemm.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_cgemm(N,l):
    """Time ``l`` numpy.dot products of two random N x N complex matrices.

    Real and imaginary parts are each drawn as float32. Prints one line
    with the size, the MFlops rate (counting 8*N**3 operations per product)
    and the total elapsed seconds. Returns None.
    """
    A = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j
    B = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j

    start = time.time()
    for i in range(0,l):
        ref = numpy.dot(A,B)
    end = time.time()

    timediff = (end - start)
    if timediff > 0:
        mflops = ( 8*N*N*N) *l / timediff
        mflops *= 1e-6
    else:
        # Both clock reads gave the same value (coarse timer or tiny N):
        # report an unbounded rate instead of raising ZeroDivisionError.
        mflops = float('inf')

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions per size.
    N=128
    NMAX=2048
    NINC=128
    LOOPS=1

    # Optional positional arguments, in order: N NMAX NINC LOOPS.
    # Anything after the fourth argument is ignored.
    args = sys.argv[1:5]
    if len(args) > 0:
        N = int(args[0])
    if len(args) > 1:
        NMAX = int(args[1])
    if len(args) > 2:
        NINC = int(args[2])
    if len(args) > 3:
        LOOPS = int(args[3])

    # A non-empty OPENBLAS_LOOPS overrides the command-line loop count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    # range() rejects a zero step and yields nothing for a negative one.
    if NINC < 1:
        sys.exit("Step must be a positive integer")

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # The bound NMAX+1 includes NMAX itself without ever stepping past it,
    # even when NMAX - N is not a multiple of NINC.
    for i in range (N,NMAX+1,NINC):
        run_cgemm(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/cgemv.py
Executable file
56
benchmark/scripts/NUMPY/cgemv.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_cgemv(N,l):
    """Time ``l`` numpy.dot products of a random N x N complex matrix and vector.

    Real and imaginary parts are each drawn as float32. Prints one line
    with the size, the MFlops rate (counting 8*N**2 operations per product)
    and the total elapsed seconds. Returns None.
    """
    A = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j
    B = randn(N).astype('float32') + randn(N).astype('float32') * 1j

    start = time.time()
    for i in range(0,l):
        ref = numpy.dot(A,B)
    end = time.time()

    timediff = (end - start)
    if timediff > 0:
        mflops = ( 8*N*N) *l / timediff
        mflops *= 1e-6
    else:
        # Both clock reads gave the same value (coarse timer or tiny N):
        # report an unbounded rate instead of raising ZeroDivisionError.
        mflops = float('inf')

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions per size.
    N=128
    NMAX=2048
    NINC=128
    LOOPS=1

    # Optional positional arguments, in order: N NMAX NINC LOOPS.
    # Anything after the fourth argument is ignored.
    args = sys.argv[1:5]
    if len(args) > 0:
        N = int(args[0])
    if len(args) > 1:
        NMAX = int(args[1])
    if len(args) > 2:
        NINC = int(args[2])
    if len(args) > 3:
        LOOPS = int(args[3])

    # A non-empty OPENBLAS_LOOPS overrides the command-line loop count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    # range() rejects a zero step and yields nothing for a negative one.
    if NINC < 1:
        sys.exit("Step must be a positive integer")

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # The bound NMAX+1 includes NMAX itself without ever stepping past it,
    # even when NMAX - N is not a multiple of NINC.
    for i in range (N,NMAX+1,NINC):
        run_cgemv(i,LOOPS)
|
||||
|
||||
58
benchmark/scripts/NUMPY/daxpy.py
Executable file
58
benchmark/scripts/NUMPY/daxpy.py
Executable file
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
from scipy.linalg.blas import daxpy
|
||||
|
||||
|
||||
def run_daxpy(N,l):
    """Time ``l`` calls of scipy's daxpy on two random float64 vectors of length N.

    Each call uses a=2.0 and its result becomes the next call's y. Prints
    one line with the size, the MFlops rate (counting 2*N operations per
    call) and the total elapsed seconds. Returns None.
    """
    x = randn(N).astype('float64')
    y = randn(N).astype('float64')

    start = time.time()
    for i in range(0,l):
        y = daxpy(x,y, a=2.0 )
    end = time.time()

    timediff = (end - start)
    if timediff > 0:
        mflops = ( 2*N ) *l / timediff
        mflops *= 1e-6
    else:
        # Both clock reads gave the same value (coarse timer or tiny N):
        # report an unbounded rate instead of raising ZeroDivisionError.
        mflops = float('inf')

    size = "%d" % (N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions per size.
    N=128
    NMAX=2048
    NINC=128
    LOOPS=1

    # Optional positional arguments, in order: N NMAX NINC LOOPS.
    # Anything after the fourth argument is ignored.
    args = sys.argv[1:5]
    if len(args) > 0:
        N = int(args[0])
    if len(args) > 1:
        NMAX = int(args[1])
    if len(args) > 2:
        NINC = int(args[2])
    if len(args) > 3:
        LOOPS = int(args[3])

    # A non-empty OPENBLAS_LOOPS overrides the command-line loop count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    # range() rejects a zero step and yields nothing for a negative one.
    if NINC < 1:
        sys.exit("Step must be a positive integer")

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # The bound NMAX+1 includes NMAX itself without ever stepping past it,
    # even when NMAX - N is not a multiple of NINC.
    for i in range (N,NMAX+1,NINC):
        run_daxpy(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/ddot.py
Executable file
56
benchmark/scripts/NUMPY/ddot.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_ddot(N,l):
    """Time ``l`` numpy.dot products of two random float64 vectors of length N.

    Prints one line with the size, the MFlops rate (counting 2*N operations
    per product) and the total elapsed seconds. Returns None.
    """
    A = randn(N).astype('float64')
    B = randn(N).astype('float64')

    start = time.time()
    for i in range(0,l):
        ref = numpy.dot(A,B)
    end = time.time()

    timediff = (end - start)
    if timediff > 0:
        mflops = ( 2*N ) *l / timediff
        mflops *= 1e-6
    else:
        # Both clock reads gave the same value (coarse timer or tiny N):
        # report an unbounded rate instead of raising ZeroDivisionError.
        mflops = float('inf')

    size = "%d" % (N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions per size.
    N=128
    NMAX=2048
    NINC=128
    LOOPS=1

    # Optional positional arguments, in order: N NMAX NINC LOOPS.
    # Anything after the fourth argument is ignored.
    args = sys.argv[1:5]
    if len(args) > 0:
        N = int(args[0])
    if len(args) > 1:
        NMAX = int(args[1])
    if len(args) > 2:
        NINC = int(args[2])
    if len(args) > 3:
        LOOPS = int(args[3])

    # A non-empty OPENBLAS_LOOPS overrides the command-line loop count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    # range() rejects a zero step and yields nothing for a negative one.
    if NINC < 1:
        sys.exit("Step must be a positive integer")

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # The bound NMAX+1 includes NMAX itself without ever stepping past it,
    # even when NMAX - N is not a multiple of NINC.
    for i in range (N,NMAX+1,NINC):
        run_ddot(i,LOOPS)
|
||||
|
||||
55
benchmark/scripts/NUMPY/deig.py
Executable file
55
benchmark/scripts/NUMPY/deig.py
Executable file
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_deig(N,l):
    """Time ``l`` numpy.linalg.eig decompositions of a random N x N float64 matrix.

    Prints one line with the size, the MFlops rate (counting 26.33*N**3
    operations per decomposition) and the total elapsed seconds.
    Returns None.
    """
    A = randn(N,N).astype('float64')

    start = time.time()
    for i in range(0,l):
        la,v = numpy.linalg.eig(A)
    end = time.time()

    timediff = (end - start)
    if timediff > 0:
        mflops = ( 26.33 *N*N*N) *l / timediff
        mflops *= 1e-6
    else:
        # Both clock reads gave the same value (coarse timer or tiny N):
        # report an unbounded rate instead of raising ZeroDivisionError.
        mflops = float('inf')

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions per size.
    N=128
    NMAX=2048
    NINC=128
    LOOPS=1

    # Optional positional arguments, in order: N NMAX NINC LOOPS.
    # Anything after the fourth argument is ignored.
    args = sys.argv[1:5]
    if len(args) > 0:
        N = int(args[0])
    if len(args) > 1:
        NMAX = int(args[1])
    if len(args) > 2:
        NINC = int(args[2])
    if len(args) > 3:
        LOOPS = int(args[3])

    # A non-empty OPENBLAS_LOOPS overrides the command-line loop count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    # range() rejects a zero step and yields nothing for a negative one.
    if NINC < 1:
        sys.exit("Step must be a positive integer")

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # The bound NMAX+1 includes NMAX itself without ever stepping past it,
    # even when NMAX - N is not a multiple of NINC.
    for i in range (N,NMAX+1,NINC):
        run_deig(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/dgemm.py
Executable file
56
benchmark/scripts/NUMPY/dgemm.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_dgemm(N,l):
    """Time ``l`` products of two random N x N float64 matrices.

    N -- matrix dimension.
    l -- number of timed repetitions.

    Prints one line with the size, the estimated MFlops rate and the
    elapsed wall-clock time in seconds.  Returns None.
    """
    A = randn(N,N).astype('float64')
    B = randn(N,N).astype('float64')

    start = time.time()
    for _ in range(l):
        numpy.dot(A,B)
    timediff = time.time() - start

    # The rate assumes 2*N^3 flops per product.  A coarse clock can report
    # zero elapsed time for small N; print "inf" in that case instead of
    # raising ZeroDivisionError.
    if timediff > 0:
        mflops = ( 2*N*N*N) *l / timediff
        mflops *= 1e-6
    else:
        mflops = float('inf')

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Sweep parameters: argv[1..4] override N, NMAX, NINC and LOOPS in that
    # order; any further arguments are ignored.
    params = [128, 2048, 128, 1]
    for pos, arg in enumerate(sys.argv[1:5]):
        params[pos] = int(arg)
    N, NMAX, NINC, LOOPS = params

    # A non-empty OPENBLAS_LOOPS takes precedence over the argv loop count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX itself: an end bound of NMAX+NINC would run one size
    # past NMAX whenever (NMAX - N) is not a multiple of NINC.
    for i in range(N, NMAX+1, NINC):
        run_dgemm(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/dgemv.py
Executable file
56
benchmark/scripts/NUMPY/dgemv.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_dgemv(N,l):
    """Time ``l`` products of a random N x N float64 matrix and an N-vector.

    N -- matrix dimension / vector length.
    l -- number of timed repetitions.

    Prints one line with the size, the estimated MFlops rate and the
    elapsed wall-clock time in seconds.  Returns None.
    """
    A = randn(N,N).astype('float64')
    B = randn(N).astype('float64')

    start = time.time()
    for _ in range(l):
        numpy.dot(A,B)
    timediff = time.time() - start

    # The rate assumes 2*N^2 flops per product.  A coarse clock can report
    # zero elapsed time for small N; print "inf" in that case instead of
    # raising ZeroDivisionError.
    if timediff > 0:
        mflops = ( 2*N*N) *l / timediff
        mflops *= 1e-6
    else:
        mflops = float('inf')

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Sweep parameters: argv[1..4] override N, NMAX, NINC and LOOPS in that
    # order; any further arguments are ignored.
    params = [128, 2048, 128, 1]
    for pos, arg in enumerate(sys.argv[1:5]):
        params[pos] = int(arg)
    N, NMAX, NINC, LOOPS = params

    # A non-empty OPENBLAS_LOOPS takes precedence over the argv loop count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX itself: an end bound of NMAX+NINC would run one size
    # past NMAX whenever (NMAX - N) is not a multiple of NINC.
    for i in range(N, NMAX+1, NINC):
        run_dgemv(i,LOOPS)
|
||||
|
||||
58
benchmark/scripts/NUMPY/dgesv.py
Executable file
58
benchmark/scripts/NUMPY/dgesv.py
Executable file
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
from scipy.linalg.lapack import dgesv
|
||||
|
||||
def run_dgesv(N,l):
    """Time ``l`` calls of SciPy's LAPACK ``dgesv`` on random N x N inputs.

    N -- dimension of the coefficient matrix and of the right-hand side.
    l -- number of timed repetitions.

    Prints one line with the size, the estimated MFlops rate and the
    elapsed wall-clock time in seconds.  Returns None.
    """
    a = randn(N,N).astype('float64')
    b = randn(N,N).astype('float64')

    start = time.time()
    for _ in range(l):
        # NOTE(review): the two trailing 1s are presumably the wrapper's
        # overwrite_a / overwrite_b flags -- confirm against the SciPy docs.
        dgesv(a,b,1,1)
    timediff = time.time() - start

    # The rate assumes 2/3*N^3 + 2*N^3 flops per solve.  A coarse clock can
    # report zero elapsed time for small N; print "inf" in that case
    # instead of raising ZeroDivisionError.
    if timediff > 0:
        mflops = ( 2.0/3.0 *N*N*N + 2.0*N*N*N) *l / timediff
        mflops *= 1e-6
    else:
        mflops = float('inf')

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Sweep parameters: argv[1..4] override N, NMAX, NINC and LOOPS in that
    # order; any further arguments are ignored.
    params = [128, 2048, 128, 1]
    for pos, arg in enumerate(sys.argv[1:5]):
        params[pos] = int(arg)
    N, NMAX, NINC, LOOPS = params

    # A non-empty OPENBLAS_LOOPS takes precedence over the argv loop count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX itself: an end bound of NMAX+NINC would run one size
    # past NMAX whenever (NMAX - N) is not a multiple of NINC.
    for i in range(N, NMAX+1, NINC):
        run_dgesv(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/dsolve.py
Executable file
56
benchmark/scripts/NUMPY/dsolve.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_dsolve(N,l):
    """Time ``l`` calls of ``numpy.linalg.solve`` on random N x N inputs.

    N -- dimension of the coefficient matrix and of the right-hand side.
    l -- number of timed repetitions.

    Prints one line with the size, the estimated MFlops rate and the
    elapsed wall-clock time in seconds.  Returns None.
    """
    A = randn(N,N).astype('float64')
    B = randn(N,N).astype('float64')

    start = time.time()
    for _ in range(l):
        numpy.linalg.solve(A,B)
    timediff = time.time() - start

    # The rate assumes 2/3*N^3 + 2*N^3 flops per solve.  A coarse clock can
    # report zero elapsed time for small N; print "inf" in that case
    # instead of raising ZeroDivisionError.
    if timediff > 0:
        mflops = ( 2.0/3.0 *N*N*N + 2.0*N*N*N) *l / timediff
        mflops *= 1e-6
    else:
        mflops = float('inf')

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Sweep parameters: argv[1..4] override N, NMAX, NINC and LOOPS in that
    # order; any further arguments are ignored.
    params = [128, 2048, 128, 1]
    for pos, arg in enumerate(sys.argv[1:5]):
        params[pos] = int(arg)
    N, NMAX, NINC, LOOPS = params

    # A non-empty OPENBLAS_LOOPS takes precedence over the argv loop count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX itself: an end bound of NMAX+NINC would run one size
    # past NMAX whenever (NMAX - N) is not a multiple of NINC.
    for i in range(N, NMAX+1, NINC):
        run_dsolve(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/sdot.py
Executable file
56
benchmark/scripts/NUMPY/sdot.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_sdot(N,l):
    """Time ``l`` dot products of two random float32 vectors of length N.

    N -- vector length.
    l -- number of timed repetitions.

    Prints one line with the size, the estimated MFlops rate and the
    elapsed wall-clock time in seconds.  Returns None.
    """
    A = randn(N).astype('float32')
    B = randn(N).astype('float32')

    start = time.time()
    for _ in range(l):
        numpy.dot(A,B)
    timediff = time.time() - start

    # The rate assumes 2*N flops per dot product.  A coarse clock can
    # report zero elapsed time for small N; print "inf" in that case
    # instead of raising ZeroDivisionError.
    if timediff > 0:
        mflops = ( 2*N ) *l / timediff
        mflops *= 1e-6
    else:
        mflops = float('inf')

    size = "%d" % (N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Sweep parameters: argv[1..4] override N, NMAX, NINC and LOOPS in that
    # order; any further arguments are ignored.
    params = [128, 2048, 128, 1]
    for pos, arg in enumerate(sys.argv[1:5]):
        params[pos] = int(arg)
    N, NMAX, NINC, LOOPS = params

    # A non-empty OPENBLAS_LOOPS takes precedence over the argv loop count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX itself: an end bound of NMAX+NINC would run one size
    # past NMAX whenever (NMAX - N) is not a multiple of NINC.
    for i in range(N, NMAX+1, NINC):
        run_sdot(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/sgemm.py
Executable file
56
benchmark/scripts/NUMPY/sgemm.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_sgemm(N,l):
    """Time ``l`` products of two random N x N float32 matrices.

    N -- matrix dimension.
    l -- number of timed repetitions.

    Prints one line with the size, the estimated MFlops rate and the
    elapsed wall-clock time in seconds.  Returns None.
    """
    A = randn(N,N).astype('float32')
    B = randn(N,N).astype('float32')

    start = time.time()
    for _ in range(l):
        numpy.dot(A,B)
    timediff = time.time() - start

    # The rate assumes 2*N^3 flops per product.  A coarse clock can report
    # zero elapsed time for small N; print "inf" in that case instead of
    # raising ZeroDivisionError.
    if timediff > 0:
        mflops = ( 2*N*N*N) *l / timediff
        mflops *= 1e-6
    else:
        mflops = float('inf')

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Sweep parameters: argv[1..4] override N, NMAX, NINC and LOOPS in that
    # order; any further arguments are ignored.
    params = [128, 2048, 128, 1]
    for pos, arg in enumerate(sys.argv[1:5]):
        params[pos] = int(arg)
    N, NMAX, NINC, LOOPS = params

    # A non-empty OPENBLAS_LOOPS takes precedence over the argv loop count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX itself: an end bound of NMAX+NINC would run one size
    # past NMAX whenever (NMAX - N) is not a multiple of NINC.
    for i in range(N, NMAX+1, NINC):
        run_sgemm(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/sgemv.py
Executable file
56
benchmark/scripts/NUMPY/sgemv.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_sgemv(N,l):
    """Time ``l`` products of a random N x N float32 matrix and an N-vector.

    N -- matrix dimension / vector length.
    l -- number of timed repetitions.

    Prints one line with the size, the estimated MFlops rate and the
    elapsed wall-clock time in seconds.  Returns None.
    """
    A = randn(N,N).astype('float32')
    B = randn(N).astype('float32')

    start = time.time()
    for _ in range(l):
        numpy.dot(A,B)
    timediff = time.time() - start

    # The rate assumes 2*N^2 flops per product.  A coarse clock can report
    # zero elapsed time for small N; print "inf" in that case instead of
    # raising ZeroDivisionError.
    if timediff > 0:
        mflops = ( 2*N*N) *l / timediff
        mflops *= 1e-6
    else:
        mflops = float('inf')

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Sweep parameters: argv[1..4] override N, NMAX, NINC and LOOPS in that
    # order; any further arguments are ignored.
    params = [128, 2048, 128, 1]
    for pos, arg in enumerate(sys.argv[1:5]):
        params[pos] = int(arg)
    N, NMAX, NINC, LOOPS = params

    # A non-empty OPENBLAS_LOOPS takes precedence over the argv loop count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX itself: an end bound of NMAX+NINC would run one size
    # past NMAX whenever (NMAX - N) is not a multiple of NINC.
    for i in range(N, NMAX+1, NINC):
        run_sgemv(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/zgemm.py
Executable file
56
benchmark/scripts/NUMPY/zgemm.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_zgemm(N,l):
    """Time ``l`` products of two random N x N complex matrices.

    Each operand is built from two float64 ``randn`` draws (real part
    plus ``1j`` times imaginary part).

    N -- matrix dimension.
    l -- number of timed repetitions.

    Prints one line with the size, the estimated MFlops rate and the
    elapsed wall-clock time in seconds.  Returns None.
    """
    A = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j
    B = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j

    start = time.time()
    for _ in range(l):
        numpy.dot(A,B)
    timediff = time.time() - start

    # The rate assumes 8*N^3 flops per product.  A coarse clock can report
    # zero elapsed time for small N; print "inf" in that case instead of
    # raising ZeroDivisionError.
    if timediff > 0:
        mflops = ( 8*N*N*N) *l / timediff
        mflops *= 1e-6
    else:
        mflops = float('inf')

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Sweep parameters: argv[1..4] override N, NMAX, NINC and LOOPS in that
    # order; any further arguments are ignored.
    params = [128, 2048, 128, 1]
    for pos, arg in enumerate(sys.argv[1:5]):
        params[pos] = int(arg)
    N, NMAX, NINC, LOOPS = params

    # A non-empty OPENBLAS_LOOPS takes precedence over the argv loop count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX itself: an end bound of NMAX+NINC would run one size
    # past NMAX whenever (NMAX - N) is not a multiple of NINC.
    for i in range(N, NMAX+1, NINC):
        run_zgemm(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/zgemv.py
Executable file
56
benchmark/scripts/NUMPY/zgemv.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_zgemv(N,l):
    """Time ``l`` products of a random N x N complex matrix and an N-vector.

    Each operand is built from two float64 ``randn`` draws (real part
    plus ``1j`` times imaginary part).

    N -- matrix dimension / vector length.
    l -- number of timed repetitions.

    Prints one line with the size, the estimated MFlops rate and the
    elapsed wall-clock time in seconds.  Returns None.
    """
    A = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j
    B = randn(N).astype('float64') + randn(N).astype('float64') * 1j

    start = time.time()
    for _ in range(l):
        numpy.dot(A,B)
    timediff = time.time() - start

    # The rate assumes 8*N^2 flops per product.  A coarse clock can report
    # zero elapsed time for small N; print "inf" in that case instead of
    # raising ZeroDivisionError.
    if timediff > 0:
        mflops = ( 8*N*N) *l / timediff
        mflops *= 1e-6
    else:
        mflops = float('inf')

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Sweep parameters: argv[1..4] override N, NMAX, NINC and LOOPS in that
    # order; any further arguments are ignored.
    params = [128, 2048, 128, 1]
    for pos, arg in enumerate(sys.argv[1:5]):
        params[pos] = int(arg)
    N, NMAX, NINC, LOOPS = params

    # A non-empty OPENBLAS_LOOPS takes precedence over the argv loop count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX itself: an end bound of NMAX+NINC would run one size
    # past NMAX whenever (NMAX - N) is not a multiple of NINC.
    for i in range(N, NMAX+1, NINC):
        run_zgemv(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/OCTAVE/cgemm.m
Executable file
56
benchmark/scripts/OCTAVE/cgemm.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
# Sweep bounds and repetition count.  Each may be overridden by the
# corresponding positional command-line argument: nfrom nto nstep loops.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

arg_list = argv();
for k = 1:nargin
  if k == 1
    nfrom = str2num(arg_list{k});
  elseif k == 2
    nto = str2num(arg_list{k});
  elseif k == 3
    nstep = str2num(arg_list{k});
  elseif k == 4
    loops = str2num(arg_list{k});
  endif
endfor

# A non-empty OPENBLAS_LOOPS environment variable replaces the loop count.
p = getenv("OPENBLAS_LOOPS");
if p
  loops = str2num(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto

  # Single-precision complex n x n operands.
  A = single(rand(n,n)) + single(rand(n,n)) * 1i;
  B = single(rand(n,n)) + single(rand(n,n)) * 1i;

  start = clock();
  done = 0;
  while done < loops
    C = A * B;
    done = done + 1;
  endwhile
  timeg = etime(clock(), start);

  # Rate is based on 4.0 * 2.0*n^3 flops per product.
  flops = 4.0 * 2.0*n*n*n *loops;
  mflops = flops / ( timeg * 1.0e6 );

  st1 = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);

  n = n + nstep;
endwhile
|
||||
56
benchmark/scripts/OCTAVE/cgemv.m
Executable file
56
benchmark/scripts/OCTAVE/cgemv.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
# Sweep bounds and repetition count.  Each may be overridden by the
# corresponding positional command-line argument: nfrom nto nstep loops.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

arg_list = argv();
for k = 1:nargin
  if k == 1
    nfrom = str2num(arg_list{k});
  elseif k == 2
    nto = str2num(arg_list{k});
  elseif k == 3
    nstep = str2num(arg_list{k});
  elseif k == 4
    loops = str2num(arg_list{k});
  endif
endfor

# A non-empty OPENBLAS_LOOPS environment variable replaces the loop count.
p = getenv("OPENBLAS_LOOPS");
if p
  loops = str2num(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto

  # Single-precision complex n x n matrix and n x 1 vector.
  A = single(rand(n,n)) + single(rand(n,n)) * 1i;
  B = single(rand(n,1)) + single(rand(n,1)) * 1i;

  start = clock();
  done = 0;
  while done < loops
    C = A * B;
    done = done + 1;
  endwhile
  timeg = etime(clock(), start);

  # Rate is based on 4.0 * 2.0*n^2 flops per product.
  flops = 4.0 * 2.0*n*n *loops;
  mflops = flops / ( timeg * 1.0e6 );

  st1 = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);

  n = n + nstep;
endwhile
|
||||
56
benchmark/scripts/OCTAVE/deig.m
Executable file
56
benchmark/scripts/OCTAVE/deig.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
# Sweep bounds and repetition count.  Each may be overridden by the
# corresponding positional command-line argument: nfrom nto nstep loops.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

arg_list = argv();
for k = 1:nargin
  if k == 1
    nfrom = str2num(arg_list{k});
  elseif k == 2
    nto = str2num(arg_list{k});
  elseif k == 3
    nstep = str2num(arg_list{k});
  elseif k == 4
    loops = str2num(arg_list{k});
  endif
endfor

# A non-empty OPENBLAS_LOOPS environment variable replaces the loop count.
p = getenv("OPENBLAS_LOOPS");
if p
  loops = str2num(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto

  # Double-precision n x n matrix to decompose.
  A = double(rand(n,n));

  start = clock();
  done = 0;
  while done < loops
    [V,lambda] = eig(A);
    done = done + 1;
  endwhile
  timeg = etime(clock(), start);

  # Rate is based on 26.33*n^3 flops per decomposition.
  flops = ( 26.33 *n*n*n ) *loops;
  mflops = flops / ( timeg * 1.0e6 );

  st1 = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg );

  n = n + nstep;
endwhile
|
||||
56
benchmark/scripts/OCTAVE/dgemm.m
Executable file
56
benchmark/scripts/OCTAVE/dgemm.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
# Sweep bounds and repetition count.  Each may be overridden by the
# corresponding positional command-line argument: nfrom nto nstep loops.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

arg_list = argv();
for k = 1:nargin
  if k == 1
    nfrom = str2num(arg_list{k});
  elseif k == 2
    nto = str2num(arg_list{k});
  elseif k == 3
    nstep = str2num(arg_list{k});
  elseif k == 4
    loops = str2num(arg_list{k});
  endif
endfor

# A non-empty OPENBLAS_LOOPS environment variable replaces the loop count.
p = getenv("OPENBLAS_LOOPS");
if p
  loops = str2num(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto

  # Double-precision n x n operands.
  A = double(rand(n,n));
  B = double(rand(n,n));

  start = clock();
  done = 0;
  while done < loops
    C = A * B;
    done = done + 1;
  endwhile
  timeg = etime(clock(), start);

  # Rate is based on 2.0*n^3 flops per product.
  flops = 2.0*n*n*n *loops;
  mflops = flops / ( timeg * 1.0e6 );

  st1 = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);

  n = n + nstep;
endwhile
|
||||
56
benchmark/scripts/OCTAVE/dgemv.m
Executable file
56
benchmark/scripts/OCTAVE/dgemv.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
# Sweep bounds and repetition count.  Each may be overridden by the
# corresponding positional command-line argument: nfrom nto nstep loops.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

arg_list = argv();
for k = 1:nargin
  if k == 1
    nfrom = str2num(arg_list{k});
  elseif k == 2
    nto = str2num(arg_list{k});
  elseif k == 3
    nstep = str2num(arg_list{k});
  elseif k == 4
    loops = str2num(arg_list{k});
  endif
endfor

# A non-empty OPENBLAS_LOOPS environment variable replaces the loop count.
p = getenv("OPENBLAS_LOOPS");
if p
  loops = str2num(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto

  # Double-precision n x n matrix and n x 1 vector.
  A = double(rand(n,n));
  B = double(rand(n,1));

  start = clock();
  done = 0;
  while done < loops
    C = A * B;
    done = done + 1;
  endwhile
  timeg = etime(clock(), start);

  # Rate is based on 2.0*n^2 flops per product.
  flops = 2.0*n*n *loops;
  mflops = flops / ( timeg * 1.0e6 );

  st1 = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);

  n = n + nstep;
endwhile
|
||||
59
benchmark/scripts/OCTAVE/dsolve.m
Executable file
59
benchmark/scripts/OCTAVE/dsolve.m
Executable file
@@ -0,0 +1,59 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
# Sweep bounds and repetition count.  Each may be overridden by the
# corresponding positional command-line argument: nfrom nto nstep loops.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

arg_list = argv();
for k = 1:nargin
  if k == 1
    nfrom = str2num(arg_list{k});
  elseif k == 2
    nto = str2num(arg_list{k});
  elseif k == 3
    nstep = str2num(arg_list{k});
  elseif k == 4
    loops = str2num(arg_list{k});
  endif
endfor

# A non-empty OPENBLAS_LOOPS environment variable replaces the loop count.
p = getenv("OPENBLAS_LOOPS");
if p
  loops = str2num(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto

  # Double-precision n x n coefficient matrix and right-hand side.
  A = double(rand(n,n));
  B = double(rand(n,n));

  start = clock();
  done = 0;
  while done < loops
    x = linsolve(A,B);
    done = done + 1;
  endwhile
  timeg = etime(clock(), start);

  # Optional accuracy check (disabled): r = norm(A*x - B)/norm(B)
  # Rate is based on 2/3*n^3 + 2*n^3 flops per solve.
  flops = ( 2.0/3.0 *n*n*n + 2.0*n*n*n ) *loops;
  mflops = flops / ( timeg * 1.0e6 );

  st1 = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg );

  n = n + nstep;
endwhile
|
||||
56
benchmark/scripts/OCTAVE/sgemm.m
Executable file
56
benchmark/scripts/OCTAVE/sgemm.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
# Sweep bounds and repetition count.  Each may be overridden by the
# corresponding positional command-line argument: nfrom nto nstep loops.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

arg_list = argv();
for k = 1:nargin
  if k == 1
    nfrom = str2num(arg_list{k});
  elseif k == 2
    nto = str2num(arg_list{k});
  elseif k == 3
    nstep = str2num(arg_list{k});
  elseif k == 4
    loops = str2num(arg_list{k});
  endif
endfor

# A non-empty OPENBLAS_LOOPS environment variable replaces the loop count.
p = getenv("OPENBLAS_LOOPS");
if p
  loops = str2num(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto

  # Single-precision n x n operands.
  A = single(rand(n,n));
  B = single(rand(n,n));

  start = clock();
  done = 0;
  while done < loops
    C = A * B;
    done = done + 1;
  endwhile
  timeg = etime(clock(), start);

  # Rate is based on 2.0*n^3 flops per product.
  flops = 2.0*n*n*n *loops;
  mflops = flops / ( timeg * 1.0e6 );

  st1 = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);

  n = n + nstep;
endwhile
|
||||
56
benchmark/scripts/OCTAVE/sgemv.m
Executable file
56
benchmark/scripts/OCTAVE/sgemv.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
# Sweep bounds and repetition count.  Each may be overridden by the
# corresponding positional command-line argument: nfrom nto nstep loops.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

arg_list = argv();
for k = 1:nargin
  if k == 1
    nfrom = str2num(arg_list{k});
  elseif k == 2
    nto = str2num(arg_list{k});
  elseif k == 3
    nstep = str2num(arg_list{k});
  elseif k == 4
    loops = str2num(arg_list{k});
  endif
endfor

# A non-empty OPENBLAS_LOOPS environment variable replaces the loop count.
p = getenv("OPENBLAS_LOOPS");
if p
  loops = str2num(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto

  # Single-precision n x n matrix and n x 1 vector.
  A = single(rand(n,n));
  B = single(rand(n,1));

  start = clock();
  done = 0;
  while done < loops
    C = A * B;
    done = done + 1;
  endwhile
  timeg = etime(clock(), start);

  # Rate is based on 2.0*n^2 flops per product.
  flops = 2.0*n*n *loops;
  mflops = flops / ( timeg * 1.0e6 );

  st1 = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);

  n = n + nstep;
endwhile
|
||||
56
benchmark/scripts/OCTAVE/zgemm.m
Executable file
56
benchmark/scripts/OCTAVE/zgemm.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
# Sweep bounds and repetition count.  Each may be overridden by the
# corresponding positional command-line argument: nfrom nto nstep loops.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

arg_list = argv();
for k = 1:nargin
  if k == 1
    nfrom = str2num(arg_list{k});
  elseif k == 2
    nto = str2num(arg_list{k});
  elseif k == 3
    nstep = str2num(arg_list{k});
  elseif k == 4
    loops = str2num(arg_list{k});
  endif
endfor

# A non-empty OPENBLAS_LOOPS environment variable replaces the loop count.
p = getenv("OPENBLAS_LOOPS");
if p
  loops = str2num(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto

  # Double-precision complex n x n operands.
  A = double(rand(n,n)) + double(rand(n,n)) * 1i;
  B = double(rand(n,n)) + double(rand(n,n)) * 1i;

  start = clock();
  done = 0;
  while done < loops
    C = A * B;
    done = done + 1;
  endwhile
  timeg = etime(clock(), start);

  # Rate is based on 4.0 * 2.0*n^3 flops per product.
  flops = 4.0 * 2.0*n*n*n *loops;
  mflops = flops / ( timeg * 1.0e6 );

  st1 = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);

  n = n + nstep;
endwhile
|
||||
56
benchmark/scripts/OCTAVE/zgemv.m
Executable file
56
benchmark/scripts/OCTAVE/zgemv.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
# Sweep bounds and repetition count.  Each may be overridden by the
# corresponding positional command-line argument: nfrom nto nstep loops.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

arg_list = argv();
for k = 1:nargin
  if k == 1
    nfrom = str2num(arg_list{k});
  elseif k == 2
    nto = str2num(arg_list{k});
  elseif k == 3
    nstep = str2num(arg_list{k});
  elseif k == 4
    loops = str2num(arg_list{k});
  endif
endfor

# A non-empty OPENBLAS_LOOPS environment variable replaces the loop count.
p = getenv("OPENBLAS_LOOPS");
if p
  loops = str2num(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto

  # Double-precision complex n x n matrix and n x 1 vector.
  A = double(rand(n,n)) + double(rand(n,n)) * 1i;
  B = double(rand(n,1)) + double(rand(n,1)) * 1i;

  start = clock();
  done = 0;
  while done < loops
    C = A * B;
    done = done + 1;
  endwhile
  timeg = etime(clock(), start);

  # Rate is based on 4.0 * 2.0*n^2 flops per product.
  flops = 4.0 * 2.0*n*n *loops;
  mflops = flops / ( timeg * 1.0e6 );

  st1 = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);

  n = n + nstep;
endwhile
|
||||
55
benchmark/scripts/R/deig.R
Executable file
55
benchmark/scripts/R/deig.R
Executable file
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/Rscript
|
||||
|
||||
argv <- commandArgs(trailingOnly = TRUE)

# Sweep defaults; positional arguments override them in the order
# nfrom, nto, nstep, loops.
nfrom <- 128
nto <- 2048
nstep <- 128
loops <- 1

if (length(argv) > 0) {
  for (z in 1:length(argv)) {
    if (z == 1) {
      nfrom <- as.numeric(argv[z])
    } else if (z == 2) {
      nto <- as.numeric(argv[z])
    } else if (z == 3) {
      nstep <- as.numeric(argv[z])
    } else if (z == 4) {
      loops <- as.numeric(argv[z])
    }
  }
}

# A non-empty OPENBLAS_LOOPS environment variable replaces the loop count.
p <- Sys.getenv("OPENBLAS_LOOPS")
if (p != "") {
  loops <- as.numeric(p)
}

cat(sprintf(
  "From %.0f To %.0f Step=%.0f Loops=%.0f\n",
  nfrom,
  nto,
  nstep,
  loops
))
cat(sprintf(" SIZE Flops Time\n"))

n <- nfrom
while (n <= nto) {
  A <- matrix(rnorm(n * n), ncol = n, nrow = n)
  ev <- 0

  # seq_len() yields an empty sequence for loops == 0, whereas 1:loops
  # would count down (1, 0) and run the body twice.
  z <- system.time(for (l in seq_len(loops)) {
    ev <- eigen(A)
  })

  # 26.33 * n^3 is the flop estimate used by the other eig benchmarks in
  # this collection; z[3] is the third element of the system.time() result.
  mflops <- (26.33 * n * n * n) * loops / (z[3] * 1.0e6)

  st <- sprintf("%.0fx%.0f :", n, n)
  cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))

  n <- n + nstep
}
|
||||
64
benchmark/scripts/R/dgemm.R
Executable file
64
benchmark/scripts/R/dgemm.R
Executable file
@@ -0,0 +1,64 @@
|
||||
#!/usr/bin/Rscript
|
||||
|
||||
argv <- commandArgs(trailingOnly = TRUE)

# Sweep defaults; positional arguments override them in the order
# nfrom, nto, nstep, loops.
nfrom <- 128
nto <- 2048
nstep <- 128
loops <- 1

if (length(argv) > 0) {
  for (z in 1:length(argv)) {
    if (z == 1) {
      nfrom <- as.numeric(argv[z])
    } else if (z == 2) {
      nto <- as.numeric(argv[z])
    } else if (z == 3) {
      nstep <- as.numeric(argv[z])
    } else if (z == 4) {
      loops <- as.numeric(argv[z])
    }
  }
}

# A non-empty OPENBLAS_LOOPS environment variable replaces the loop count.
p <- Sys.getenv("OPENBLAS_LOOPS")
if (p != "") {
  loops <- as.numeric(p)
}

cat(sprintf(
  "From %.0f To %.0f Step=%.0f Loops=%.0f\n",
  nfrom,
  nto,
  nstep,
  loops
))
cat(sprintf(" SIZE Flops Time\n"))

n <- nfrom
while (n <= nto) {
  A <- matrix(runif(n * n),
              ncol = n,
              nrow = n,
              byrow = TRUE)
  B <- matrix(runif(n * n),
              ncol = n,
              nrow = n,
              byrow = TRUE)
  C <- 1

  # seq_len() yields an empty sequence for loops == 0, whereas 1:loops
  # would count down (1, 0) and run the body twice.  The for loop advances
  # l itself, so no manual increment is needed.
  z <- system.time(for (l in seq_len(loops)) {
    C <- A %*% B
  })

  # Rate is based on 2.0 * n^3 flops per product; z[3] is the third
  # element of the system.time() result.
  mflops <- (2.0 * n * n * n) * loops / (z[3] * 1.0e6)

  st <- sprintf("%.0fx%.0f :", n, n)
  cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))

  n <- n + nstep
}
|
||||
57
benchmark/scripts/R/dsolve.R
Executable file
57
benchmark/scripts/R/dsolve.R
Executable file
@@ -0,0 +1,57 @@
|
||||
#!/usr/bin/Rscript
|
||||
|
||||
argv <- commandArgs(trailingOnly = TRUE)

# Sweep defaults; positional arguments override them in the order
# nfrom, nto, nstep, loops.
nfrom <- 128
nto <- 2048
nstep <- 128
loops <- 1

if (length(argv) > 0) {
  for (z in 1:length(argv)) {
    if (z == 1) {
      nfrom <- as.numeric(argv[z])
    } else if (z == 2) {
      nto <- as.numeric(argv[z])
    } else if (z == 3) {
      nstep <- as.numeric(argv[z])
    } else if (z == 4) {
      loops <- as.numeric(argv[z])
    }
  }
}

# A non-empty OPENBLAS_LOOPS environment variable replaces the loop count.
p <- Sys.getenv("OPENBLAS_LOOPS")
if (p != "") {
  loops <- as.numeric(p)
}

cat(sprintf(
  "From %.0f To %.0f Step=%.0f Loops=%.0f\n",
  nfrom,
  nto,
  nstep,
  loops
))
cat(sprintf(" SIZE Flops Time\n"))

n <- nfrom
while (n <= nto) {
  A <- matrix(rnorm(n * n), ncol = n, nrow = n)
  B <- matrix(rnorm(n * n), ncol = n, nrow = n)

  # seq_len() yields an empty sequence for loops == 0, whereas 1:loops
  # would count down (1, 0) and run the body twice.
  z <- system.time(for (l in seq_len(loops)) {
    solve(A, B)
  })

  # Rate is based on 2/3 * n^3 + 2 * n^3 flops per solve; z[3] is the
  # third element of the system.time() result.
  mflops <-
    (2.0 / 3.0 * n * n * n + 2.0 * n * n * n) * loops / (z[3] * 1.0e6)

  st <- sprintf("%.0fx%.0f :", n, n)
  cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))

  n <- n + nstep
}
|
||||
58
benchmark/scripts/SCIPY/dsyrk.py
Executable file
58
benchmark/scripts/SCIPY/dsyrk.py
Executable file
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy import zeros
|
||||
from numpy.random import randn
|
||||
from scipy.linalg import blas
|
||||
|
||||
|
||||
def run_dsyrk(N, l):
    """Time ``l`` calls of ``blas.dsyrk`` on a random N x N float64 matrix.

    N -- matrix dimension.
    l -- number of timed repetitions.

    Prints one line with the size, the estimated MFlops rate and the
    elapsed wall-clock time in seconds.  Returns None.
    """
    # Both arrays are created in Fortran (column-major) order.
    A = randn(N, N).astype('float64', order='F')
    C = zeros((N, N), dtype='float64', order='F')

    start = time.time()
    for _ in range(l):
        blas.dsyrk(1.0, A, c=C, overwrite_c=True)
    timediff = time.time() - start

    # The rate assumes N^3 flops per update.  A coarse clock can report
    # zero elapsed time for small N; print "inf" in that case instead of
    # raising ZeroDivisionError.
    if timediff > 0:
        mflops = (N * N * N) * l / timediff
        mflops *= 1e-6
    else:
        mflops = float('inf')

    size = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Sweep parameters: argv[1..4] override N, NMAX, NINC and LOOPS in that
    # order; any further arguments are ignored.
    params = [128, 2048, 128, 1]
    for pos, arg in enumerate(sys.argv[1:5]):
        params[pos] = int(arg)
    N, NMAX, NINC, LOOPS = params

    # A non-empty OPENBLAS_LOOPS takes precedence over the argv loop count.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX itself: an end bound of NMAX + NINC would run one size
    # past NMAX whenever (NMAX - N) is not a multiple of NINC.
    for i in range(N, NMAX + 1, NINC):
        run_dsyrk(i, LOOPS)
|
||||
58
benchmark/scripts/SCIPY/ssyrk.py
Executable file
58
benchmark/scripts/SCIPY/ssyrk.py
Executable file
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy import zeros
|
||||
from numpy.random import randn
|
||||
from scipy.linalg import blas
|
||||
|
||||
|
||||
def run_ssyrk(N, l):
    """Time ``l`` calls of BLAS ``ssyrk`` on an N x N float32 matrix.

    Prints one line with the problem size, the throughput in MFlops
    (counted as N*N*N operations per call) and the elapsed seconds.

    N -- order of the square, Fortran-ordered input matrix.
    l -- number of back-to-back ssyrk calls to time.
    """
    A = randn(N, N).astype('float32', order='F')
    C = zeros((N, N), dtype='float32', order='F')

    start = time.time()
    for _ in range(0, l):
        blas.ssyrk(1.0, A, c=C, overwrite_c=True)
    end = time.time()

    timediff = (end - start)

    # Very small problems (or l == 0) can finish inside one clock tick;
    # report an unbounded rate instead of raising ZeroDivisionError.
    if timediff > 0:
        mflops = (N * N * N) * l / timediff
        mflops *= 1e-6
    else:
        mflops = float('inf')

    size = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults; each may be overridden positionally:
    #   ssyrk.py [N [NMAX [NINC [LOOPS]]]]
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    cli = sys.argv[1:]
    if len(cli) >= 1:
        N = int(cli[0])
    if len(cli) >= 2:
        NMAX = int(cli[1])
    if len(cli) >= 3:
        NINC = int(cli[2])
    if len(cli) >= 4:
        LOOPS = int(cli[3])

    # A non-empty OPENBLAS_LOOPS wins over the LOOPS argument.
    env_loops = os.environ.get('OPENBLAS_LOOPS')
    if env_loops:
        LOOPS = int(env_loops)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Sweep the matrix order from N up to NMAX in increments of NINC.
    for order in range(N, NMAX + NINC, NINC):
        run_ssyrk(order, LOOPS)
|
||||
197
benchmark/smallscaling.c
Normal file
197
benchmark/smallscaling.c
Normal file
@@ -0,0 +1,197 @@
|
||||
// run with OPENBLAS_NUM_THREADS=1 and OMP_NUM_THREADS=n
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
#include <cblas.h>
|
||||
#include <omp.h>
|
||||
#include <pthread.h>
|
||||
#define MIN_SIZE 5
|
||||
#define MAX_SIZE 60
|
||||
#define NB_SIZE 10
|
||||
|
||||
// Number of loops for a 1x1 matrix. Lower it if the test is
|
||||
// too slow on your computer.
|
||||
#define NLOOP 2e7
|
||||
|
||||
/* Everything one benchmark run needs: the problem size, the loop budget and
 * the callbacks that build the operands and drive the BLAS routine. */
typedef struct {
  int matrix_size;                      /* order of the square matrix under test */
  int n_loop;                           /* loop budget; the bench drivers divide it by matrix_size */
  void (* bench_func)();                /* driver (e.g. trmv_bench), called with a BenchParam pointer */
  void (* blas_func)();                 /* BLAS routine the driver calls; unprototyped since argument lists differ */
  void * (* create_matrix)(int size);   /* allocates and fills an operand of `size` elements */
} BenchParam;
|
||||
|
||||
/* Allocate a single-precision operand of `size` elements and fill it with
 * the ramp r[i] = 1000 * i / size.
 * Prints a message and exits if the allocation fails.
 * The caller owns the returned buffer and releases it with free(). */
void * s_create_matrix(int size) {
  /* Size the buffer from the element type: sizeof(double) allocated twice
   * the memory a float buffer needs. */
  float * r = malloc(size * sizeof *r);
  int i;
  if (r == NULL && size > 0) {
    fprintf(stderr, "s_create_matrix: out of memory\n");
    exit(EXIT_FAILURE);
  }
  for(i = 0; i < size; i++)
    r[i] = 1e3 * i / size;
  return r;
}
|
||||
|
||||
/* Allocate a single-precision complex operand of `size` elements, stored as
 * 2 * size consecutive floats, and fill it with r[i] = 1000 * i / size.
 * Prints a message and exits if the allocation fails.
 * The caller owns the returned buffer and releases it with free(). */
void * c_create_matrix(int size) {
  /* Size the buffer from the element type: sizeof(double) allocated twice
   * the memory a float buffer needs. */
  float * r = malloc(size * 2 * sizeof *r);
  int i;
  if (r == NULL && size > 0) {
    fprintf(stderr, "c_create_matrix: out of memory\n");
    exit(EXIT_FAILURE);
  }
  for(i = 0; i < 2 * size; i++)
    r[i] = 1e3 * i / size;
  return r;
}
|
||||
|
||||
/* Allocate a double-precision complex operand of `size` elements, stored as
 * 2 * size consecutive doubles, and fill it with r[i] = 1000 * i / size.
 * Prints a message and exits if the allocation fails.
 * The caller owns the returned buffer and releases it with free(). */
void * z_create_matrix(int size) {
  double * r = malloc(size * 2 * sizeof *r);
  int i;
  /* Fail loudly here rather than writing through a null pointer below. */
  if (r == NULL && size > 0) {
    fprintf(stderr, "z_create_matrix: out of memory\n");
    exit(EXIT_FAILURE);
  }
  for(i = 0; i < 2 * size; i++)
    r[i] = 1e3 * i / size;
  return r;
}
|
||||
|
||||
/* Allocate a double-precision operand of `size` elements and fill it with
 * the ramp r[i] = 1000 * i / size.
 * Prints a message and exits if the allocation fails.
 * The caller owns the returned buffer and releases it with free(). */
void * d_create_matrix(int size) {
  double * r = malloc(size * sizeof *r);
  int i;
  /* Fail loudly here rather than writing through a null pointer below. */
  if (r == NULL && size > 0) {
    fprintf(stderr, "d_create_matrix: out of memory\n");
    exit(EXIT_FAILURE);
  }
  for(i = 0; i < size; i++)
    r[i] = 1e3 * i / size;
  return r;
}
|
||||
|
||||
/* Benchmark driver for a TRMV-style routine.
 * Builds a matrix_size x matrix_size matrix and a matrix_size vector with
 * param->create_matrix, then calls param->blas_func n_loop / matrix_size
 * times with the "U", "N", "N" flags and unit stride.  Both operands are
 * freed before returning. */
void trmv_bench(BenchParam * param)
{
  int order = param->matrix_size;
  int unit_stride = 1;
  int reps = param->n_loop / order;
  int k = 0;
  void * matrix = param->create_matrix(order * order);
  void * vec = param->create_matrix(order);

  while (k < reps) {
    param->blas_func("U", "N", "N", &order, matrix, &order, vec, &unit_stride);
    k++;
  }

  free(matrix);
  free(vec);
}
|
||||
|
||||
/* Benchmark driver for a GEMV-style routine.
 * Builds a matrix_size x matrix_size matrix and a matrix_size vector with
 * param->create_matrix, then calls param->blas_func n_loop / matrix_size
 * times, using the same vector as input and output.  Both operands are
 * freed before returning. */
void gemv_bench(BenchParam * param)
{
  int i, n;
  int size = param->matrix_size;
  n = param->n_loop / size;
  /* alpha and beta.  The test table runs this driver with both dgemv_ and
   * zgemv_; the complex routine reads each scalar as a (real, imaginary)
   * pair, so a lone double would be read past its end.  The real routine
   * only looks at the first element. */
  double v[2] = { 1.01, 0.0 };
  int one = 1;
  void * A = param->create_matrix(size * size);
  void * y = param->create_matrix(size);
  for(i = 0; i < n; i++) {
    param->blas_func("N", &size, &size, v, A, &size, y, &one, v, y, &one);
  }
  free(A);
  free(y);
}
|
||||
|
||||
/* Benchmark driver for a GER-style rank-1 update.
 * Builds a matrix_size x matrix_size matrix and a matrix_size vector with
 * param->create_matrix, then calls param->blas_func n_loop / matrix_size
 * times, passing the same vector as both x and y.  Both operands are freed
 * before returning. */
void ger_bench(BenchParam * param) {
  int i, n;
  int size = param->matrix_size;
  n = param->n_loop / size;
  /* alpha.  The test table runs this driver with both dger_ and zgerc_; the
   * complex routine reads the scalar as a (real, imaginary) pair, so a lone
   * double would be read past its end.  The real routine only looks at the
   * first element. */
  double v[2] = { 1.01, 0.0 };
  int one = 1;
  void * A = param->create_matrix(size * size);
  void * y = param->create_matrix(size);
  for(i = 0; i < n; i++) {
    param->blas_func(&size, &size, v, y, &one, y, &one, A, &size);
  }
  free(A);
  free(y);
}
|
||||
|
||||
#ifndef _WIN32
|
||||
void * pthread_func_wrapper(void * param) {
|
||||
((BenchParam *)param)->bench_func(param);
|
||||
pthread_exit(NULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
#define NB_TESTS 5
|
||||
void * TESTS[4 * NB_TESTS] = {
|
||||
trmv_bench, ztrmv_, z_create_matrix, "ztrmv",
|
||||
gemv_bench, dgemv_, d_create_matrix, "dgemv",
|
||||
gemv_bench, zgemv_, z_create_matrix, "zgemv",
|
||||
ger_bench, dger_, d_create_matrix, "dger",
|
||||
ger_bench, zgerc_, z_create_matrix, "zgerc",
|
||||
};
|
||||
|
||||
inline static double delta_time(struct timespec tick) {
|
||||
struct timespec tock;
|
||||
clock_gettime(CLOCK_MONOTONIC, &tock);
|
||||
return (tock.tv_sec - tick.tv_sec) + (tock.tv_nsec - tick.tv_nsec) / 1e9;
|
||||
}
|
||||
|
||||
/* Run the benchmark on `nb_threads` POSIX threads and return the elapsed
 * wall-clock seconds (thread creation and joining included).
 * Every thread receives the same copy of *param with n_loop divided by
 * nb_threads.  On _WIN32 nothing is run and 0 is returned.  A failing
 * pthread_create() prints its return code and terminates the program. */
double pthread_bench(BenchParam * param, int nb_threads)
{
#ifdef _WIN32
  return 0;
#else
  BenchParam shared = *param;
  pthread_t workers[nb_threads];
  struct timespec started;
  int idx;

  shared.n_loop = shared.n_loop / nb_threads;

  clock_gettime(CLOCK_MONOTONIC, &started);

  idx = 0;
  while (idx < nb_threads) {
    int status = pthread_create(&workers[idx], NULL, pthread_func_wrapper, &shared);
    if (status != 0) {
      printf("ERROR; return code from pthread_create() is %d\n", status);
      exit(-1);
    }
    idx++;
  }

  /* Wait for every worker before stopping the clock. */
  for (idx = 0; idx < nb_threads; idx++)
    pthread_join(workers[idx], NULL);

  return delta_time(started);
#endif
}
|
||||
|
||||
/* Run the benchmark once on the calling thread with the full loop budget
 * and return the elapsed wall-clock seconds. */
double seq_bench(BenchParam * param) {
  struct timespec started;
  double elapsed;

  clock_gettime(CLOCK_MONOTONIC, &started);
  (*param->bench_func)(param);
  elapsed = delta_time(started);

  return elapsed;
}
|
||||
|
||||
/* Run the benchmark under an OpenMP parallel-for with one iteration per
 * available thread (omp_get_max_threads()) and return the elapsed
 * wall-clock seconds.  Every iteration receives the same copy of *param
 * with n_loop divided by the thread count. */
double omp_bench(BenchParam * param) {
  int worker;
  int workers = omp_get_max_threads();
  struct timespec started;
  BenchParam shared = *param;

  shared.n_loop = shared.n_loop / workers;

  clock_gettime(CLOCK_MONOTONIC, &started);

  #pragma omp parallel for
  for(worker = 0; worker < workers; worker ++){
    param->bench_func(&shared);
  }

  return delta_time(started);
}
|
||||
|
||||
int main(int argc, char * argv[]) {
|
||||
double inc_factor = exp(log((double)MAX_SIZE / MIN_SIZE) / NB_SIZE);
|
||||
BenchParam param;
|
||||
int test_id;
|
||||
printf ("Running on %d threads\n", omp_get_max_threads());
|
||||
for(test_id = 0; test_id < NB_TESTS; test_id ++) {
|
||||
double size = MIN_SIZE;
|
||||
param.bench_func = TESTS[test_id * 4];
|
||||
param.blas_func = TESTS[test_id * 4 + 1];
|
||||
param.create_matrix = TESTS[test_id * 4 + 2];
|
||||
printf("\nBenchmark of %s\n", (char*)TESTS[test_id * 4 + 3]);
|
||||
param.n_loop = NLOOP;
|
||||
while(size <= MAX_SIZE) {
|
||||
param.matrix_size = (int)(size + 0.5);
|
||||
double seq_time = seq_bench(¶m);
|
||||
double omp_time = omp_bench(¶m);
|
||||
double pthread_time = pthread_bench(¶m, omp_get_max_threads());
|
||||
printf("matrix size %d, sequential %gs, openmp %gs, speedup %g, "
|
||||
"pthread %gs, speedup %g\n",
|
||||
param.matrix_size, seq_time,
|
||||
omp_time, seq_time / omp_time,
|
||||
pthread_time, seq_time / pthread_time);
|
||||
size *= inc_factor;
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
201
benchmark/swap.c
Normal file
201
benchmark/swap.c
Normal file
@@ -0,0 +1,201 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SWAP
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define SWAP BLASFUNC(zswap)
|
||||
#else
|
||||
#define SWAP BLASFUNC(cswap)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define SWAP BLASFUNC(dswap)
|
||||
#else
|
||||
#define SWAP BLASFUNC(sswap)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/* Allocate `size` bytes from a private SysV shared-memory segment requested
 * with SHM_HUGETLB.  The request is rounded with the HUGE_PAGESIZE mask.
 * Prints a message and exits on failure, so the returned address is always
 * usable. */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *mapped;
  int segment = shmget(IPC_PRIVATE,
                       (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                       SHM_HUGETLB | IPC_CREAT |0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapped == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Flag the segment for removal right away; the mapping stays in use. */
  shmctl(segment, IPC_RMID, 0);

  return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SWAP (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MBytes %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -191,8 +191,8 @@ int main(int argc, char *argv[]){
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6, time1);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -130,11 +130,21 @@ int main(int argc, char *argv[]){
|
||||
char trans='N';
|
||||
char diag ='U';
|
||||
|
||||
|
||||
int l;
|
||||
int loops = 1;
|
||||
double timeg;
|
||||
|
||||
if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if ( p != NULL )
|
||||
loops = atoi(p);
|
||||
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
@@ -150,7 +160,7 @@ int main(int argc, char *argv[]){
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c\n", from, to, step,side,uplo,trans,diag);
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c Loops = %d\n", from, to, step,side,uplo,trans,diag,loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
@@ -171,28 +181,35 @@ int main(int argc, char *argv[]){
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
timeg=0.0;
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
time1 = timeg/loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
|
||||
196
benchmark/zdot-intel.c
Normal file
196
benchmark/zdot-intel.c
Normal file
@@ -0,0 +1,196 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#define RETURN_BY_STACK 1
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef DOT
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define DOT BLASFUNC(zdotu)
|
||||
#else
|
||||
#define DOT BLASFUNC(cdotu)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/* Allocate `size` bytes from a private SysV shared-memory segment requested
 * with SHM_HUGETLB.  The request is rounded with the HUGE_PAGESIZE mask.
 * Prints a message and exits on failure, so the returned address is always
 * usable. */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *mapped;
  int segment = shmget(IPC_PRIVATE,
                       (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                       SHM_HUGETLB | IPC_CREAT |0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapped == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Flag the segment for removal right away; the mapping stays in use. */
  shmctl(segment, IPC_RMID, 0);

  return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT _Complex result;
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
DOT (&result, &m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
195
benchmark/zdot.c
Normal file
195
benchmark/zdot.c
Normal file
@@ -0,0 +1,195 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef DOT
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define DOT BLASFUNC(zdotu)
|
||||
#else
|
||||
#define DOT BLASFUNC(cdotu)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/* Allocate `size` bytes from a private SysV shared-memory segment requested
 * with SHM_HUGETLB.  The request is rounded with the HUGE_PAGESIZE mask.
 * Prints a message and exits on failure, so the returned address is always
 * usable. */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *mapped;
  int segment = shmget(IPC_PRIVATE,
                       (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                       SHM_HUGETLB | IPC_CREAT |0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapped == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Flag the segment for removal right away; the mapping stays in use. */
  shmctl(segment, IPC_RMID, 0);

  return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT _Complex result;
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
result = DOT (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
69
c_check
69
c_check
@@ -1,10 +1,18 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
use File::Basename;
|
||||
use File::Temp qw(tempfile);
|
||||
|
||||
# Checking cross compile
|
||||
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
|
||||
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
|
||||
$hostarch = "x86_64" if ($hostarch eq "amd64");
|
||||
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
|
||||
$hostarch = "arm64" if ($hostarch eq "aarch64");
|
||||
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/);
|
||||
$hostarch = "zarch" if ($hostarch eq "s390x");
|
||||
|
||||
$tmpf = new File::Temp( UNLINK => 1 );
|
||||
$binary = $ENV{"BINARY"};
|
||||
|
||||
$makefile = shift(@ARGV);
|
||||
@@ -23,14 +31,12 @@ if ($?) {
|
||||
|
||||
$cross_suffix = "";
|
||||
|
||||
if ($ARGV[0] =~ /(.*)(-[.\d]+)/) {
|
||||
if ($1 =~ /(.*-)(.*)/) {
|
||||
$cross_suffix = $1;
|
||||
}
|
||||
} else {
|
||||
if ($ARGV[0] =~ /(.*-)(.*)/) {
|
||||
$cross_suffix = $1;
|
||||
}
|
||||
if (dirname($compiler_name) ne ".") {
|
||||
$cross_suffix .= dirname($compiler_name) . "/";
|
||||
}
|
||||
|
||||
if (basename($compiler_name) =~ /([^\s]*-)(.*)/) {
|
||||
$cross_suffix .= $1;
|
||||
}
|
||||
|
||||
$compiler = "";
|
||||
@@ -55,17 +61,19 @@ $os = osf if ($data =~ /OS_OSF/);
|
||||
$os = WINNT if ($data =~ /OS_WINNT/);
|
||||
$os = CYGWIN_NT if ($data =~ /OS_CYGWIN_NT/);
|
||||
$os = Interix if ($data =~ /OS_INTERIX/);
|
||||
$os = Android if ($data =~ /OS_ANDROID/);
|
||||
|
||||
$architecture = x86 if ($data =~ /ARCH_X86/);
|
||||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
|
||||
$architecture = power if ($data =~ /ARCH_POWER/);
|
||||
$architecture = mips32 if ($data =~ /ARCH_MIPS32/);
|
||||
$architecture = mips if ($data =~ /ARCH_MIPS/);
|
||||
$architecture = mips64 if ($data =~ /ARCH_MIPS64/);
|
||||
$architecture = alpha if ($data =~ /ARCH_ALPHA/);
|
||||
$architecture = sparc if ($data =~ /ARCH_SPARC/);
|
||||
$architecture = ia64 if ($data =~ /ARCH_IA64/);
|
||||
$architecture = arm if ($data =~ /ARCH_ARM/);
|
||||
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
||||
$architecture = zarch if ($data =~ /ARCH_ZARCH/);
|
||||
|
||||
$defined = 0;
|
||||
|
||||
@@ -75,7 +83,12 @@ if ($os eq "AIX") {
|
||||
$defined = 1;
|
||||
}
|
||||
|
||||
if (($architecture eq "mips32") || ($architecture eq "mips64")) {
|
||||
if ($architecture eq "mips") {
|
||||
$compiler_name .= " -mabi=32";
|
||||
$defined = 1;
|
||||
}
|
||||
|
||||
if ($architecture eq "mips64") {
|
||||
$compiler_name .= " -mabi=n32" if ($binary eq "32");
|
||||
$compiler_name .= " -mabi=64" if ($binary eq "64");
|
||||
$defined = 1;
|
||||
@@ -85,6 +98,11 @@ if (($architecture eq "arm") || ($architecture eq "arm64")) {
|
||||
$defined = 1;
|
||||
}
|
||||
|
||||
if ($architecture eq "zarch") {
|
||||
$defined = 1;
|
||||
$binary = 64;
|
||||
}
|
||||
|
||||
if ($architecture eq "alpha") {
|
||||
$defined = 1;
|
||||
$binary = 64;
|
||||
@@ -148,16 +166,35 @@ if ($?) {
|
||||
die 1;
|
||||
}
|
||||
|
||||
$have_msa = 0;
|
||||
if (($architecture eq "mips") || ($architecture eq "mips64")) {
|
||||
$code = '"addvi.b $w0, $w1, 1"';
|
||||
$msa_flags = "-mmsa -mfp64 -msched-weight -mload-store-pairs";
|
||||
print $tmpf "#include <msa.h>\n\n";
|
||||
print $tmpf "void main(void){ __asm__ volatile($code); }\n";
|
||||
|
||||
$args = "$msa_flags -o $tmpf.o -x c $tmpf";
|
||||
my @cmd = ("$compiler_name $args");
|
||||
system(@cmd) == 0;
|
||||
if ($? != 0) {
|
||||
$have_msa = 0;
|
||||
} else {
|
||||
$have_msa = 1;
|
||||
}
|
||||
unlink("$tmpf.o");
|
||||
}
|
||||
|
||||
$architecture = x86 if ($data =~ /ARCH_X86/);
|
||||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
|
||||
$architecture = power if ($data =~ /ARCH_POWER/);
|
||||
$architecture = mips32 if ($data =~ /ARCH_MIPS32/);
|
||||
$architecture = mips if ($data =~ /ARCH_MIPS/);
|
||||
$architecture = mips64 if ($data =~ /ARCH_MIPS64/);
|
||||
$architecture = alpha if ($data =~ /ARCH_ALPHA/);
|
||||
$architecture = sparc if ($data =~ /ARCH_SPARC/);
|
||||
$architecture = ia64 if ($data =~ /ARCH_IA64/);
|
||||
$architecture = arm if ($data =~ /ARCH_ARM/);
|
||||
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
||||
$architecture = zarch if ($data =~ /ARCH_ZARCH/);
|
||||
|
||||
$binformat = bin32;
|
||||
$binformat = bin64 if ($data =~ /BINARY_64/);
|
||||
@@ -205,6 +242,11 @@ $linker_a = "";
|
||||
$linker_L .= "-Wl,". $flags . " "
|
||||
}
|
||||
|
||||
if ($flags =~ /^\--exclude-libs/) {
|
||||
$linker_L .= "-Wl,". $flags . " ";
|
||||
$flags="";
|
||||
}
|
||||
|
||||
if (
|
||||
($flags =~ /^\-l/)
|
||||
&& ($flags !~ /gfortranbegin/)
|
||||
@@ -239,9 +281,11 @@ print MAKEFILE "BINARY64=\n" if $binformat ne bin64;
|
||||
print MAKEFILE "BINARY32=1\n" if $binformat eq bin32;
|
||||
print MAKEFILE "BINARY64=1\n" if $binformat eq bin64;
|
||||
print MAKEFILE "FU=$need_fu\n" if $need_fu ne "";
|
||||
print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross_suffix ne "";
|
||||
print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross != 0 && $cross_suffix ne "";
|
||||
print MAKEFILE "CROSS=1\n" if $cross != 0;
|
||||
print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n";
|
||||
print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1;
|
||||
print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1;
|
||||
|
||||
$os =~ tr/[a-z]/[A-Z]/;
|
||||
$architecture =~ tr/[a-z]/[A-Z]/;
|
||||
@@ -253,6 +297,7 @@ print CONFFILE "#define C_$compiler\t1\n";
|
||||
print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32;
|
||||
print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64;
|
||||
print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
|
||||
print CONFFILE "#define HAVE_MSA\t1\n" if $have_msa eq 1;
|
||||
|
||||
if ($os eq "LINUX") {
|
||||
|
||||
|
||||
350
cblas_noconst.h
350
cblas_noconst.h
@@ -1,350 +0,0 @@
|
||||
#ifndef CBLAS_H
|
||||
#define CBLAS_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include "common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
/* Assume C declarations for C++ */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/*Set the number of threads on runtime.*/
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
/*Get the number of threads on runtime.*/
|
||||
int openblas_get_num_threads(void);
|
||||
|
||||
/*Get the number of physical processors (cores).*/
|
||||
int openblas_get_num_procs(void);
|
||||
|
||||
/*Get the build configure on runtime.*/
|
||||
char* openblas_get_config(void);
|
||||
|
||||
/* Get the parallelization type which is used by OpenBLAS */
|
||||
int openblas_get_parallel(void);
|
||||
/* OpenBLAS is compiled for sequential use */
|
||||
#define OPENBLAS_SEQUENTIAL 0
|
||||
/* OpenBLAS is compiled using normal threading model */
|
||||
#define OPENBLAS_THREAD 1
|
||||
/* OpenBLAS is compiled using OpenMP threading model */
|
||||
#define OPENBLAS_OPENMP 2
|
||||
|
||||
|
||||
#define CBLAS_INDEX size_t
|
||||
|
||||
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
|
||||
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;
|
||||
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
|
||||
typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
|
||||
typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
|
||||
|
||||
float cblas_sdsdot(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy);
|
||||
double cblas_dsdot (blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
float cblas_sdot(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
double cblas_ddot(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
openblas_complex_float cblas_cdotu(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
openblas_complex_float cblas_cdotc(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
openblas_complex_double cblas_zdotu(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
openblas_complex_double cblas_zdotc(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
void cblas_cdotu_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret);
|
||||
void cblas_cdotc_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret);
|
||||
void cblas_zdotu_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
|
||||
void cblas_zdotc_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
|
||||
|
||||
float cblas_sasum (blasint n, float *x, blasint incx);
|
||||
double cblas_dasum (blasint n, double *x, blasint incx);
|
||||
float cblas_scasum(blasint n, float *x, blasint incx);
|
||||
double cblas_dzasum(blasint n, double *x, blasint incx);
|
||||
|
||||
float cblas_snrm2 (blasint N, float *X, blasint incX);
|
||||
double cblas_dnrm2 (blasint N, double *X, blasint incX);
|
||||
float cblas_scnrm2(blasint N, float *X, blasint incX);
|
||||
double cblas_dznrm2(blasint N, double *X, blasint incX);
|
||||
|
||||
CBLAS_INDEX cblas_isamax(blasint n, float *x, blasint incx);
|
||||
CBLAS_INDEX cblas_idamax(blasint n, double *x, blasint incx);
|
||||
CBLAS_INDEX cblas_icamax(blasint n, float *x, blasint incx);
|
||||
CBLAS_INDEX cblas_izamax(blasint n, double *x, blasint incx);
|
||||
|
||||
void cblas_saxpy(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_daxpy(blasint n, double alpha, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_caxpy(blasint n, float *alpha, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zaxpy(blasint n, double *alpha, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
void cblas_scopy(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_dcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_ccopy(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
void cblas_sswap(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_dswap(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_cswap(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zswap(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
void cblas_srot(blasint N, float *X, blasint incX, float *Y, blasint incY, float c, float s);
|
||||
void cblas_drot(blasint N, double *X, blasint incX, double *Y, blasint incY, double c, double s);
|
||||
|
||||
void cblas_srotg(float *a, float *b, float *c, float *s);
|
||||
void cblas_drotg(double *a, double *b, double *c, double *s);
|
||||
|
||||
void cblas_srotm(blasint N, float *X, blasint incX, float *Y, blasint incY, float *P);
|
||||
void cblas_drotm(blasint N, double *X, blasint incX, double *Y, blasint incY, double *P);
|
||||
|
||||
void cblas_srotmg(float *d1, float *d2, float *b1, float b2, float *P);
|
||||
void cblas_drotmg(double *d1, double *d2, double *b1, double b2, double *P);
|
||||
|
||||
void cblas_sscal(blasint N, float alpha, float *X, blasint incX);
|
||||
void cblas_dscal(blasint N, double alpha, double *X, blasint incX);
|
||||
void cblas_cscal(blasint N, float *alpha, float *X, blasint incX);
|
||||
void cblas_zscal(blasint N, double *alpha, double *X, blasint incX);
|
||||
void cblas_csscal(blasint N, float alpha, float *X, blasint incX);
|
||||
void cblas_zdscal(blasint N, double alpha, double *X, blasint incX);
|
||||
|
||||
void cblas_sgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
float alpha, float *a, blasint lda, float *x, blasint incx, float beta, float *y, blasint incy);
|
||||
void cblas_dgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
double alpha, double *a, blasint lda, double *x, blasint incx, double beta, double *y, blasint incy);
|
||||
void cblas_cgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
float *alpha, float *a, blasint lda, float *x, blasint incx, float *beta, float *y, blasint incy);
|
||||
void cblas_zgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
double *alpha, double *a, blasint lda, double *x, blasint incx, double *beta, double *y, blasint incy);
|
||||
|
||||
void cblas_sger (enum CBLAS_ORDER order, blasint M, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_dger (enum CBLAS_ORDER order, blasint M, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_cgeru(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_cgerc(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_zgeru(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_zgerc(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
|
||||
void cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
void cblas_strmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
void cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
|
||||
void cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
|
||||
void cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
|
||||
void cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
|
||||
|
||||
void cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,blasint N, float alpha, float *X,
|
||||
blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,
|
||||
blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX,
|
||||
float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX,
|
||||
double *Y, blasint incY, double *A, blasint lda);
|
||||
|
||||
void cblas_sgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, float alpha, float *A, blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, double alpha, double *A, blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_cgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, float alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dsbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, double alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
|
||||
|
||||
void cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
void cblas_stbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
void cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
|
||||
void cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
|
||||
void cblas_ssymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dsymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
|
||||
void cblas_sspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *Ap,
|
||||
float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *Ap,
|
||||
double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_sspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Ap);
|
||||
void cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Ap);
|
||||
|
||||
void cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A);
|
||||
void cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,blasint incX, double *A);
|
||||
|
||||
void cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A);
|
||||
void cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A);
|
||||
void cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *Ap);
|
||||
void cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *Ap);
|
||||
|
||||
void cblas_chbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_chpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
|
||||
float *alpha, float *Ap, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
|
||||
double *alpha, double *Ap, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_cgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
void cblas_zgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
|
||||
void cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
|
||||
void cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
|
||||
void cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
|
||||
void cblas_chemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zhemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
|
||||
void cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
||||
|
||||
/*** BLAS extensions ***/
|
||||
|
||||
void cblas_saxpby(blasint n, float alpha, float *x, blasint incx,float beta, float *y, blasint incy);
|
||||
|
||||
void cblas_daxpby(blasint n, double alpha, double *x, blasint incx,double beta, double *y, blasint incy);
|
||||
|
||||
void cblas_caxpby(blasint n, float *alpha, float *x, blasint incx,float *beta, float *y, blasint incy);
|
||||
|
||||
void cblas_zaxpby(blasint n, double *alpha, double *x, blasint incx,double *beta, double *y, blasint incy);
|
||||
|
||||
void cblas_somatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float calpha, float *a,
|
||||
blasint clda, float *b, blasint cldb);
|
||||
void cblas_domatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double calpha, double *a,
|
||||
blasint clda, double *b, blasint cldb);
|
||||
void cblas_comatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, void* calpha, void* a,
|
||||
blasint clda, void *b, blasint cldb);
|
||||
void cblas_zomatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, void* calpha, void* a,
|
||||
blasint clda, void *b, blasint cldb);
|
||||
|
||||
void cblas_simatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float calpha, float *a,
|
||||
blasint clda, blasint cldb);
|
||||
void cblas_dimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double calpha, double *a,
|
||||
blasint clda, blasint cldb);
|
||||
void cblas_cimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float* calpha, float* a,
|
||||
blasint clda, blasint cldb);
|
||||
void cblas_zimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double* calpha, double* a,
|
||||
blasint clda, blasint cldb);
|
||||
|
||||
void cblas_sgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float calpha, float *a, blasint clda, float cbeta,
|
||||
float *c, blasint cldc);
|
||||
void cblas_dgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double calpha, double *a, blasint clda, double cbeta,
|
||||
double *c, blasint cldc);
|
||||
void cblas_cgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float *calpha, float *a, blasint clda, float *cbeta,
|
||||
float *c, blasint cldc);
|
||||
void cblas_zgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double *calpha, double *a, blasint clda, double *cbeta,
|
||||
double *c, blasint cldc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
||||
115
cmake/arch.cmake
Normal file
115
cmake/arch.cmake
Normal file
@@ -0,0 +1,115 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from portion of OpenBLAS/Makefile.system
|
||||
## Sets various variables based on architecture.
|
||||
|
||||
if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64")
|
||||
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
if (NOT BINARY)
|
||||
set(NO_BINARY_MODE 1)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NOT NO_EXPRECISION)
|
||||
if (${F_COMPILER} MATCHES "GFORTRAN")
|
||||
# N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB")
|
||||
set(EXPRECISION 1)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
|
||||
endif ()
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
|
||||
set(EXPRECISION 1)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -wd981")
|
||||
endif ()
|
||||
|
||||
# Add the C compiler's OpenMP switch to CCOMMON_OPT when USE_OPENMP is on.
# The compiler IDs tested are mutually exclusive, so one if/elseif chain is
# equivalent to testing each of them separately.
if (USE_OPENMP)
  if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB")
    set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
  elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
    # The flag is still added after warning.
    message(WARNING "Clang doesn't support OpenMP yet.")
    set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
  elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Intel")
    set(CCOMMON_OPT "${CCOMMON_OPT} -openmp")
  elseif (${CMAKE_C_COMPILER_ID} STREQUAL "PGI")
    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
  elseif (${CMAKE_C_COMPILER_ID} STREQUAL "OPEN64")
    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
    set(CEXTRALIB "${CEXTRALIB} -lstdc++")
  elseif (${CMAKE_C_COMPILER_ID} STREQUAL "PATHSCALE")
    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
  endif ()
endif ()
|
||||
|
||||
|
||||
# When DYNAMIC_ARCH is requested, build the space-separated list of cores to
# compile kernels for. Only x86 and x86_64 provide a list; on any other ARCH
# DYNAMIC_CORE stays untouched and DYNAMIC_ARCH is dropped if it is empty.
if (DYNAMIC_ARCH)
  if (${ARCH} STREQUAL "x86")
    set(DYNAMIC_CORE "KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")
  elseif (${ARCH} STREQUAL "x86_64")
    set(DYNAMIC_CORE "PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")
    # AVX-capable cores are added unless NO_AVX is set.
    if (NOT NO_AVX)
      set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER")
    endif ()
    # AVX2-capable cores are added unless NO_AVX2 is set.
    if (NOT NO_AVX2)
      set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL ZEN")
    endif ()
  endif ()

  if (NOT DYNAMIC_CORE)
    unset(DYNAMIC_ARCH)
  endif ()
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "ia64")
|
||||
set(NO_BINARY_MODE 1)
|
||||
set(BINARY_DEFINED 1)
|
||||
|
||||
if (${F_COMPILER} MATCHES "GFORTRAN")
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
||||
# EXPRECISION = 1
|
||||
# CCOMMON_OPT += -DEXPRECISION
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
set(NO_BINARY_MODE 1)
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "alpha")
|
||||
set(NO_BINARY_MODE 1)
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "arm")
|
||||
set(NO_BINARY_MODE 1)
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "arm64")
|
||||
set(NO_BINARY_MODE 1)
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
|
||||
98
cmake/c_check.cmake
Normal file
98
cmake/c_check.cmake
Normal file
@@ -0,0 +1,98 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from the OpenBLAS/c_check perl script.
|
||||
## This is triggered by prebuild.cmake and runs before any of the code is built.
|
||||
## Creates config.h and Makefile.conf.
|
||||
|
||||
# CMake vars set by this file:
|
||||
# OSNAME (use CMAKE_SYSTEM_NAME)
|
||||
# ARCH
|
||||
# C_COMPILER (use CMAKE_C_COMPILER)
|
||||
# BINARY32
|
||||
# BINARY64
|
||||
# FU
|
||||
# CROSS_SUFFIX
|
||||
# CROSS
|
||||
# CEXTRALIB
|
||||
|
||||
# Defines set by this file:
|
||||
# OS_
|
||||
# ARCH_
|
||||
# C_
|
||||
# __32BIT__
|
||||
# __64BIT__
|
||||
# FUNDERSCORE
|
||||
# PTHREAD_CREATE_FUNC
|
||||
|
||||
# N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables.
|
||||
# FU is "_" for Apple and MSVC builds and empty for every other platform.
if (APPLE OR MSVC)
  set(FU "_")
else ()
  set(FU "")
endif ()
|
||||
|
||||
# Convert CMake vars into the format that OpenBLAS expects
|
||||
string(TOUPPER ${CMAKE_SYSTEM_NAME} HOST_OS)
|
||||
if (${HOST_OS} STREQUAL "WINDOWS")
|
||||
set(HOST_OS WINNT)
|
||||
endif ()
|
||||
|
||||
# added by hpa - check size of void ptr to detect 64-bit compile
|
||||
if (NOT DEFINED BINARY)
|
||||
set(BINARY 32)
|
||||
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
set(BINARY 64)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (BINARY EQUAL 64)
|
||||
set(BINARY64 1)
|
||||
else ()
|
||||
set(BINARY32 1)
|
||||
endif ()
|
||||
|
||||
# CMake docs define these:
|
||||
# CMAKE_SYSTEM_PROCESSOR - The name of the CPU CMake is building for.
|
||||
# CMAKE_HOST_SYSTEM_PROCESSOR - The name of the CPU CMake is running on.
|
||||
#
|
||||
# TODO: CMAKE_SYSTEM_PROCESSOR doesn't seem to be correct - instead get it from the compiler a la c_check
|
||||
set(ARCH ${CMAKE_SYSTEM_PROCESSOR})
|
||||
if (${ARCH} STREQUAL "AMD64")
|
||||
set(ARCH "x86_64")
|
||||
endif ()
|
||||
|
||||
# If you are using a 32-bit compiler on a 64-bit system CMAKE_SYSTEM_PROCESSOR will be wrong
|
||||
if (${ARCH} STREQUAL "x86_64" AND BINARY EQUAL 32)
|
||||
set(ARCH x86)
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "X86")
|
||||
set(ARCH x86)
|
||||
endif ()
|
||||
|
||||
if (${ARCH} MATCHES "ppc")
|
||||
set(ARCH power)
|
||||
endif ()
|
||||
|
||||
# Name used for the C_<compiler> define. It is taken from the C compiler ID
# (not the C++ one): the define describes the C compiler, and
# CMAKE_CXX_COMPILER_ID is empty when the C++ language is not enabled.
set(COMPILER_ID "${CMAKE_C_COMPILER_ID}")
# CMake reports "GNU"; the define is spelled C_GCC.
if ("${COMPILER_ID}" STREQUAL "GNU")
  set(COMPILER_ID "GCC")
endif ()
|
||||
|
||||
string(TOUPPER ${ARCH} UC_ARCH)
|
||||
|
||||
file(WRITE ${TARGET_CONF}
|
||||
"#define OS_${HOST_OS}\t1\n"
|
||||
"#define ARCH_${UC_ARCH}\t1\n"
|
||||
"#define C_${COMPILER_ID}\t1\n"
|
||||
"#define __${BINARY}BIT__\t1\n"
|
||||
"#define FUNDERSCORE\t${FU}\n")
|
||||
|
||||
if (${HOST_OS} STREQUAL "WINDOWSSTORE")
|
||||
file(APPEND ${TARGET_CONF}
|
||||
"#define OS_WINNT\t1\n")
|
||||
endif ()
|
||||
|
||||
103
cmake/cc.cmake
Normal file
103
cmake/cc.cmake
Normal file
@@ -0,0 +1,103 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from portion of OpenBLAS/Makefile.system
|
||||
## Sets C related variables.
|
||||
|
||||
# Every compiler test in this file compares CMAKE_C_COMPILER_ID.
# CMAKE_C_COMPILER holds the path of the compiler executable, so comparing it
# with an ID string such as "GNU" can never match and silently skips the section.
# Variable references in conditions are quoted so that an unset ARCH, CORE or
# OSNAME evaluates to false instead of producing a malformed if().
# NOTE(review): the ID strings "LSB", "OPEN64", "PATHSCALE" and "SUN" are kept as
# written; confirm they match what CMake reports for those toolchains.

if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")

  set(CCOMMON_OPT "${CCOMMON_OPT} -Wall")
  set(COMMON_PROF "${COMMON_PROF} -fno-inline")
  set(NO_UNINITIALIZED_WARN "-Wno-uninitialized")

  # QUIET_MAKE suppresses the uninitialized/unused warnings.
  if (QUIET_MAKE)
    set(CCOMMON_OPT "${CCOMMON_OPT} ${NO_UNINITIALIZED_WARN} -Wno-unused")
  endif ()

  if (NO_BINARY_MODE)

    if ("${ARCH}" STREQUAL "mips64")
      if (BINARY64)
        set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=64")
      else ()
        set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=n32")
      endif ()
      set(BINARY_DEFINED 1)
    endif ()

    if ("${CORE}" STREQUAL "LOONGSON3A")
      set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64")
      set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64")
    endif ()

    if ("${CORE}" STREQUAL "LOONGSON3B")
      set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64")
      set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64")
    endif ()

    if ("${OSNAME}" STREQUAL "AIX")
      set(BINARY_DEFINED 1)
    endif ()
  endif ()

  # Fall back to -m64/-m32 when nothing above (or earlier) set BINARY_DEFINED.
  if (NOT BINARY_DEFINED)
    if (BINARY64)
      set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
    else ()
      set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
    endif ()
  endif ()
endif ()

if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PGI")
  if (BINARY64)
    set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7-64")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7")
  endif ()
endif ()

if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PATHSCALE")
  if (BINARY64)
    set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
  endif ()
endif ()

if ("${CMAKE_C_COMPILER_ID}" STREQUAL "OPEN64")

  if ("${ARCH}" STREQUAL "mips64")

    if (NOT BINARY64)
      set(CCOMMON_OPT "${CCOMMON_OPT} -n32")
    else ()
      set(CCOMMON_OPT "${CCOMMON_OPT} -n64")
    endif ()

    if ("${CORE}" STREQUAL "LOONGSON3A")
      set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static")
    endif ()

    if ("${CORE}" STREQUAL "LOONGSON3B")
      set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static")
    endif ()

  else ()

    # 64-bit builds get -m64 and 32-bit builds get -m32 (these were swapped).
    if (BINARY64)
      set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
    else ()
      set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
    endif ()
  endif ()
endif ()

if ("${CMAKE_C_COMPILER_ID}" STREQUAL "SUN")
  set(CCOMMON_OPT "${CCOMMON_OPT} -w")
  if ("${ARCH}" STREQUAL "x86")
    set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
  else ()
    # NOTE(review): this appends to FCOMMON_OPT while the x86 branch appends
    # to CCOMMON_OPT - left unchanged; confirm whether CCOMMON_OPT was intended.
    set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
  endif ()
endif ()
|
||||
|
||||
60
cmake/export.cmake
Normal file
60
cmake/export.cmake
Normal file
@@ -0,0 +1,60 @@
|
||||
|
||||
#Only generate .def for dll on MSVC
|
||||
if(MSVC)

  # The .def file is produced at build time by the custom command below.
  set_source_files_properties(${OpenBLAS_DEF_FILE} PROPERTIES GENERATED 1)

  # Architecture argument for gensymbol: ARCH when defined, otherwise x86_64;
  # a "generic" CORE overrides either choice.
  if (DEFINED ARCH)
    set(ARCH_IN ${ARCH})
  else()
    set(ARCH_IN "x86_64")
  endif()

  if (${CORE} STREQUAL "generic")
    set(ARCH_IN "GENERIC")
  endif ()

  # Each remaining gensymbol argument is <OPTION>_IN: the option's value when
  # the option is defined, 0 otherwise.
  foreach (_export_opt EXPRECISION NO_CBLAS NO_LAPACK NO_LAPACKE NEED2UNDERSCORES ONLY_CBLAS)
    if (DEFINED ${_export_opt})
      set(${_export_opt}_IN ${${_export_opt}})
    else()
      set(${_export_opt}_IN 0)
    endif()
  endforeach ()

  # Run the gensymbol perl script before linking and redirect its output
  # into openblas.def in the binary directory.
  add_custom_command(
    TARGET ${OpenBLAS_LIBNAME} PRE_LINK
    COMMAND perl
    ARGS "${PROJECT_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
    COMMENT "Create openblas.def file"
    VERBATIM)

endif()
|
||||
66
cmake/f_check.cmake
Normal file
66
cmake/f_check.cmake
Normal file
@@ -0,0 +1,66 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Copyright: (c) Stat-Ease, Inc.
|
||||
## Created: 12/29/14
|
||||
## Last Modified: 12/29/14
|
||||
## Description: Ported from the OpenBLAS/f_check perl script.
|
||||
## This is triggered by prebuild.cmake and runs before any of the code is built.
|
||||
## Appends Fortran information to config.h and Makefile.conf.
|
||||
|
||||
# CMake vars set by this file:
|
||||
# F_COMPILER
|
||||
# FC
|
||||
# BU
|
||||
# NOFORTRAN
|
||||
# NEED2UNDERSCORES
|
||||
# FEXTRALIB
|
||||
|
||||
# Defines set by this file:
|
||||
# BUNDERSCORE
|
||||
# NEEDBUNDERSCORE
|
||||
# NEED2UNDERSCORES
|
||||
|
||||
if (MSVC)
|
||||
# had to do this for MSVC, else CMake automatically assumes I have ifort... -hpa
|
||||
include(CMakeForceCompiler)
|
||||
CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
|
||||
endif ()
|
||||
|
||||
if (NOT NO_LAPACK)
|
||||
enable_language(Fortran)
|
||||
else()
|
||||
include(CMakeForceCompiler)
|
||||
CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
|
||||
endif()
|
||||
|
||||
# N.B. f_check is not cross-platform, so the perl script is not run here;
# fixed values are written instead.
# TODO: detect whether underscore needed, set #defines and BU appropriately - use try_compile
# TODO: set FEXTRALIB flags a la f_check?

# Both the full build and the CBLAS-only build use BU "_" and the same two defines.
set(BU "_")
file(APPEND ${TARGET_CONF}
  "#define BUNDERSCORE _\n"
  "#define NEEDBUNDERSCORE 1\n")

if (ONLY_CBLAS)
  # When we only build CBLAS, we set NOFORTRAN=2
  set(NOFORTRAN 2)
  set(NO_FBLAS 1)
else ()
  # Only the full build also records NEED2UNDERSCORES.
  file(APPEND ${TARGET_CONF}
    "#define NEED2UNDERSCORES 0\n")
endif ()
|
||||
|
||||
get_filename_component(F_COMPILER ${CMAKE_Fortran_COMPILER} NAME_WE)
|
||||
string(TOUPPER ${F_COMPILER} F_COMPILER)
|
||||
|
||||
215
cmake/fc.cmake
Normal file
215
cmake/fc.cmake
Normal file
@@ -0,0 +1,215 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from portion of OpenBLAS/Makefile.system
|
||||
## Sets Fortran related variables.
|
||||
|
||||
# Flags for the FLANG Fortran compiler.
if (${F_COMPILER} STREQUAL "FLANG")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FLANG")

  # -i8 is added only for 64-bit builds that request INTERFACE64.
  if (BINARY64 AND INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()
  # -Wall is added for both 32-bit and 64-bit builds.
  set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp")
  endif ()
endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "G77")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G77")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
|
||||
if (NOT NO_BINARY_MODE)
|
||||
if (BINARY64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "G95")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G95")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
|
||||
if (NOT NO_BINARY_MODE)
|
||||
if (BINARY64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "GFORTRAN")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
|
||||
# Don't include -lgfortran when NO_LAPACK=1 or lsbcc.
# The previous value of EXTRALIB is kept and -lgfortran is appended to it.
# (The reference was missing its "$", which replaced EXTRALIB with the
# literal text "{EXTRALIB} -lgfortran".)
if (NOT NO_LAPACK)
  set(EXTRALIB "${EXTRALIB} -lgfortran")
endif ()
|
||||
if (NO_BINARY_MODE)
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
if (BINARY64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32")
|
||||
endif ()
|
||||
endif ()
|
||||
else ()
|
||||
if (BINARY64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
|
||||
if (INTERFACE64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
|
||||
endif ()
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (USE_OPENMP)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "INTEL")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL")
|
||||
if (INTERFACE64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
|
||||
endif ()
|
||||
if (USE_OPENMP)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "FUJITSU")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FUJITSU")
|
||||
if (USE_OPENMP)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "IBM")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_IBM")
|
||||
# FCOMMON_OPT += -qarch=440
|
||||
if (BINARY64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -q64")
|
||||
if (INTERFACE64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -qintsize=8")
|
||||
endif ()
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -q32")
|
||||
endif ()
|
||||
if (USE_OPENMP)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "PGI")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PGI")
|
||||
set(COMMON_PROF "${COMMON_PROF} -DPGICOMPILER")
|
||||
if (BINARY64)
|
||||
if (INTERFACE64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
|
||||
endif ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7-64")
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7")
|
||||
endif ()
|
||||
if (USE_OPENMP)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "PATHSCALE")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PATHSCALE")
|
||||
if (BINARY64)
|
||||
if (INTERFACE64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NOT ${ARCH} STREQUAL "mips64")
|
||||
if (NOT BINARY64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
|
||||
endif ()
|
||||
else ()
|
||||
if (BINARY64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (USE_OPENMP)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "OPEN64")
|
||||
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_OPEN64")
|
||||
if (BINARY64)
|
||||
if (INTERFACE64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
|
||||
if (NOT BINARY64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -n32")
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -n64")
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL "LOONGSON3A")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static")
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL "LOONGSON3B")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static")
|
||||
endif ()
|
||||
else ()
|
||||
if (NOT BINARY64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (USE_OPENMP)
|
||||
set(FEXTRALIB "${FEXTRALIB} -lstdc++")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "SUN")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN")
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
|
||||
endif ()
|
||||
if (USE_OPENMP)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -xopenmp=parallel")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "COMPAQ")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_COMPAQ")
|
||||
if (USE_OPENMP)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# from the root Makefile - this is for lapack-netlib to compile the correct secnd file.
|
||||
if (${F_COMPILER} STREQUAL "GFORTRAN")
|
||||
set(TIMER "INT_ETIME")
|
||||
else ()
|
||||
set(TIMER "NONE")
|
||||
endif ()
|
||||
|
||||
165
cmake/kernel.cmake
Normal file
165
cmake/kernel.cmake
Normal file
@@ -0,0 +1,165 @@
|
||||
# helper functions for the kernel CMakeLists.txt
|
||||
|
||||
|
||||
# Set the default filenames for L1 objects. Most of these will be overridden by the appropriate KERNEL file.
|
||||
# Sets the default <PREFIX><OP>KERNEL source file variables for the L1 kernels.
# Prefixes S, D, Q are handled as the "real" group and C, Z, X as the "complex"
# group; most complex defaults are the real file name with a "z" in front.
macro(SetDefaultL1)
  # Operations that follow the plain pattern <op>.S / z<op>.S.
  foreach (_l1_op AMAX AMIN ASUM AXPY COPY DOT NRM2 ROT SCAL SWAP)
    string(TOLOWER "${_l1_op}" _l1_src)
    foreach (_l1_real S D Q)
      set(${_l1_real}${_l1_op}KERNEL ${_l1_src}.S)
    endforeach ()
    foreach (_l1_cplx C Z X)
      set(${_l1_cplx}${_l1_op}KERNEL z${_l1_src}.S)
    endforeach ()
  endforeach ()

  # I-prefixed AMAX/AMIN variants: i<op>.S / iz<op>.S.
  foreach (_l1_op AMAX AMIN)
    string(TOLOWER "${_l1_op}" _l1_src)
    foreach (_l1_real S D Q)
      set(I${_l1_real}${_l1_op}KERNEL i${_l1_src}.S)
    endforeach ()
    foreach (_l1_cplx C Z X)
      set(I${_l1_cplx}${_l1_op}KERNEL iz${_l1_src}.S)
    endforeach ()
  endforeach ()

  # Defaults that exist for the real prefixes only. Note that the I*MAX/I*MIN
  # defaults point at iamax.S/iamin.S.
  foreach (_l1_real S D Q)
    set(${_l1_real}MAXKERNEL max.S)
    set(${_l1_real}MINKERNEL min.S)
    set(I${_l1_real}MAXKERNEL iamax.S)
    set(I${_l1_real}MINKERNEL iamin.S)
    set(${_l1_real}GEMVNKERNEL gemv_n.S)
    set(${_l1_real}GEMVTKERNEL gemv_t.S)
    set(${_l1_real}CABS_KERNEL ../generic/cabs.c)
  endforeach ()

  foreach (_l1_cplx C Z X)
    set(${_l1_cplx}GEMVNKERNEL zgemv_n.S)
    set(${_l1_cplx}GEMVTKERNEL zgemv_t.S)
  endforeach ()

  set(LSAME_KERNEL ../generic/lsame.c)

  # AXPBY defaults are C sources from ../arm, for S/D and C/Z only.
  set(SAXPBYKERNEL ../arm/axpby.c)
  set(DAXPBYKERNEL ../arm/axpby.c)
  set(CAXPBYKERNEL ../arm/zaxpby.c)
  set(ZAXPBYKERNEL ../arm/zaxpby.c)

  # Do not leave the helper variables behind in the caller's scope.
  unset(_l1_op)
  unset(_l1_src)
  unset(_l1_real)
  unset(_l1_cplx)
endmacro ()
|
||||
|
||||
# Sets the default <PREFIX>...KERNEL source file variables for the L2 kernels.
# Prefixes S, D, Q form the "real" group and C, Z, X the "complex" group.
macro(SetDefaultL2)
  foreach (_l2_real S D Q)
    set(${_l2_real}GEMVNKERNEL gemv_n.S)
    set(${_l2_real}GEMVTKERNEL gemv_t.S)
    set(${_l2_real}GERKERNEL ../generic/ger.c)
    set(${_l2_real}SYMV_U_KERNEL ../generic/symv_k.c)
    set(${_l2_real}SYMV_L_KERNEL ../generic/symv_k.c)
  endforeach ()

  foreach (_l2_cplx C Z X)
    set(${_l2_cplx}GEMVNKERNEL zgemv_n.S)
    set(${_l2_cplx}GEMVTKERNEL zgemv_t.S)
    # GERU and GERC share one default source.
    set(${_l2_cplx}GERUKERNEL ../generic/zger.c)
    set(${_l2_cplx}GERCKERNEL ../generic/zger.c)
    set(${_l2_cplx}SYMV_U_KERNEL ../generic/zsymv_k.c)
    set(${_l2_cplx}SYMV_L_KERNEL ../generic/zsymv_k.c)
    # All four HEMV variants (U, L, V, M) default to the same source.
    foreach (_l2_variant U L V M)
      set(${_l2_cplx}HEMV_${_l2_variant}_KERNEL ../generic/zhemv_k.c)
    endforeach ()
  endforeach ()

  # Do not leave the helper variables behind in the caller's scope.
  unset(_l2_real)
  unset(_l2_cplx)
  unset(_l2_variant)
endmacro ()
|
||||
|
||||
# Sets the default GEADD kernel sources: geadd.c for the S and D prefixes,
# zgeadd.c for the C and Z prefixes.
macro(SetDefaultL3)
  foreach (_l3_real S D)
    set(${_l3_real}GEADD_KERNEL ../generic/geadd.c)
  endforeach ()
  foreach (_l3_cplx C Z)
    set(${_l3_cplx}GEADD_KERNEL ../generic/zgeadd.c)
  endforeach ()

  # Do not leave the helper variables behind in the caller's scope.
  unset(_l3_real)
  unset(_l3_cplx)
endmacro ()
|
||||
387
cmake/lapack.cmake
Normal file
387
cmake/lapack.cmake
Normal file
@@ -0,0 +1,387 @@
|
||||
# Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files.
|
||||
|
||||
set(ALLAUX
|
||||
ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f
|
||||
ilaprec.f ilatrans.f ilauplo.f iladiag.f iparam2stage.F chla_transtype.f
|
||||
../INSTALL/ilaver.f ../INSTALL/slamch.f
|
||||
)
|
||||
|
||||
set(SCLAUX
|
||||
sbdsdc.f
|
||||
sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f
|
||||
slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f
|
||||
slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f
|
||||
slagts.f slamrg.f slanst.f
|
||||
slapy2.f slapy3.f slarnv.f
|
||||
slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f
|
||||
slarrk.f slarrr.f slaneg.f
|
||||
slartg.f slaruv.f slas2.f slascl.f
|
||||
slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f
|
||||
slasd7.f slasd8.f slasda.f slasdq.f slasdt.f
|
||||
slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f
|
||||
slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f
|
||||
ssteqr.f ssterf.f slaisnan.f sisnan.f
|
||||
slartgp.f slartgs.f
|
||||
../INSTALL/second_${TIMER}.f
|
||||
)
|
||||
|
||||
set(DZLAUX
|
||||
dbdsdc.f dbdsvdx.f
|
||||
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
|
||||
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
|
||||
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
|
||||
dlagts.f dlamrg.f dlanst.f
|
||||
dlapy2.f dlapy3.f dlarnv.f
|
||||
dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f
|
||||
dlarrk.f dlarrr.f dlaneg.f
|
||||
dlartg.f dlaruv.f dlas2.f dlascl.f
|
||||
dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
|
||||
dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
|
||||
dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
|
||||
dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f
|
||||
dsteqr.f dsterf.f dlaisnan.f disnan.f
|
||||
dlartgp.f dlartgs.f
|
||||
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f
|
||||
dgelq.f dgelqt.f dgelqt3.f dgemlq.f dgemlqt.f dgemqr.f dgeqr.f
|
||||
dgetsls.f dlamswlq.f dlamtsqr.f dlaswlq.f dlatsqr.f dtplqt.f
|
||||
dtplqt2.f dtpmlqt.f dsysv_aa.f dsytrf_aa.f dsytrs_aa.f dlasyf_aa.f
|
||||
dsytf2_rk.f dlasyf_rk.f dsytrf_rk.f dsytrs_3.f dsycon_3.f dsytri_3.f
|
||||
dsytri_3x.f dsysv_rk.f dsb2st_kernels.f dsbev_2stage.f dsbevd_2stage.f
|
||||
dsbevx_2stage.f dsyev_2stage.f dsyevd_2stage.f dsyevr_2stage.f
|
||||
dsyevx_2stage.f dsygv_2stage.f dsytrd_2stage.f dsytrd_sb2st.F
|
||||
dsytrd_sy2sb.f dlarfy.f
|
||||
)
|
||||
|
||||
set(SLASRC
|
||||
sbdsvdx.f sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
|
||||
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
|
||||
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
|
||||
DEPRECATED/sgegs.f DEPRECATED/sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f
|
||||
sgels.f sgelsd.f sgelss.f DEPRECATED/sgelsx.f sgelsy.f sgeql2.f sgeqlf.f
|
||||
sgeqp3.f DEPRECATED/sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f
|
||||
sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvdx.f sgesvx.f
|
||||
sgetc2.f sgetri.f sgetrf2.f
|
||||
sggbak.f sggbal.f sgghd3.f sgges.f sgges3.f sggesx.f sggev.f sggev3.f sggevx.f
|
||||
sggglm.f sgghrd.f sgglse.f sggqrf.f
|
||||
sggrqf.f DEPRECATED/sggsvd.f sggsvd3.f DEPRECATED/sggsvp.f sggsvp3.f sgtcon.f sgtrfs.f sgtsv.f
|
||||
sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
|
||||
shsein.f shseqr.f slabrd.f slacon.f slacn2.f
|
||||
slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
|
||||
DEPRECATED/slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
|
||||
slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
|
||||
slansy.f slantb.f slantp.f slantr.f slanv2.f
|
||||
slapll.f slapmt.f
|
||||
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
|
||||
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
|
||||
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
|
||||
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f
|
||||
slarrv.f slartv.f
|
||||
slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f
|
||||
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f DEPRECATED/slatzm.f
|
||||
sopgtr.f sopmtr.f sorg2l.f sorg2r.f
|
||||
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
|
||||
sorgrq.f sorgtr.f sorm2l.f sorm2r.f sorm22.f
|
||||
sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
|
||||
sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
|
||||
spbstf.f spbsv.f spbsvx.f
|
||||
spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f
|
||||
sposvx.f spstrf.f spstf2.f
|
||||
sppcon.f sppequ.f
|
||||
spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f
|
||||
spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f
|
||||
ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f
|
||||
ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f
|
||||
sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f
|
||||
ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f
|
||||
sstevx.f
|
||||
ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
|
||||
ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f
|
||||
ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f
|
||||
ssyswapr.f ssytrs.f ssytrs2.f ssyconv.f
|
||||
ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f
|
||||
ssytri_rook.f ssycon_rook.f ssysv_rook.f
|
||||
stbcon.f
|
||||
stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
|
||||
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
|
||||
stptrs.f
|
||||
strcon.f strevc.f strevc3.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
|
||||
strtrs.f DEPRECATED/stzrqf.f stzrzf.f sstemr.f
|
||||
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
|
||||
stfttr.f stpttf.f stpttr.f strttf.f strttp.f
|
||||
sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
|
||||
sgeequb.f ssyequb.f spoequb.f sgbequb.f
|
||||
sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f
|
||||
sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f
|
||||
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
|
||||
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f
|
||||
sgelq.f sgelqt.f sgelqt3.f sgemlq.f sgemlqt.f sgemqr.f sgeqr.f sgetsls.f
|
||||
slamswlq.f slamtsqr.f slaswlq.f slatsqr.f stplqt.f stplqt2.f stpmlqt.f
|
||||
ssysv_aa.f ssytrf_aa.f ssytrs_aa.f slasyf_aa.f ssytf2_rk.f slasyf_rk.f
|
||||
ssytrf_rk.f ssytrs_3.f ssycon_3.f ssytri_3.f ssytri_3x.f ssysv_rk.f
|
||||
ssb2st_kernels.f ssbev_2stage.f ssbevd_2stage.f ssbevx_2stage.f
|
||||
ssyev_2stage.f ssyevd_2stage.f ssyevr_2stage.f ssyevx_2stage.f
|
||||
ssygv_2stage.f ssytrd_2stage.f ssytrd_sb2st.F ssytrd_sy2sb.f slarfy.f
|
||||
)
|
||||
|
||||
set(DSLASRC spotrs.f spotrf2.f)
|
||||
|
||||
set(CLASRC
|
||||
cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
|
||||
cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
|
||||
cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
|
||||
DEPRECATED/cgegs.f DEPRECATED/cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f
|
||||
cgels.f cgelsd.f cgelss.f DEPRECATED/cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
|
||||
DEPRECATED/cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f
|
||||
cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f
|
||||
cgesvx.f cgetc2.f cgetri.f
|
||||
cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f
|
||||
cgghrd.f cgglse.f cggqrf.f cggrqf.f
|
||||
DEPRECATED/cggsvd.f DEPRECATED/cggsvp.f
|
||||
cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
|
||||
chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
|
||||
checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
|
||||
chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f
|
||||
chetf2.f chetrd.f
|
||||
chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f
|
||||
chetrs.f chetrs2.f
|
||||
chetf2_rook.f chetrf_rook.f chetri_rook.f chetrs_rook.f checon_rook.f chesv_rook.f
|
||||
chgeqz.f chpcon.f chpev.f chpevd.f
|
||||
chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f
|
||||
chpsvx.f
|
||||
chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f
|
||||
clacgv.f clacon.f clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f
|
||||
claed0.f claed7.f claed8.f
|
||||
claein.f claesy.f claev2.f clags2.f clagtm.f
|
||||
clahef.f clahef_rook.f clahqr.f
|
||||
DEPRECATED/clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
|
||||
clanhb.f clanhe.f
|
||||
clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
|
||||
clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
|
||||
claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
|
||||
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
|
||||
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
|
||||
clarf.f clarfb.f clarfg.f clarft.f clarfgp.f
|
||||
clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
|
||||
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
|
||||
clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
|
||||
DEPRECATED/clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
|
||||
cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
|
||||
cposv.f cposvx.f cpstrf.f cpstf2.f
|
||||
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
|
||||
cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f
|
||||
crot.f cspcon.f csprfs.f cspsv.f
|
||||
cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f
|
||||
cstegr.f cstein.f csteqr.f
|
||||
csycon.f
|
||||
csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f csytri2.f csytri2x.f
|
||||
csyswapr.f csytrs.f csytrs2.f csyconv.f
|
||||
csytf2_rook.f csytrf_rook.f csytrs_rook.f
|
||||
csytri_rook.f csycon_rook.f csysv_rook.f
|
||||
ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f
|
||||
ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
|
||||
ctprfs.f ctptri.f
|
||||
ctptrs.f ctrcon.f ctrevc.f ctrevc3.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
|
||||
ctrsyl.f ctrtrs.f DEPRECATED/ctzrqf.f ctzrzf.f cung2l.f cung2r.f
|
||||
cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
|
||||
cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f
|
||||
cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
|
||||
cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f
|
||||
chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f cpftrs.f ctfsm.f ctftri.f
|
||||
ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f
|
||||
cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f
|
||||
cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f
|
||||
cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f
|
||||
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
|
||||
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f
|
||||
cgelq.f cgelqt.f cgelqt3.f cgemlq.f cgemlqt.f cgemqr.f cgeqr.f cgetsls.f
|
||||
clamswlq.f clamtsqr.f claswlq.f clatsqr.f ctplqt.f ctplqt2.f ctpmlqt.f
|
||||
chesv_aa.f chetrf_aa.f chetrs_aa.f clahef_aa.f csytf2_rk.f clasyf_rk.f
|
||||
csytrf_rk.f csytrs_3.f csycon_3.f csytri_3.f csytri_3x.f csysv_rk.f
|
||||
chetf2_rk.f clahef_rk.f chetrf_rk.f chetrs_3.f checon_3.f chetri_3.f
|
||||
chetri_3x.f chesv_rk.f chb2st_kernels.f chbev_2stage.f chbevd_2stage.f
|
||||
chbevx_2stage.f cheev_2stage.f cheevd_2stage.f cheevr_2stage.f cheevx_2stage.f
|
||||
chegv_2stage.f chetrd_2stage.f chetrd_hb2st.F chetrd_he2hb.f clarfy.f
|
||||
)
|
||||
|
||||
set(ZCLASRC cpotrs.f)
|
||||
|
||||
set(DLASRC
|
||||
dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
|
||||
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
|
||||
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
|
||||
DEPRECATED/dgegs.f DEPRECATED/dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f
|
||||
dgels.f dgelsd.f dgelss.f DEPRECATED/dgelsx.f dgelsy.f dgeql2.f dgeqlf.f
|
||||
dgeqp3.f DEPRECATED/dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f
|
||||
dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvdx.f dgesvx.f
|
||||
dgetc2.f dgetri.f dgetrf2.f
|
||||
dggbak.f dggbal.f dgges.f dgges3.f dggesx.f dggev.f dggev3.f dggevx.f
|
||||
dggglm.f dgghd3.f dgghrd.f dgglse.f dggqrf.f
|
||||
dggrqf.f dggsvd3.f dggsvp3.f DEPRECATED/dggsvd.f DEPRECATED/dggsvp.f dgtcon.f dgtrfs.f dgtsv.f
|
||||
dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
|
||||
dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
|
||||
dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
|
||||
DEPRECATED/dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
|
||||
dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
|
||||
dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
|
||||
dlapll.f dlapmt.f
|
||||
dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
|
||||
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
|
||||
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
|
||||
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f
|
||||
dlargv.f dlarrv.f dlartv.f
|
||||
dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f
|
||||
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f DEPRECATED/dlatzm.f
|
||||
dopgtr.f dopmtr.f dorg2l.f dorg2r.f
|
||||
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
|
||||
dorgrq.f dorgtr.f dorm2l.f dorm2r.f dorm22.f
|
||||
dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
|
||||
dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f
|
||||
dpbstf.f dpbsv.f dpbsvx.f
|
||||
dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f
|
||||
dposvx.f dpotrf2.f dpotrs.f dpstrf.f dpstf2.f
|
||||
dppcon.f dppequ.f
|
||||
dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f
|
||||
dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f
|
||||
dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f
|
||||
dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f
|
||||
dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f
|
||||
dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f
|
||||
dstevx.f
|
||||
dsycon.f dsyev.f dsyevd.f dsyevr.f
|
||||
dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f
|
||||
dsysv.f dsysvx.f
|
||||
dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytri2.f dsytri2x.f
|
||||
dsyswapr.f dsytrs.f dsytrs2.f dsyconv.f
|
||||
dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f
|
||||
dsytri_rook.f dsycon_rook.f dsysv_rook.f
|
||||
dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
|
||||
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
|
||||
dtptrs.f
|
||||
dtrcon.f dtrevc.f dtrevc3.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
|
||||
dtrtrs.f DEPRECATED/dtzrqf.f dtzrzf.f dstemr.f
|
||||
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
|
||||
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
|
||||
dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
|
||||
dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f
|
||||
dgeequb.f dsyequb.f dpoequb.f dgbequb.f
|
||||
dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f
|
||||
dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f
|
||||
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
|
||||
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f
|
||||
dgelq.f dgelqt.f dgelqt3.f dgemlq.f dgemlqt.f dgemqr.f dgeqr.f dgetsls.f
|
||||
dlamswlq.f dlamtsqr.f dlaswlq.f dlatsqr.f dtplqt.f dtplqt2.f dtpmlqt.f
|
||||
dsysv_aa.f dsytrf_aa.f dsytrs_aa.f dlasyf_aa.f dsytf2_rk.f dlasyf_rk.f
|
||||
dsytrf_rk.f dsytrs_3.f dsycon_3.f dsytri_3.f dsytri_3x.f dsysv_rk.f
|
||||
dsb2st_kernels.f dsbev_2stage.f dsbevd_2stage.f dsbevx_2stage.f
|
||||
dsyev_2stage.f dsyevd_2stage.f dsyevr_2stage.f dsyevx_2stage.f
|
||||
dsygv_2stage.f dsytrd_2stage.f dsytrd_sb2st.F dsytrd_sy2sb.f dlarfy.f
|
||||
)
|
||||
|
||||
set(ZLASRC
|
||||
zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
|
||||
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
|
||||
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
|
||||
DEPRECATED/zgegs.f DEPRECATED/zgegv.f zgehd2.f zgehrd.f zgejsv.f zgelq2.f zgelqf.f
|
||||
zgels.f zgelsd.f zgelss.f DEPRECATED/zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
|
||||
DEPRECATED/zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
|
||||
zgesc2.f zgesdd.f zgesvd.f zgesvdx.f zgesvj.f zgesvx.f zgetc2.f
|
||||
zgetri.f zgetrf2.f
|
||||
zggbak.f zggbal.f zgges.f zgges3.f zggesx.f zggev.f zggev3.f zggevx.f zggglm.f
|
||||
zgghd3.f zgghrd.f zgglse.f zggqrf.f zggrqf.f
|
||||
DEPRECATED/zggsvd.f zggsvd3.f DEPRECATED/zggsvp.f zggsvp3.f
|
||||
zgsvj0.f zgsvj1.f
|
||||
zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
|
||||
zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
|
||||
zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
|
||||
zhegv.f zhegvd.f zhegvx.f zherfs.f zhesv.f zhesvx.f zhetd2.f
|
||||
zhetf2.f zhetrd.f
|
||||
zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f
|
||||
zhetrs.f zhetrs2.f
|
||||
zhetf2_rook.f zhetrf_rook.f zhetri_rook.f zhetrs_rook.f zhecon_rook.f zhesv_rook.f
|
||||
zhgeqz.f zhpcon.f zhpev.f zhpevd.f
|
||||
zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f
|
||||
zhpsvx.f
|
||||
zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f
|
||||
zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f
|
||||
zlaed0.f zlaed7.f zlaed8.f
|
||||
zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
|
||||
zlahef.f zlahef_rook.f zlahqr.f
|
||||
DEPRECATED/zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
|
||||
zlangt.f zlanhb.f
|
||||
zlanhe.f
|
||||
zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
|
||||
zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f
|
||||
zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
|
||||
zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
|
||||
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
|
||||
zlarcm.f zlarf.f zlarfb.f
|
||||
zlarfg.f zlarft.f zlarfgp.f
|
||||
zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
|
||||
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
|
||||
zlassq.f zlasyf.f zlasyf_rook.f zlasyf_aa.f
|
||||
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f DEPRECATED/zlatzm.f
|
||||
zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
|
||||
zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
|
||||
zposv.f zposvx.f zpotrf2.f zpotrs.f zpstrf.f zpstf2.f
|
||||
zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
|
||||
zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
|
||||
zrot.f zspcon.f zsprfs.f zspsv.f
|
||||
zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
|
||||
zstegr.f zstein.f zsteqr.f
|
||||
zsycon.f zsysv_aa.f
|
||||
zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f
|
||||
zsyswapr.f zsytrs.f zsytrs_aa.f zsytrs2.f zsyconv.f
|
||||
zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f
|
||||
zsytri_rook.f zsycon_rook.f zsysv_rook.f
|
||||
ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f
|
||||
ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
|
||||
ztprfs.f ztptri.f
|
||||
ztptrs.f ztrcon.f ztrevc.f ztrevc3.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
|
||||
ztrsyl.f ztrtrs.f DEPRECATED/ztzrqf.f ztzrzf.f zung2l.f
|
||||
zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
|
||||
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunm22.f zunml2.f
|
||||
zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
|
||||
zunmtr.f zupgtr.f
|
||||
zupmtr.f izmax1.f dzsum1.f zstemr.f
|
||||
zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f
|
||||
zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f
|
||||
ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f
|
||||
zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f
|
||||
zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f
|
||||
zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f
|
||||
zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f
|
||||
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f
|
||||
zgelq.f zgelqt.f zgelqt3.f zgemlq.f zgemlqt.f zgemqr.f zgeqr.f zgetsls.f
|
||||
zlamswlq.f zlamtsqr.f zlaswlq.f zlatsqr.f ztplqt.f ztplqt2.f ztpmlqt.f
|
||||
zhesv_aa.f zhetrf_aa.f zhetrs_aa.f zlahef_aa.f zsytf2_rk.f zlasyf_rk.f
|
||||
zsytrf_aa.f zsytrf_rk.f zsytrs_3.f zsycon_3.f zsytri_3.f zsytri_3x.f zsysv_rk.f
|
||||
zhetf2_rk.f zlahef_rk.f zhetrf_rk.f zhetrs_3.f zhecon_3.f zhetri_3.f
|
||||
zhetri_3x.f zhesv_rk.f zhb2st_kernels.f zhbev_2stage.f zhbevd_2stage.f
|
||||
zhbevx_2stage.f zheev_2stage.f zheevd_2stage.f zheevr_2stage.f
|
||||
zheevx_2stage.f zhegv_2stage.f zhetrd_2stage.f zhetrd_hb2st.F zhetrd_he2hb.f
|
||||
zlarfy.f
|
||||
)
|
||||
|
||||
set(LA_REL_SRC ${ALLAUX})
|
||||
if (BUILD_SINGLE)
|
||||
list(APPEND LA_REL_SRC ${SLASRC} ${DSLASRC} ${SCLAUX})
|
||||
endif ()
|
||||
|
||||
if (BUILD_DOUBLE)
|
||||
list(APPEND LA_REL_SRC ${DLASRC} ${DSLASRC} ${DZLAUX})
|
||||
endif ()
|
||||
|
||||
if (BUILD_COMPLEX)
|
||||
list(APPEND LA_REL_SRC ${CLASRC} ${ZCLASRC} ${SCLAUX})
|
||||
endif ()
|
||||
|
||||
if (BUILD_COMPLEX16)
|
||||
list(APPEND LA_REL_SRC ${ZLASRC} ${ZCLASRC} ${DZLAUX})
|
||||
endif ()
|
||||
|
||||
# add lapack-netlib folder to the sources
|
||||
set(LA_SOURCES "")
|
||||
foreach (LA_FILE ${LA_REL_SRC})
|
||||
list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/SRC/${LA_FILE}")
|
||||
endforeach ()
|
||||
set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}")
|
||||
2396
cmake/lapacke.cmake
Normal file
2396
cmake/lapacke.cmake
Normal file
File diff suppressed because it is too large
Load Diff
9
cmake/openblas.pc.in
Normal file
9
cmake/openblas.pc.in
Normal file
@@ -0,0 +1,9 @@
|
||||
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
|
||||
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
|
||||
|
||||
Name: OpenBLAS
|
||||
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
|
||||
Version: @OPENBLAS_VERSION@
|
||||
URL: https://github.com/xianyi/OpenBLAS
|
||||
Libs: -L${libdir} -lopenblas
|
||||
Cflags: -I${includedir}
|
||||
104
cmake/os.cmake
Normal file
104
cmake/os.cmake
Normal file
@@ -0,0 +1,104 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from portion of OpenBLAS/Makefile.system
|
||||
## Detects the OS and sets appropriate variables.
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
|
||||
set(ENV{MACOSX_DEPLOYMENT_TARGET} "10.2") # TODO: should be exported as an env var
|
||||
set(MD5SUM "md5 -r")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD")
|
||||
set(MD5SUM "md5 -r")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "NetBSD")
|
||||
set(MD5SUM "md5 -n")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
|
||||
set(EXTRALIB "${EXTRALIB} -lm")
|
||||
set(NO_EXPRECISION 1)
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "AIX")
|
||||
set(EXTRALIB "${EXTRALIB} -lm")
|
||||
endif ()
|
||||
|
||||
# TODO: this is probably meant for mingw, not other windows compilers
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
|
||||
|
||||
set(NEED_PIC 0)
|
||||
set(NO_EXPRECISION 1)
|
||||
|
||||
set(EXTRALIB "${EXTRALIB} -defaultlib:advapi32")
|
||||
|
||||
# probably not going to use these
|
||||
set(SUFFIX "obj")
|
||||
set(PSUFFIX "pobj")
|
||||
set(LIBSUFFIX "a")
|
||||
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
||||
|
||||
# Test for supporting MS_ABI
|
||||
# removed string parsing in favor of CMake's version comparison -hpa
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
|
||||
# GCC Version >=4.7
|
||||
# It is compatible with MSVC ABI.
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# Ensure the correct stack alignment on Win32
|
||||
# http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
if (NOT MSVC AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2")
|
||||
endif ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mincoming-stack-boundary=2")
|
||||
endif ()
|
||||
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Interix")
|
||||
set(NEED_PIC 0)
|
||||
set(NO_EXPRECISION 1)
|
||||
|
||||
# Interix tool directory, stored as a single plain path value.
# set() takes no comparison keyword: a STREQUAL token here would become part of the value.
set(INTERIX_TOOL_DIR "/opt/gcc.3.3/i586-pc-interix3/bin")
|
||||
endif ()
|
||||
|
||||
if (CYGWIN)
|
||||
set(NEED_PIC 0)
|
||||
set(NO_EXPRECISION 1)
|
||||
endif ()
|
||||
|
||||
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Interix" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Android")
|
||||
if (SMP)
|
||||
set(EXTRALIB "${EXTRALIB} -lpthread")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (QUAD_PRECISION)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DQUAD_PRECISION")
|
||||
set(NO_EXPRECISION 1)
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
set(NO_EXPRECISION 1)
|
||||
endif ()
|
||||
|
||||
if (UTEST_CHECK)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK")
|
||||
set(SANITY_CHECK 1)
|
||||
endif ()
|
||||
|
||||
if (SANITY_CHECK)
|
||||
# TODO: need some way to get $(*F) (target filename)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DSANITY_CHECK -DREFNAME=$(*F)f${BU}")
|
||||
endif ()
|
||||
|
||||
148
cmake/prebuild.cmake
Normal file
148
cmake/prebuild.cmake
Normal file
@@ -0,0 +1,148 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from OpenBLAS/Makefile.prebuild
|
||||
## This is triggered by system.cmake and runs before any of the code is built.
|
||||
## Creates config.h and Makefile.conf by first running the c_check perl script (which creates those files).
|
||||
## Next it runs f_check and appends some fortran information to the files.
|
||||
## Then it runs getarch and getarch_2nd for even more environment information.
|
||||
## Finally it builds gen_config_h for use at build time to generate config.h.
|
||||
|
||||
# CMake vars set by this file:
|
||||
# CORE
|
||||
# LIBCORE
|
||||
# NUM_CORES
|
||||
# HAVE_MMX
|
||||
# HAVE_SSE
|
||||
# HAVE_SSE2
|
||||
# HAVE_SSE3
|
||||
# MAKE
|
||||
# SGEMM_UNROLL_M
|
||||
# SGEMM_UNROLL_N
|
||||
# DGEMM_UNROLL_M
|
||||
# DGEMM_UNROLL_N
|
||||
# QGEMM_UNROLL_M
|
||||
# QGEMM_UNROLL_N
|
||||
# CGEMM_UNROLL_M
|
||||
# CGEMM_UNROLL_N
|
||||
# ZGEMM_UNROLL_M
|
||||
# ZGEMM_UNROLL_N
|
||||
# XGEMM_UNROLL_M
|
||||
# XGEMM_UNROLL_N
|
||||
# CGEMM3M_UNROLL_M
|
||||
# CGEMM3M_UNROLL_N
|
||||
# ZGEMM3M_UNROLL_M
|
||||
# ZGEMM3M_UNROLL_N
|
||||
# XGEMM3M_UNROLL_M
|
||||
# XGEMM3M_UNROLL_N
|
||||
|
||||
# CPUIDEMU = ../../cpuid/table.o
|
||||
|
||||
if (DEFINED CPUIDEMU)
|
||||
set(EXFLAGS "-DCPUIDEMU -DVENDOR=99")
|
||||
endif ()
|
||||
|
||||
if (DEFINED TARGET_CORE)
|
||||
# set the C flags for just this file
|
||||
set(GETARCH2_FLAGS "-DBUILD_KERNEL")
|
||||
set(TARGET_MAKE "Makefile_kernel.conf")
|
||||
set(TARGET_CONF "config_kernel.h")
|
||||
else()
|
||||
set(TARGET_MAKE "Makefile.conf")
|
||||
set(TARGET_CONF "config.h")
|
||||
endif ()
|
||||
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/c_check.cmake")
|
||||
|
||||
if (NOT NOFORTRAN)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/f_check.cmake")
|
||||
endif ()
|
||||
|
||||
# compile getarch
|
||||
set(GETARCH_SRC
|
||||
${PROJECT_SOURCE_DIR}/getarch.c
|
||||
# optional extra input named by CPUIDEMU (the variable tested with DEFINED above); expands to nothing when unset
${CPUIDEMU}
|
||||
)
|
||||
|
||||
if (NOT MSVC)
|
||||
list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S)
|
||||
endif ()
|
||||
|
||||
if (MSVC)
|
||||
#Use generic for MSVC now
|
||||
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
|
||||
endif()
|
||||
|
||||
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
# disable WindowsStore strict CRT checks
|
||||
set(GETARCH_FLAGS ${GETARCH_FLAGS} -D_CRT_SECURE_NO_WARNINGS)
|
||||
endif ()
|
||||
|
||||
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
|
||||
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
|
||||
file(MAKE_DIRECTORY ${GETARCH_DIR})
|
||||
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
try_compile(GETARCH_RESULT ${GETARCH_DIR}
|
||||
SOURCES ${GETARCH_SRC}
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${PROJECT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GETARCH_LOG
|
||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
|
||||
)
|
||||
|
||||
if (NOT ${GETARCH_RESULT})
|
||||
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
|
||||
endif ()
|
||||
endif ()
|
||||
message(STATUS "Running getarch")
|
||||
|
||||
# run the getarch binary directly: the output of mode 0 is parsed into CMake variables below, the output of mode 1 is appended to ${TARGET_CONF}
|
||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT)
|
||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT)
|
||||
|
||||
message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")
|
||||
|
||||
# append config data from getarch to the TARGET file and read in CMake vars
|
||||
file(APPEND ${TARGET_CONF} ${GETARCH_CONF_OUT})
|
||||
ParseGetArchVars(${GETARCH_MAKE_OUT})
|
||||
|
||||
set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
|
||||
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
|
||||
file(MAKE_DIRECTORY ${GETARCH2_DIR})
|
||||
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
|
||||
SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${PROJECT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GETARCH2_LOG
|
||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
|
||||
)
|
||||
|
||||
if (NOT ${GETARCH2_RESULT})
|
||||
MESSAGE(FATAL_ERROR "Compiling getarch_2nd failed ${GETARCH2_LOG}")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# run the getarch_2nd binary directly: the output of mode 0 is parsed into CMake variables below, the output of mode 1 is appended to ${TARGET_CONF}
|
||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT)
|
||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT)
|
||||
|
||||
# append config data from getarch_2nd to the TARGET file and read in CMake vars
|
||||
file(APPEND ${TARGET_CONF} ${GETARCH2_CONF_OUT})
|
||||
ParseGetArchVars(${GETARCH2_MAKE_OUT})
|
||||
|
||||
# compile gen_config_h
|
||||
set(GEN_CONFIG_H_DIR "${PROJECT_BINARY_DIR}/genconfig_h_build")
|
||||
set(GEN_CONFIG_H_BIN "gen_config_h${CMAKE_EXECUTABLE_SUFFIX}")
|
||||
set(GEN_CONFIG_H_FLAGS "-DVERSION=\"${OpenBLAS_VERSION}\"")
|
||||
file(MAKE_DIRECTORY ${GEN_CONFIG_H_DIR})
|
||||
|
||||
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
try_compile(GEN_CONFIG_H_RESULT ${GEN_CONFIG_H_DIR}
|
||||
SOURCES ${PROJECT_SOURCE_DIR}/gen_config_h.c
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GEN_CONFIG_H_FLAGS} -I${PROJECT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GEN_CONFIG_H_LOG
|
||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GEN_CONFIG_H_BIN}
|
||||
)
|
||||
|
||||
if (NOT ${GEN_CONFIG_H_RESULT})
|
||||
MESSAGE(FATAL_ERROR "Compiling gen_config_h failed ${GEN_CONFIG_H_LOG}")
|
||||
endif ()
|
||||
endif ()
|
||||
554
cmake/system.cmake
Normal file
554
cmake/system.cmake
Normal file
@@ -0,0 +1,554 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from OpenBLAS/Makefile.system
|
||||
##
|
||||
|
||||
set(NETLIB_LAPACK_DIR "${PROJECT_SOURCE_DIR}/lapack-netlib")
|
||||
|
||||
# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa
|
||||
# http://stackoverflow.com/questions/714100/os-detecting-makefile
|
||||
|
||||
# TODO: Makefile.system sets HOSTCC = $(CC) here if not already set -hpa
|
||||
|
||||
# TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1.
|
||||
if (DEFINED TARGET_CORE)
|
||||
set(TARGET ${TARGET_CORE})
|
||||
endif ()
|
||||
|
||||
# Force fallbacks for 32bit
|
||||
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
|
||||
message(STATUS "Compiling a ${BINARY}-bit binary.")
|
||||
set(NO_AVX 1)
|
||||
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE")
|
||||
set(TARGET "NEHALEM")
|
||||
endif ()
|
||||
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN")
|
||||
set(TARGET "BARCELONA")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (DEFINED TARGET)
|
||||
message(STATUS "Targetting the ${TARGET} architecture.")
|
||||
set(GETARCH_FLAGS "-DFORCE_${TARGET}")
|
||||
endif ()
|
||||
|
||||
if (INTERFACE64)
|
||||
message(STATUS "Using 64-bit integers.")
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DUSE64BITINT")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED GEMM_MULTITHREAD_THRESHOLD)
|
||||
set(GEMM_MULTITHREAD_THRESHOLD 4)
|
||||
endif ()
|
||||
message(STATUS "GEMM multithread threshold set to ${GEMM_MULTITHREAD_THRESHOLD}.")
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DGEMM_MULTITHREAD_THRESHOLD=${GEMM_MULTITHREAD_THRESHOLD}")
|
||||
|
||||
if (NO_AVX)
|
||||
message(STATUS "Disabling Advanced Vector Extensions (AVX).")
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX")
|
||||
endif ()
|
||||
|
||||
if (NO_AVX2)
|
||||
message(STATUS "Disabling Advanced Vector Extensions 2 (AVX2).")
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2")
|
||||
endif ()
|
||||
|
||||
if (CMAKE_BUILD_TYPE STREQUAL Debug)
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -g")
|
||||
endif ()
|
||||
|
||||
# TODO: let CMake handle this? -hpa
|
||||
#if (${QUIET_MAKE})
|
||||
# set(MAKE "${MAKE} -s")
|
||||
#endif()
|
||||
|
||||
if (NOT DEFINED NO_PARALLEL_MAKE)
|
||||
set(NO_PARALLEL_MAKE 0)
|
||||
endif ()
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_PARALLEL_MAKE=${NO_PARALLEL_MAKE}")
|
||||
|
||||
if (CMAKE_CXX_COMPILER STREQUAL loongcc)
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -static")
|
||||
endif ()
|
||||
|
||||
# If Fortran is not used, only CBLAS is compiled (ONLY_CBLAS also disables LAPACK).
|
||||
if (ONLY_CBLAS)
|
||||
set(NO_LAPACK 1)
|
||||
else ()
|
||||
set(ONLY_CBLAS 0)
|
||||
endif ()
|
||||
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
|
||||
|
||||
if (NOT DEFINED NUM_THREADS)
|
||||
set(NUM_THREADS ${NUM_CORES})
|
||||
endif ()
|
||||
|
||||
if (${NUM_THREADS} EQUAL 1)
|
||||
set(USE_THREAD 0)
|
||||
endif ()
|
||||
|
||||
if (DEFINED USE_THREAD)
|
||||
if (NOT ${USE_THREAD})
|
||||
unset(SMP)
|
||||
else ()
|
||||
set(SMP 1)
|
||||
endif ()
|
||||
else ()
|
||||
# N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa
|
||||
if (${NUM_THREADS} EQUAL 1)
|
||||
unset(SMP)
|
||||
else ()
|
||||
set(SMP 1)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${SMP})
|
||||
message(STATUS "SMP enabled.")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED NEED_PIC)
|
||||
set(NEED_PIC 1)
|
||||
endif ()
|
||||
|
||||
# TODO: I think CMake should be handling all this stuff -hpa
|
||||
unset(ARFLAGS)
|
||||
set(CPP "${COMPILER} -E")
|
||||
set(AR "${CROSS_SUFFIX}ar")
|
||||
set(AS "${CROSS_SUFFIX}as")
|
||||
set(LD "${CROSS_SUFFIX}ld")
|
||||
set(RANLIB "${CROSS_SUFFIX}ranlib")
|
||||
set(NM "${CROSS_SUFFIX}nm")
|
||||
set(DLLWRAP "${CROSS_SUFFIX}dllwrap")
|
||||
set(OBJCOPY "${CROSS_SUFFIX}objcopy")
|
||||
set(OBJCONV "${CROSS_SUFFIX}objconv")
|
||||
|
||||
# OS dependent settings
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/os.cmake")
|
||||
|
||||
# Architecture dependent settings
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/arch.cmake")
|
||||
|
||||
# C Compiler dependent settings
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/cc.cmake")
|
||||
|
||||
if (NOT NOFORTRAN)
|
||||
# Fortran Compiler dependent settings
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/fc.cmake")
|
||||
endif ()
|
||||
|
||||
if (BINARY64)
|
||||
if (INTERFACE64)
|
||||
# CCOMMON_OPT += -DUSE64BITINT
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NEED_PIC)
  # Append the position-independent-code switch to the C option string.
  # The expansion is quoted so that an empty compiler variable cannot leave
  # if() with a dangling STREQUAL operator.
  if ("${CMAKE_C_COMPILER}" STREQUAL "IBM")
    set(CCOMMON_OPT "${CCOMMON_OPT} -qpic=large")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -fPIC")
  endif ()

  # Same for the Fortran option string. F_COMPILER may be empty when the
  # Fortran compiler setup was skipped (NOFORTRAN), hence the quoting.
  if ("${F_COMPILER}" STREQUAL "SUN")
    set(FCOMMON_OPT "${FCOMMON_OPT} -pic")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC")
  endif ()
endif ()
|
||||
|
||||
if (DYNAMIC_ARCH)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
|
||||
endif ()
|
||||
|
||||
if (NO_LAPACK)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACK")
|
||||
#Disable LAPACK C interface
|
||||
set(NO_LAPACKE 1)
|
||||
endif ()
|
||||
|
||||
if (NO_LAPACKE)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACKE")
|
||||
endif ()
|
||||
|
||||
if (NO_AVX)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
|
||||
endif ()
|
||||
|
||||
if (NO_AVX2)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2")
|
||||
endif ()
|
||||
|
||||
if (SMP)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DSMP_SERVER")
|
||||
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
if (NOT ${CORE} STREQUAL "LOONGSON3B")
|
||||
set(USE_SIMPLE_THREADED_LEVEL3 1)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (USE_OPENMP)
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
# NO_AFFINITY = 1
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_OPENMP")
|
||||
endif ()
|
||||
|
||||
if (BIGNUMA)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DBIGNUMA")
|
||||
endif ()
|
||||
|
||||
endif ()
|
||||
|
||||
if (NO_WARMUP)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_WARMUP")
|
||||
endif ()
|
||||
|
||||
if (CONSISTENT_FPCSR)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DCONSISTENT_FPCSR")
|
||||
endif ()
|
||||
|
||||
# Only for development
|
||||
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPARAMTEST")
|
||||
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPREFETCHTEST")
|
||||
# set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_SWITCHING")
|
||||
# set(USE_PAPI 1)
|
||||
|
||||
if (USE_PAPI)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_PAPI")
|
||||
set(EXTRALIB "${EXTRALIB} -lpapi -lperfctr")
|
||||
endif ()
|
||||
|
||||
if (DYNAMIC_THREADS)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_THREADS")
|
||||
endif ()
|
||||
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}")
|
||||
|
||||
if (USE_SIMPLE_THREADED_LEVEL3)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3")
|
||||
endif ()
|
||||
|
||||
if (DEFINED LIBNAMESUFFIX)
|
||||
set(LIBPREFIX "libopenblas_${LIBNAMESUFFIX}")
|
||||
else ()
|
||||
set(LIBPREFIX "libopenblas")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED SYMBOLPREFIX)
|
||||
set(SYMBOLPREFIX "")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED SYMBOLSUFFIX)
|
||||
set(SYMBOLSUFFIX "")
|
||||
endif ()
|
||||
|
||||
set(KERNELDIR "${PROJECT_SOURCE_DIR}/kernel/${ARCH}")
|
||||
|
||||
# TODO: need to convert these Makefiles
|
||||
# include ${PROJECT_SOURCE_DIR}/cmake/${ARCH}.cmake
|
||||
|
||||
if (${CORE} STREQUAL "PPC440")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC")
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL "PPC440FP2")
|
||||
set(STATIC_ALLOCATION 1)
|
||||
endif ()
|
||||
|
||||
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
|
||||
set(NO_AFFINITY 1)
|
||||
endif ()
|
||||
|
||||
if (NOT ${ARCH} STREQUAL "x86_64" AND NOT ${ARCH} STREQUAL "x86" AND NOT ${CORE} STREQUAL "LOONGSON3B")
|
||||
set(NO_AFFINITY 1)
|
||||
endif ()
|
||||
|
||||
if (NO_AFFINITY)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AFFINITY")
|
||||
endif ()
|
||||
|
||||
if (FUNCTION_PROFILE)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DFUNCTION_PROFILE")
|
||||
endif ()
|
||||
|
||||
if (HUGETLB_ALLOCATION)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLB")
|
||||
endif ()
|
||||
|
||||
if (DEFINED HUGETLBFILE_ALLOCATION)
  # The value of HUGETLBFILE_ALLOCATION is handed to the compiler as HUGETLB_FILE_NAME.
  # (A stray ")" used to be appended to that value.)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=${HUGETLBFILE_ALLOCATION}")
endif ()
|
||||
|
||||
if (STATIC_ALLOCATION)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_STATIC")
|
||||
endif ()
|
||||
|
||||
if (DEVICEDRIVER_ALLOCATION)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_DEVICEDRIVER -DDEVICEDRIVER_NAME=\"/dev/mapper\"")
|
||||
endif ()
|
||||
|
||||
if (MIXED_MEMORY_ALLOCATION)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "SunOS")
|
||||
set(TAR gtar)
|
||||
set(PATCH gpatch)
|
||||
set(GREP ggrep)
|
||||
else ()
|
||||
set(TAR tar)
|
||||
set(PATCH patch)
|
||||
set(GREP grep)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED MD5SUM)
|
||||
set(MD5SUM md5sum)
|
||||
endif ()
|
||||
|
||||
set(AWK awk)
|
||||
|
||||
set(SED sed)
|
||||
|
||||
set(REVISION "-r${OpenBLAS_VERSION}")
|
||||
set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION})
|
||||
|
||||
if (DEBUG)
|
||||
set(COMMON_OPT "${COMMON_OPT} -g")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED COMMON_OPT)
|
||||
set(COMMON_OPT "-O2")
|
||||
endif ()
|
||||
|
||||
#For x86 32-bit
|
||||
if (DEFINED BINARY AND BINARY EQUAL 32)
|
||||
if (NOT MSVC)
|
||||
set(COMMON_OPT "${COMMON_OPT} -m32")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
|
||||
if(NOT MSVC)
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
|
||||
endif()
|
||||
# TODO: not sure what PFLAGS is -hpa
|
||||
set(PFLAGS "${PFLAGS} ${COMMON_OPT} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}")
|
||||
|
||||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COMMON_OPT} ${FCOMMON_OPT}")
|
||||
# TODO: not sure what FPFLAGS is -hpa
|
||||
set(FPFLAGS "${FPFLAGS} ${COMMON_OPT} ${FCOMMON_OPT} ${COMMON_PROF}")
|
||||
|
||||
#For LAPACK Fortran codes.
|
||||
set(LAPACK_FFLAGS "${LAPACK_FFLAGS} ${CMAKE_Fortran_FLAGS}")
|
||||
set(LAPACK_FPFLAGS "${LAPACK_FPFLAGS} ${FPFLAGS}")
|
||||
|
||||
#Disable -fopenmp for LAPACK Fortran codes on Windows.
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
  # OpenMP switches of the supported compilers; each one is removed from the
  # LAPACK Fortran flag strings by plain substring replacement.
  set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parallel")
  foreach (FILTER_FLAG ${FILTER_FLAGS})
    # Quote the inputs so the flag strings are always passed as exactly one
    # argument, even if they are empty or contain semicolons.
    string(REPLACE "${FILTER_FLAG}" "" LAPACK_FFLAGS "${LAPACK_FFLAGS}")
    string(REPLACE "${FILTER_FLAG}" "" LAPACK_FPFLAGS "${LAPACK_FPFLAGS}")
  endforeach ()
endif ()
|
||||
|
||||
if ("${F_COMPILER}" STREQUAL "GFORTRAN")
|
||||
# lapack-netlib is rife with uninitialized warnings -hpa
|
||||
set(LAPACK_FFLAGS "${LAPACK_FFLAGS} -Wno-maybe-uninitialized")
|
||||
endif ()
|
||||
|
||||
# Start the LAPACK C flags from the C flags assembled above (CMAKE_C_FLAGS).
set(LAPACK_CFLAGS "${CMAKE_C_FLAGS} -DHAVE_LAPACK_CONFIG_H")
|
||||
if (INTERFACE64)
|
||||
set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_ILP64")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
|
||||
set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DOPENBLAS_OS_WINDOWS")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "LSB" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
|
||||
set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_COMPLEX_STRUCTURE")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED SUFFIX)
|
||||
set(SUFFIX o)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED PSUFFIX)
|
||||
set(PSUFFIX po)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED LIBSUFFIX)
|
||||
set(LIBSUFFIX a)
|
||||
endif ()
|
||||
|
||||
if (DYNAMIC_ARCH)
|
||||
if (DEFINED SMP)
|
||||
set(LIBNAME "${LIBPREFIX}p${REVISION}.${LIBSUFFIX}")
|
||||
set(LIBNAME_P "${LIBPREFIX}p${REVISION}_p.${LIBSUFFIX}")
|
||||
else ()
|
||||
set(LIBNAME "${LIBPREFIX}${REVISION}.${LIBSUFFIX}")
|
||||
set(LIBNAME_P "${LIBPREFIX}${REVISION}_p.${LIBSUFFIX}")
|
||||
endif ()
|
||||
else ()
|
||||
if (DEFINED SMP)
|
||||
set(LIBNAME "${LIBPREFIX}_${LIBCORE}p${REVISION}.${LIBSUFFIX}")
|
||||
set(LIBNAME_P "${LIBPREFIX}_${LIBCORE}p${REVISION}_p.${LIBSUFFIX}")
|
||||
else ()
|
||||
set(LIBNAME "${LIBPREFIX}_${LIBCORE}${REVISION}.${LIBSUFFIX}")
|
||||
set(LIBNAME_P "${LIBPREFIX}_${LIBCORE}${REVISION}_p.${LIBSUFFIX}")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
|
||||
set(LIBDLLNAME "${LIBPREFIX}.dll")
|
||||
set(LIBSONAME "${LIBNAME}.${LIBSUFFIX}.so")
|
||||
set(LIBDYNNAME "${LIBNAME}.${LIBSUFFIX}.dylib")
|
||||
set(LIBDEFNAME "${LIBNAME}.${LIBSUFFIX}.def")
|
||||
set(LIBEXPNAME "${LIBNAME}.${LIBSUFFIX}.exp")
|
||||
set(LIBZIPNAME "${LIBNAME}.${LIBSUFFIX}.zip")
|
||||
|
||||
set(LIBS "${PROJECT_SOURCE_DIR}/${LIBNAME}")
|
||||
set(LIBS_P "${PROJECT_SOURCE_DIR}/${LIBNAME_P}")
|
||||
|
||||
|
||||
set(LIB_COMPONENTS BLAS)
|
||||
if (NOT NO_CBLAS)
|
||||
set(LIB_COMPONENTS "${LIB_COMPONENTS} CBLAS")
|
||||
endif ()
|
||||
|
||||
if (NOT NO_LAPACK)
|
||||
set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACK")
|
||||
if (NOT NO_LAPACKE)
|
||||
set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACKE")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (ONLY_CBLAS)
|
||||
set(LIB_COMPONENTS CBLAS)
|
||||
endif ()
|
||||
|
||||
|
||||
# For GEMM3M
|
||||
set(USE_GEMM3M 0)
|
||||
|
||||
if (DEFINED ARCH)
|
||||
if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS")
|
||||
set(USE_GEMM3M 1)
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL "generic")
|
||||
set(USE_GEMM3M 0)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
|
||||
#export OSNAME
|
||||
#export ARCH
|
||||
#export CORE
|
||||
#export LIBCORE
|
||||
#export PGCPATH
|
||||
#export CONFIG
|
||||
#export CC
|
||||
#export FC
|
||||
#export BU
|
||||
#export FU
|
||||
#export NEED2UNDERSCORES
|
||||
#export USE_THREAD
|
||||
#export NUM_THREADS
|
||||
#export NUM_CORES
|
||||
#export SMP
|
||||
#export MAKEFILE_RULE
|
||||
#export NEED_PIC
|
||||
#export BINARY
|
||||
#export BINARY32
|
||||
#export BINARY64
|
||||
#export F_COMPILER
|
||||
#export C_COMPILER
|
||||
#export USE_OPENMP
|
||||
#export CROSS
|
||||
#export CROSS_SUFFIX
|
||||
#export NOFORTRAN
|
||||
#export NO_FBLAS
|
||||
#export EXTRALIB
|
||||
#export CEXTRALIB
|
||||
#export FEXTRALIB
|
||||
#export HAVE_SSE
|
||||
#export HAVE_SSE2
|
||||
#export HAVE_SSE3
|
||||
#export HAVE_SSSE3
|
||||
#export HAVE_SSE4_1
|
||||
#export HAVE_SSE4_2
|
||||
#export HAVE_SSE4A
|
||||
#export HAVE_SSE5
|
||||
#export HAVE_AVX
|
||||
#export HAVE_VFP
|
||||
#export HAVE_VFPV3
|
||||
#export HAVE_VFPV4
|
||||
#export HAVE_NEON
|
||||
#export KERNELDIR
|
||||
#export FUNCTION_PROFILE
|
||||
#export TARGET_CORE
|
||||
#
|
||||
#export SGEMM_UNROLL_M
|
||||
#export SGEMM_UNROLL_N
|
||||
#export DGEMM_UNROLL_M
|
||||
#export DGEMM_UNROLL_N
|
||||
#export QGEMM_UNROLL_M
|
||||
#export QGEMM_UNROLL_N
|
||||
#export CGEMM_UNROLL_M
|
||||
#export CGEMM_UNROLL_N
|
||||
#export ZGEMM_UNROLL_M
|
||||
#export ZGEMM_UNROLL_N
|
||||
#export XGEMM_UNROLL_M
|
||||
#export XGEMM_UNROLL_N
|
||||
#export CGEMM3M_UNROLL_M
|
||||
#export CGEMM3M_UNROLL_N
|
||||
#export ZGEMM3M_UNROLL_M
|
||||
#export ZGEMM3M_UNROLL_N
|
||||
#export XGEMM3M_UNROLL_M
|
||||
#export XGEMM3M_UNROLL_N
|
||||
|
||||
|
||||
#if (USE_CUDA)
|
||||
# export CUDADIR
|
||||
# export CUCC
|
||||
# export CUFLAGS
|
||||
# export CULIB
|
||||
#endif
|
||||
|
||||
#.SUFFIXES: .$(PSUFFIX) .$(SUFFIX) .f
|
||||
#
|
||||
#.f.$(SUFFIX):
|
||||
# $(FC) $(FFLAGS) -c $< -o $(@F)
|
||||
#
|
||||
#.f.$(PSUFFIX):
|
||||
# $(FC) $(FPFLAGS) -pg -c $< -o $(@F)
|
||||
|
||||
# these are not cross-platform
|
||||
#ifdef BINARY64
|
||||
#PATHSCALEPATH = /opt/pathscale/lib/3.1
|
||||
#PGIPATH = /opt/pgi/linux86-64/7.1-5/lib
|
||||
#else
|
||||
#PATHSCALEPATH = /opt/pathscale/lib/3.1/32
|
||||
#PGIPATH = /opt/pgi/linux86/7.1-5/lib
|
||||
#endif
|
||||
|
||||
#ACMLPATH = /opt/acml/4.3.0
|
||||
#ifneq ($(OSNAME), Darwin)
|
||||
#MKLPATH = /opt/intel/mkl/10.2.2.025/lib
|
||||
#else
|
||||
#MKLPATH = /Library/Frameworks/Intel_MKL.framework/Versions/10.0.1.014/lib
|
||||
#endif
|
||||
#ATLASPATH = /opt/atlas/3.9.17/opteron
|
||||
#FLAMEPATH = $(HOME)/flame/lib
|
||||
#ifneq ($(OSNAME), SunOS)
|
||||
#SUNPATH = /opt/sunstudio12.1
|
||||
#else
|
||||
#SUNPATH = /opt/SUNWspro
|
||||
#endif
|
||||
|
||||
346
cmake/utils.cmake
Normal file
346
cmake/utils.cmake
Normal file
@@ -0,0 +1,346 @@
|
||||
# Functions to help with the OpenBLAS build
|
||||
|
||||
# Reads string from getarch into CMake vars. Format of getarch vars is VARNAME=VALUE
|
||||
function(ParseGetArchVars GETARCH_IN)
|
||||
string(REGEX MATCHALL "[0-9_a-zA-Z]+=[0-9_a-zA-Z]+" GETARCH_RESULT_LIST "${GETARCH_IN}")
|
||||
foreach (GETARCH_LINE ${GETARCH_RESULT_LIST})
|
||||
# split the line into var and value, then assign the value to a CMake var
|
||||
string(REGEX MATCHALL "[0-9_a-zA-Z]+" SPLIT_VAR "${GETARCH_LINE}")
|
||||
list(GET SPLIT_VAR 0 VAR_NAME)
|
||||
list(GET SPLIT_VAR 1 VAR_VALUE)
|
||||
set(${VAR_NAME} ${VAR_VALUE} PARENT_SCOPE)
|
||||
endforeach ()
|
||||
endfunction ()
|
||||
|
||||
# Reads a Makefile into CMake vars.
|
||||
macro(ParseMakefileVars MAKEFILE_IN)
|
||||
message(STATUS "Reading vars from ${MAKEFILE_IN}...")
|
||||
file(STRINGS ${MAKEFILE_IN} makefile_contents)
|
||||
foreach (makefile_line ${makefile_contents})
|
||||
string(REGEX MATCH "([0-9_a-zA-Z]+)[ \t]*=[ \t]*(.+)$" line_match "${makefile_line}")
|
||||
if (NOT "${line_match}" STREQUAL "")
|
||||
set(var_name ${CMAKE_MATCH_1})
|
||||
set(var_value ${CMAKE_MATCH_2})
|
||||
# check for Makefile variables in the string, e.g. $(TSUFFIX)
|
||||
string(REGEX MATCHALL "\\$\\(([0-9_a-zA-Z]+)\\)" make_var_matches ${var_value})
|
||||
foreach (make_var ${make_var_matches})
|
||||
# strip out Makefile $() markup
|
||||
string(REGEX REPLACE "\\$\\(([0-9_a-zA-Z]+)\\)" "\\1" make_var ${make_var})
|
||||
# now replace the instance of the Makefile variable with the value of the CMake variable (note the double quote)
|
||||
string(REPLACE "$(${make_var})" "${${make_var}}" var_value ${var_value})
|
||||
endforeach ()
|
||||
set(${var_name} ${var_value})
|
||||
else ()
|
||||
string(REGEX MATCH "include \\$\\(KERNELDIR\\)/(.+)$" line_match "${makefile_line}")
|
||||
if (NOT "${line_match}" STREQUAL "")
|
||||
ParseMakefileVars(${KERNELDIR}/${CMAKE_MATCH_1})
|
||||
endif ()
|
||||
endif ()
|
||||
endforeach ()
|
||||
endmacro ()
|
||||
|
||||
# Returns all combinations of the input list, as a list with colon-separated combinations
|
||||
# E.g. input of A B C returns " " (the empty combination), A, B, A:B, C, A:C, B:C, A:B:C
|
||||
# N.B. The input is meant to be a list, and to pass a list to a function in CMake you must quote it (e.g. AllCombinations("${LIST_VAR}")).
|
||||
# @param absent_codes_in codes to use when an element is absent from a combination. For example, if you have TRANS;UNIT;UPPER you may want the code to be NNL when nothing is present.
|
||||
# @returns LIST_OUT a list of combinations
|
||||
# CODES_OUT a list of codes corresponding to each combination, with the absent code meaning the item is not present, and the first letter of the list item meaning it is present
|
||||
function(AllCombinations list_in absent_codes_in)
  # An N-element list has 2^N subsets; bit i of the subset index c selects
  # element i of list_in.
  list(LENGTH list_in list_count)
  # subtract 1 since foreach(RANGE) includes the upper bound
  math(EXPR num_combos "(1 << ${list_count}) - 1")
  # index of the last list element, used to walk the list by position
  math(EXPR last_list_index "${list_count} - 1")
  set(LIST_OUT "")
  set(CODES_OUT "")
  foreach (c RANGE 0 ${num_combos})

    set(current_combo "")
    set(current_code "")

    foreach (list_index RANGE 0 ${last_list_index})
      math(EXPR bit "1 << ${list_index}")
      math(EXPR combo_has_bit "${c} & ${bit}")
      list(GET list_in ${list_index} list_elem)
      if (combo_has_bit)
        # Test for the empty string explicitly: if(<variable>) would treat a
        # combination such as "N", "NO", "OFF" or "0" as false and overwrite
        # it instead of extending it.
        if (current_combo STREQUAL "")
          set(current_combo "${list_elem}")
        else ()
          set(current_combo "${current_combo}:${list_elem}")
        endif ()
        # present elements contribute their first letter to the code
        string(SUBSTRING "${list_elem}" 0 1 code_char)
      else ()
        # absent elements contribute the caller-supplied code for that position
        list(GET absent_codes_in ${list_index} code_char)
      endif ()
      set(current_code "${current_code}${code_char}")
    endforeach ()

    if (current_combo STREQUAL "")
      list(APPEND LIST_OUT " ") # Empty set is a valid combination, but CMake isn't appending the empty string for some reason, use a space
    else ()
      list(APPEND LIST_OUT ${current_combo})
    endif ()
    list(APPEND CODES_OUT ${current_code})

  endforeach ()

  # hand both result lists back to the caller
  set(LIST_OUT ${LIST_OUT} PARENT_SCOPE)
  set(CODES_OUT ${CODES_OUT} PARENT_SCOPE)
endfunction ()
|
||||
|
||||
# generates object files for each of the sources, using the BLAS naming scheme to pass the function name as a preprocessor definition
|
||||
# @param sources_in the source files to build from
|
||||
# @param defines_in (optional) preprocessor definitions that will be applied to all objects
|
||||
# @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended.
|
||||
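# e.g. with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax"
# @param use_cblas (optional) if true, the object name is prefixed with "cblas_" and the CBLAS define is added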
|
||||
# @param replace_last_with replaces the last character in the filename with this string (e.g. symm_k should be symm_TU)
|
||||
# @param append_with appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters)
|
||||
# @param no_float_type turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
|
||||
# @param complex_filename_scheme some routines have separate source files for complex and non-complex float types.
|
||||
# 0 - compiles for all types
|
||||
# 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE)
|
||||
# 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX)
|
||||
# 3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c)
|
||||
# 4 - compiles for complex types only, but changes source names for complex by prepending z (e.g. hemv.c becomes zhemv.c)
|
||||
# STRING - compiles only the given type (e.g. DOUBLE)
|
||||
function(GenerateNamedObjects sources_in)
|
||||
|
||||
if (DEFINED ARGV1)
|
||||
set(defines_in ${ARGV1})
|
||||
endif ()
|
||||
|
||||
if (DEFINED ARGV2 AND NOT "${ARGV2}" STREQUAL "")
|
||||
set(name_in ${ARGV2})
|
||||
# strip off extension for kernel files that pass in the object name.
|
||||
get_filename_component(name_in ${name_in} NAME_WE)
|
||||
endif ()
|
||||
|
||||
if (DEFINED ARGV3)
|
||||
set(use_cblas ${ARGV3})
|
||||
else ()
|
||||
set(use_cblas false)
|
||||
endif ()
|
||||
|
||||
if (DEFINED ARGV4)
|
||||
set(replace_last_with ${ARGV4})
|
||||
endif ()
|
||||
|
||||
if (DEFINED ARGV5)
|
||||
set(append_with ${ARGV5})
|
||||
endif ()
|
||||
|
||||
if (DEFINED ARGV6)
|
||||
set(no_float_type ${ARGV6})
|
||||
else ()
|
||||
set(no_float_type false)
|
||||
endif ()
|
||||
|
||||
if (no_float_type)
|
||||
set(float_list "DUMMY") # still need to loop once
|
||||
else ()
|
||||
set(float_list "${FLOAT_TYPES}")
|
||||
endif ()
|
||||
|
||||
set(real_only false)
|
||||
set(complex_only false)
|
||||
set(mangle_complex_sources false)
|
||||
if (DEFINED ARGV7 AND NOT "${ARGV7}" STREQUAL "")
|
||||
if (${ARGV7} EQUAL 1)
|
||||
set(real_only true)
|
||||
elseif (${ARGV7} EQUAL 2)
|
||||
set(complex_only true)
|
||||
elseif (${ARGV7} EQUAL 3)
|
||||
set(mangle_complex_sources true)
|
||||
elseif (${ARGV7} EQUAL 4)
|
||||
set(mangle_complex_sources true)
|
||||
set(complex_only true)
|
||||
elseif (NOT ${ARGV7} EQUAL 0)
|
||||
set(float_list ${ARGV7})
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (complex_only)
|
||||
list(REMOVE_ITEM float_list "SINGLE")
|
||||
list(REMOVE_ITEM float_list "DOUBLE")
|
||||
elseif (real_only)
|
||||
list(REMOVE_ITEM float_list "COMPLEX")
|
||||
list(REMOVE_ITEM float_list "ZCOMPLEX")
|
||||
endif ()
|
||||
|
||||
set(float_char "")
|
||||
set(OBJ_LIST_OUT "")
|
||||
foreach (float_type ${float_list})
|
||||
foreach (source_file ${sources_in})
|
||||
|
||||
if (NOT no_float_type)
|
||||
string(SUBSTRING ${float_type} 0 1 float_char)
|
||||
string(TOLOWER ${float_char} float_char)
|
||||
endif ()
|
||||
|
||||
if (NOT name_in)
|
||||
get_filename_component(source_name ${source_file} NAME_WE)
|
||||
set(obj_name "${float_char}${source_name}")
|
||||
else ()
|
||||
# replace * with float_char
|
||||
if (${name_in} MATCHES "\\*")
|
||||
string(REPLACE "*" ${float_char} obj_name ${name_in})
|
||||
else ()
|
||||
set(obj_name "${float_char}${name_in}")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (replace_last_with)
|
||||
string(REGEX REPLACE ".$" ${replace_last_with} obj_name ${obj_name})
|
||||
else ()
|
||||
set(obj_name "${obj_name}${append_with}")
|
||||
endif ()
|
||||
|
||||
# now add the object and set the defines
|
||||
set(obj_defines ${defines_in})
|
||||
|
||||
if (use_cblas)
|
||||
set(obj_name "cblas_${obj_name}")
|
||||
list(APPEND obj_defines "CBLAS")
|
||||
endif ()
|
||||
|
||||
list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"")
|
||||
if (${float_type} STREQUAL "DOUBLE" OR ${float_type} STREQUAL "ZCOMPLEX")
|
||||
list(APPEND obj_defines "DOUBLE")
|
||||
endif ()
|
||||
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
|
||||
list(APPEND obj_defines "COMPLEX")
|
||||
if (mangle_complex_sources)
|
||||
# add a z to the filename
|
||||
get_filename_component(source_name ${source_file} NAME)
|
||||
get_filename_component(source_dir ${source_file} DIRECTORY)
|
||||
string(REPLACE ${source_name} "z${source_name}" source_file ${source_file})
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (VERBOSE_GEN)
|
||||
message(STATUS "${obj_name}:${source_file}")
|
||||
message(STATUS "${obj_defines}")
|
||||
endif ()
|
||||
|
||||
# create a copy of the source to avoid duplicate obj filename problem with ar.exe
|
||||
get_filename_component(source_extension ${source_file} EXT)
|
||||
set(new_source_file "${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${obj_name}${source_extension}")
|
||||
if (IS_ABSOLUTE ${source_file})
|
||||
set(old_source_file ${source_file})
|
||||
else ()
|
||||
set(old_source_file "${CMAKE_CURRENT_LIST_DIR}/${source_file}")
|
||||
endif ()
|
||||
|
||||
string(REPLACE ";" "\n#define " define_source "${obj_defines}")
|
||||
string(REPLACE "=" " " define_source "${define_source}")
|
||||
file(WRITE ${new_source_file} "#define ${define_source}\n#include \"${old_source_file}\"")
|
||||
list(APPEND SRC_LIST_OUT ${new_source_file})
|
||||
|
||||
endforeach ()
|
||||
endforeach ()
|
||||
|
||||
list(APPEND OPENBLAS_SRC ${SRC_LIST_OUT})
|
||||
set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE)
|
||||
endfunction ()
|
||||
|
||||
# generates object files for each of the sources for each of the combinations of the preprocessor definitions passed in
|
||||
# @param sources_in the source files to build from
|
||||
# @param defines_in the preprocessor definitions that will be combined to create the object files
|
||||
# @param all_defines_in (optional) preprocessor definitions that will be applied to all objects
|
||||
# @param replace_scheme If 1, replace the "k" in the filename with the define combo letters. E.g. symm_k.c with TRANS and UNIT defined will be symm_TU.
|
||||
# If 0, it will simply append the code, e.g. symm_L.c with TRANS and UNIT will be symm_LTU.
|
||||
# If 2, it will append the code with an underscore, e.g. symm.c with TRANS and UNIT will be symm_TU.
|
||||
# If 3, it will insert the code *around* the last character with an underscore, e.g. symm_L.c with TRANS and UNIT will be symm_TLU (required by BLAS level2 objects).
|
||||
# If 4, it will insert the code before the last underscore. E.g. trtri_U_parallel with TRANS will be trtri_UT_parallel
|
||||
# @param alternate_name replaces the source name as the object name (define codes are still appended)
|
||||
# @param no_float_type turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
|
||||
# @param complex_filename_scheme see GenerateNamedObjects
|
||||
function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_defines_in replace_scheme)
|
||||
|
||||
set(alternate_name_in "")
|
||||
if (DEFINED ARGV5)
|
||||
set(alternate_name_in ${ARGV5})
|
||||
endif ()
|
||||
|
||||
set(no_float_type false)
|
||||
if (DEFINED ARGV6)
|
||||
set(no_float_type ${ARGV6})
|
||||
endif ()
|
||||
|
||||
set(complex_filename_scheme "")
|
||||
if (DEFINED ARGV7)
|
||||
set(complex_filename_scheme ${ARGV7})
|
||||
endif ()
|
||||
|
||||
AllCombinations("${defines_in}" "${absent_codes_in}")
|
||||
set(define_combos ${LIST_OUT})
|
||||
set(define_codes ${CODES_OUT})
|
||||
|
||||
list(LENGTH define_combos num_combos)
|
||||
math(EXPR num_combos "${num_combos} - 1")
|
||||
|
||||
foreach (c RANGE 0 ${num_combos})
|
||||
|
||||
list(GET define_combos ${c} define_combo)
|
||||
list(GET define_codes ${c} define_code)
|
||||
|
||||
foreach (source_file ${sources_in})
|
||||
|
||||
set(alternate_name ${alternate_name_in})
|
||||
|
||||
# replace colon separated list with semicolons, this turns it into a CMake list that we can use foreach with
|
||||
string(REPLACE ":" ";" define_combo ${define_combo})
|
||||
|
||||
# now add the object and set the defines
|
||||
set(cur_defines ${define_combo})
|
||||
if ("${cur_defines}" STREQUAL " ")
|
||||
set(cur_defines ${all_defines_in})
|
||||
else ()
|
||||
list(APPEND cur_defines ${all_defines_in})
|
||||
endif ()
|
||||
|
||||
set(replace_code "")
|
||||
set(append_code "")
|
||||
if (replace_scheme EQUAL 1)
|
||||
set(replace_code ${define_code})
|
||||
else ()
|
||||
if (replace_scheme EQUAL 2)
|
||||
set(append_code "_${define_code}")
|
||||
elseif (replace_scheme EQUAL 3)
|
||||
if ("${alternate_name}" STREQUAL "")
|
||||
string(REGEX MATCH "[a-zA-Z]\\." last_letter ${source_file})
|
||||
else ()
|
||||
string(REGEX MATCH "[a-zA-Z]$" last_letter ${alternate_name})
|
||||
endif ()
|
||||
# first extract the last letter
|
||||
string(SUBSTRING ${last_letter} 0 1 last_letter) # remove period from match
|
||||
# break the code up into the first letter and the remaining (should only be 2 anyway)
|
||||
string(SUBSTRING ${define_code} 0 1 define_code_first)
|
||||
string(SUBSTRING ${define_code} 1 -1 define_code_second)
|
||||
set(replace_code "${define_code_first}${last_letter}${define_code_second}")
|
||||
elseif (replace_scheme EQUAL 4)
|
||||
# insert code before the last underscore and pass that in as the alternate_name
|
||||
if ("${alternate_name}" STREQUAL "")
|
||||
get_filename_component(alternate_name ${source_file} NAME_WE)
|
||||
endif ()
|
||||
set(extra_underscore "")
|
||||
# check if filename has two underscores, insert another if not (e.g. getrs_parallel needs to become getrs_U_parallel not getrsU_parallel)
|
||||
string(REGEX MATCH "_[a-zA-Z]+_" underscores ${alternate_name})
|
||||
string(LENGTH "${underscores}" underscores)
|
||||
if (underscores EQUAL 0)
|
||||
set(extra_underscore "_")
|
||||
endif ()
|
||||
string(REGEX REPLACE "(.+)(_[^_]+)$" "\\1${extra_underscore}${define_code}\\2" alternate_name ${alternate_name})
|
||||
else()
|
||||
set(append_code ${define_code}) # replace_scheme should be 0
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" false "${replace_code}" "${append_code}" "${no_float_type}" "${complex_filename_scheme}")
|
||||
endforeach ()
|
||||
endforeach ()
|
||||
|
||||
set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE)
|
||||
endfunction ()
|
||||
|
||||
158
common.h
158
common.h
@@ -82,17 +82,29 @@ extern "C" {
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_MSC_VER)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <time.h>
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#include <malloc.h>
|
||||
#include <sched.h>
|
||||
#endif
|
||||
|
||||
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD)
|
||||
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_ANDROID)
|
||||
#include <sched.h>
|
||||
#endif
|
||||
|
||||
#ifdef OS_ANDROID
|
||||
#define NO_SYSV_IPC
|
||||
//Android NDK only supports complex.h since Android 5.0
|
||||
#if __ANDROID_API__ < 21
|
||||
#define FORCE_OPENBLAS_COMPLEX_STRUCT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef OS_WINDOWS
|
||||
#ifdef ATOM
|
||||
#define GOTO_ATOM ATOM
|
||||
@@ -106,8 +118,11 @@ extern "C" {
|
||||
#endif
|
||||
#else
|
||||
#include <sys/mman.h>
|
||||
#ifndef NO_SYSV_IPC
|
||||
#include <sys/shm.h>
|
||||
#endif
|
||||
#include <sys/time.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
#ifdef SMP
|
||||
@@ -287,13 +302,6 @@ typedef int blasint;
|
||||
#define COMPSIZE 2
|
||||
#endif
|
||||
|
||||
#if defined(C_PGI) || defined(C_SUN)
|
||||
#define CREAL(X) (*((FLOAT *)&X + 0))
|
||||
#define CIMAG(X) (*((FLOAT *)&X + 1))
|
||||
#else
|
||||
#define CREAL __real__
|
||||
#define CIMAG __imag__
|
||||
#endif
|
||||
|
||||
#define Address_H(x) (((x)+(1<<15))>>16)
|
||||
#define Address_L(x) ((x)-((Address_H(x))<<16))
|
||||
@@ -307,8 +315,12 @@ typedef int blasint;
|
||||
#endif
|
||||
|
||||
#if defined(OS_WINDOWS)
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#define YIELDING YieldProcessor()
|
||||
#else
|
||||
#define YIELDING SwitchToThread()
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
|
||||
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
|
||||
@@ -320,12 +332,20 @@ typedef int blasint;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef POWER8
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
#ifdef PILEDRIVER
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
#endif
|
||||
*/
|
||||
|
||||
/*
|
||||
#ifdef STEAMROLLER
|
||||
@@ -384,6 +404,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246
|
||||
#include "common_sparc.h"
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_MIPS
|
||||
#include "common_mips.h"
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_MIPS64
|
||||
#include "common_mips64.h"
|
||||
#endif
|
||||
@@ -396,16 +420,69 @@ please https://github.com/xianyi/OpenBLAS/issues/246
|
||||
#include "common_arm64.h"
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_ZARCH
|
||||
#include "common_zarch.h"
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
#ifdef OS_WINDOWSSTORE
|
||||
typedef char env_var_t[MAX_PATH];
|
||||
#define readenv(p, n) 0
|
||||
#else
|
||||
#ifdef OS_WINDOWS
|
||||
typedef char env_var_t[MAX_PATH];
|
||||
#define readenv(p, n) GetEnvironmentVariable((n), (p), sizeof(p))
|
||||
#define readenv(p, n) GetEnvironmentVariable((LPCTSTR)(n), (LPTSTR)(p), sizeof(p))
|
||||
#else
|
||||
typedef char* env_var_t;
|
||||
#define readenv(p, n) ((p)=getenv(n))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(RPCC_DEFINED) && !defined(OS_WINDOWS)
|
||||
#ifdef _POSIX_MONOTONIC_CLOCK
|
||||
#if defined(__GLIBC_PREREQ) // cut the if condition if two lines, otherwise will fail at __GLIBC_PREREQ(2, 17)
|
||||
#if __GLIBC_PREREQ(2, 17) // don't require -lrt
|
||||
#define USE_MONOTONIC
|
||||
#endif
|
||||
#elif defined(OS_ANDROID)
|
||||
#define USE_MONOTONIC
|
||||
#endif
|
||||
#endif
|
||||
/* use similar scale as x86 rdtsc for timeouts to work correctly */
|
||||
static inline unsigned long long rpcc(void){
|
||||
#ifdef USE_MONOTONIC
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec;
|
||||
#else
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv,NULL);
|
||||
return (unsigned long long)tv.tv_sec * 1000000000ull + tv.tv_usec * 1000;
|
||||
#endif
|
||||
}
|
||||
#define RPCC_DEFINED
|
||||
#define RPCC64BIT
|
||||
#endif // !RPCC_DEFINED
|
||||
|
||||
#if !defined(BLAS_LOCK_DEFINED) && defined(__GNUC__)
|
||||
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
} while (!__sync_bool_compare_and_swap(address, 0, 1));
|
||||
}
|
||||
#define BLAS_LOCK_DEFINED
|
||||
#endif
|
||||
|
||||
#ifndef RPCC_DEFINED
|
||||
#error "rpcc() implementation is missing for your platform"
|
||||
#endif
|
||||
#ifndef BLAS_LOCK_DEFINED
|
||||
#error "blas_lock() implementation is missing for your platform"
|
||||
#endif
|
||||
#endif // !ASSEMBLER
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#include "common_linux.h"
|
||||
#endif
|
||||
@@ -450,18 +527,57 @@ typedef char* env_var_t;
|
||||
/* C99 supports complex floating numbers natively, which GCC also offers as an
|
||||
extension since version 3.0. If neither are available, use a compatible
|
||||
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
||||
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
|
||||
(__GNUC__ >= 3 && !defined(__cplusplus)))
|
||||
#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
|
||||
(__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT)))
|
||||
#define OPENBLAS_COMPLEX_C99
|
||||
#ifndef __cplusplus
|
||||
#include <complex.h>
|
||||
#endif
|
||||
typedef float _Complex openblas_complex_float;
|
||||
typedef double _Complex openblas_complex_double;
|
||||
typedef xdouble _Complex openblas_complex_xdouble;
|
||||
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#else
|
||||
#define OPENBLAS_COMPLEX_STRUCT
|
||||
typedef struct { float real, imag; } openblas_complex_float;
|
||||
typedef struct { double real, imag; } openblas_complex_double;
|
||||
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
|
||||
#define openblas_make_complex_float(real, imag) {(real), (imag)}
|
||||
#define openblas_make_complex_double(real, imag) {(real), (imag)}
|
||||
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
|
||||
#endif
|
||||
|
||||
#ifdef XDOUBLE
|
||||
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
|
||||
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
|
||||
#elif defined(DOUBLE)
|
||||
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_double
|
||||
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i)
|
||||
#else
|
||||
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_float
|
||||
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i)
|
||||
#endif
|
||||
|
||||
#if defined(C_PGI) || defined(C_SUN)
|
||||
#if defined(__STDC_IEC_559_COMPLEX__)
|
||||
#define CREAL(X) creal(X)
|
||||
#define CIMAG(X) cimag(X)
|
||||
#else
|
||||
#define CREAL(X) (*((FLOAT *)&X + 0))
|
||||
#define CIMAG(X) (*((FLOAT *)&X + 1))
|
||||
#endif
|
||||
#else
|
||||
#ifdef OPENBLAS_COMPLEX_STRUCT
|
||||
#define CREAL(Z) ((Z).real)
|
||||
#define CIMAG(Z) ((Z).imag)
|
||||
#else
|
||||
#define CREAL __real__
|
||||
#define CIMAG __imag__
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif // ASSEMBLER
|
||||
|
||||
#ifndef IFLUSH
|
||||
@@ -478,6 +594,10 @@ typedef char* env_var_t;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(C_MSVC)
|
||||
#define inline __inline
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
#ifndef MIN
|
||||
@@ -499,6 +619,8 @@ void blas_set_parameter(void);
|
||||
int blas_get_cpu_number(void);
|
||||
void *blas_memory_alloc (int);
|
||||
void blas_memory_free (void *);
|
||||
void *blas_memory_alloc_nolock (int); //use malloc without blas_lock
|
||||
void blas_memory_free_nolock (void *);
|
||||
|
||||
int get_num_procs (void);
|
||||
|
||||
@@ -518,9 +640,14 @@ void gotoblas_profile_init(void);
|
||||
void gotoblas_profile_quit(void);
|
||||
|
||||
#ifdef USE_OPENMP
|
||||
#ifndef C_MSVC
|
||||
int omp_in_parallel(void);
|
||||
int omp_get_num_procs(void);
|
||||
#else
|
||||
__declspec(dllimport) int __cdecl omp_in_parallel(void);
|
||||
__declspec(dllimport) int __cdecl omp_get_num_procs(void);
|
||||
#endif
|
||||
#else
|
||||
#ifdef __ELF__
|
||||
int omp_in_parallel (void) __attribute__ ((weak));
|
||||
int omp_get_num_procs(void) __attribute__ ((weak));
|
||||
@@ -532,7 +659,11 @@ static __inline void blas_unlock(volatile BLASULONG *address){
|
||||
*address = 0;
|
||||
}
|
||||
|
||||
|
||||
#ifdef OS_WINDOWSSTORE
|
||||
static __inline int readenv_atoi(char *env) {
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
#ifdef OS_WINDOWS
|
||||
static __inline int readenv_atoi(char *env) {
|
||||
env_var_t p;
|
||||
@@ -547,7 +678,7 @@ static __inline int readenv_atoi(char *env) {
|
||||
return(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
|
||||
|
||||
@@ -631,6 +762,7 @@ typedef struct {
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
#include "common_stackalloc.h"
|
||||
#if 0
|
||||
#include "symcopy.h"
|
||||
#endif
|
||||
|
||||
@@ -76,6 +76,7 @@ static void __inline blas_lock(unsigned long *address){
|
||||
"30:", address);
|
||||
#endif
|
||||
}
|
||||
#define BLAS_LOCK_DEFINED
|
||||
|
||||
static __inline unsigned int rpcc(void){
|
||||
|
||||
@@ -89,6 +90,7 @@ static __inline unsigned int rpcc(void){
|
||||
|
||||
return r0;
|
||||
}
|
||||
#define RPCC_DEFINED
|
||||
|
||||
|
||||
#define HALT ldq $0, 0($0)
|
||||
|
||||
90
common_arm.h
90
common_arm.h
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||
Copyright (c) 2011-2015, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,56 +30,29 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#ifndef COMMON_ARM
|
||||
#define COMMON_ARM
|
||||
|
||||
#if defined(ARMV5) || defined(ARMV6)
|
||||
|
||||
#define MB
|
||||
#define WMB
|
||||
|
||||
#else
|
||||
|
||||
#define MB __asm__ __volatile__ ("dmb ish" : : : "memory")
|
||||
#define WMB __asm__ __volatile__ ("dmb ishst" : : : "memory")
|
||||
|
||||
#endif
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
#define RETURN_BY_COMPLEX
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
#if defined(ARMV6) || defined(ARMV7) || defined(ARMV8)
|
||||
|
||||
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
int register ret;
|
||||
@@ -88,37 +61,29 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
__asm__ __volatile__(
|
||||
"ldrex r2, [%1] \n\t"
|
||||
"mov r2, #0 \n\t"
|
||||
"strex r3, r2, [%1] \n\t"
|
||||
"mov %0 , r3 \n\t"
|
||||
: "=r"(ret), "=r"(address)
|
||||
: "1"(address)
|
||||
: "memory", "r2" , "r3"
|
||||
|
||||
|
||||
"ldrex r2, [%1] \n\t"
|
||||
"strex %0, %2, [%1] \n\t"
|
||||
"orr %0, r2 \n\t"
|
||||
: "=&r"(ret)
|
||||
: "r"(address), "r"(1)
|
||||
: "memory", "r2"
|
||||
);
|
||||
|
||||
} while (ret);
|
||||
|
||||
MB;
|
||||
}
|
||||
|
||||
|
||||
static inline unsigned long long rpcc(void){
|
||||
unsigned long long ret=0;
|
||||
double v;
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv,NULL);
|
||||
v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
|
||||
ret = (unsigned long long) ( v * 1000.0d );
|
||||
return ret;
|
||||
}
|
||||
#define BLAS_LOCK_DEFINED
|
||||
#endif
|
||||
|
||||
static inline int blas_quickdivide(blasint x, blasint y){
|
||||
return x / y;
|
||||
}
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#if !defined(HAVE_VFP)
|
||||
/* no FPU, soft float */
|
||||
#define GET_IMAGE(res)
|
||||
#elif defined(DOUBLE)
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
|
||||
#else
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
|
||||
@@ -140,7 +105,6 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||
#define PROLOGUE \
|
||||
.arm ;\
|
||||
.global REALNAME ;\
|
||||
.func REALNAME ;\
|
||||
REALNAME:
|
||||
|
||||
#define EPILOGUE
|
||||
@@ -166,4 +130,8 @@ REALNAME:
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
#if !defined(ARMV5) && !defined(ARMV6) && !defined(ARMV7) && !defined(ARMV8)
|
||||
#error "you must define ARMV5, ARMV6, ARMV7 or ARMV8"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||
Copyright (c) 2011-2015, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,89 +30,55 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#ifndef COMMON_ARM64
|
||||
#define COMMON_ARM64
|
||||
|
||||
#define MB
|
||||
#define WMB
|
||||
#define MB __asm__ __volatile__ ("dmb ish" : : : "memory")
|
||||
#define WMB __asm__ __volatile__ ("dmb ishst" : : : "memory")
|
||||
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
#ifdef F_INTERFACE_FLANG
|
||||
#define RETURN_BY_STACK
|
||||
#else
|
||||
#define RETURN_BY_COMPLEX
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
|
||||
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
/*
|
||||
int register ret;
|
||||
|
||||
BLASULONG ret;
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
__asm__ __volatile__(
|
||||
"ldrex r2, [%1] \n\t"
|
||||
"mov r2, #0 \n\t"
|
||||
"strex r3, r2, [%1] \n\t"
|
||||
"mov %0 , r3 \n\t"
|
||||
"mov x4, #1 \n\t"
|
||||
"1: \n\t"
|
||||
"ldaxr x2, [%1] \n\t"
|
||||
"cbnz x2, 1b \n\t"
|
||||
"2: \n\t"
|
||||
"stxr w3, x4, [%1] \n\t"
|
||||
"cbnz w3, 1b \n\t"
|
||||
"mov %0, #0 \n\t"
|
||||
: "=r"(ret), "=r"(address)
|
||||
: "1"(address)
|
||||
: "memory", "r2" , "r3"
|
||||
: "memory", "x2" , "x3", "x4"
|
||||
|
||||
|
||||
);
|
||||
|
||||
|
||||
} while (ret);
|
||||
*/
|
||||
|
||||
}
|
||||
|
||||
#define BLAS_LOCK_DEFINED
|
||||
|
||||
|
||||
static inline unsigned long long rpcc(void){
|
||||
unsigned long long ret=0;
|
||||
double v;
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv,NULL);
|
||||
v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
|
||||
ret = (unsigned long long) ( v * 1000.0d );
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int blas_quickdivide(blasint x, blasint y){
|
||||
return x / y;
|
||||
@@ -138,8 +104,10 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||
|
||||
#define PROLOGUE \
|
||||
.text ;\
|
||||
.align 4 ;\
|
||||
.global REALNAME ;\
|
||||
.func REALNAME ;\
|
||||
.type REALNAME, %function ;\
|
||||
REALNAME:
|
||||
|
||||
#define EPILOGUE
|
||||
@@ -156,7 +124,11 @@ REALNAME:
|
||||
#endif
|
||||
#define HUGE_PAGESIZE ( 4 << 20)
|
||||
|
||||
#if defined(CORTEXA57)
|
||||
#define BUFFER_SIZE (20 << 20)
|
||||
#else
|
||||
#define BUFFER_SIZE (16 << 20)
|
||||
#endif
|
||||
|
||||
|
||||
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
|
||||
@@ -166,3 +138,4 @@ REALNAME:
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
19
common_c.h
19
common_c.h
@@ -220,6 +220,15 @@
|
||||
#define COMATCOPY_K_CTC comatcopy_k_ctc
|
||||
#define COMATCOPY_K_RTC comatcopy_k_rtc
|
||||
|
||||
#define CIMATCOPY_K_CN cimatcopy_k_cn
|
||||
#define CIMATCOPY_K_RN cimatcopy_k_rn
|
||||
#define CIMATCOPY_K_CT cimatcopy_k_ct
|
||||
#define CIMATCOPY_K_RT cimatcopy_k_rt
|
||||
#define CIMATCOPY_K_CNC cimatcopy_k_cnc
|
||||
#define CIMATCOPY_K_RNC cimatcopy_k_rnc
|
||||
#define CIMATCOPY_K_CTC cimatcopy_k_ctc
|
||||
#define CIMATCOPY_K_RTC cimatcopy_k_rtc
|
||||
|
||||
#define CGEADD_K cgeadd_k
|
||||
|
||||
#else
|
||||
@@ -403,6 +412,16 @@
|
||||
#define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc
|
||||
#define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc
|
||||
#define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc
|
||||
|
||||
#define CIMATCOPY_K_CN gotoblas -> cimatcopy_k_cn
|
||||
#define CIMATCOPY_K_RN gotoblas -> cimatcopy_k_rn
|
||||
#define CIMATCOPY_K_CT gotoblas -> cimatcopy_k_ct
|
||||
#define CIMATCOPY_K_RT gotoblas -> cimatcopy_k_rt
|
||||
#define CIMATCOPY_K_CNC gotoblas -> cimatcopy_k_cnc
|
||||
#define CIMATCOPY_K_RNC gotoblas -> cimatcopy_k_rnc
|
||||
#define CIMATCOPY_K_CTC gotoblas -> cimatcopy_k_ctc
|
||||
#define CIMATCOPY_K_RTC gotoblas -> cimatcopy_k_rtc
|
||||
|
||||
#define CGEADD_K gotoblas -> cgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
@@ -149,6 +149,11 @@
|
||||
#define DOMATCOPY_K_RN domatcopy_k_rn
|
||||
#define DOMATCOPY_K_CT domatcopy_k_ct
|
||||
#define DOMATCOPY_K_RT domatcopy_k_rt
|
||||
|
||||
#define DIMATCOPY_K_CN dimatcopy_k_cn
|
||||
#define DIMATCOPY_K_RN dimatcopy_k_rn
|
||||
#define DIMATCOPY_K_CT dimatcopy_k_ct
|
||||
#define DIMATCOPY_K_RT dimatcopy_k_rt
|
||||
#define DGEADD_K dgeadd_k
|
||||
|
||||
#else
|
||||
@@ -267,6 +272,10 @@
|
||||
#define DOMATCOPY_K_RN gotoblas -> domatcopy_k_rn
|
||||
#define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct
|
||||
#define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt
|
||||
#define DIMATCOPY_K_CN gotoblas -> dimatcopy_k_cn
|
||||
#define DIMATCOPY_K_RN gotoblas -> dimatcopy_k_rn
|
||||
#define DIMATCOPY_K_CT gotoblas -> dimatcopy_k_ct
|
||||
#define DIMATCOPY_K_RT gotoblas -> dimatcopy_k_rt
|
||||
|
||||
#define DGEADD_K gotoblas -> dgeadd_k
|
||||
|
||||
|
||||
@@ -68,6 +68,7 @@ static __inline void blas_lock(volatile unsigned long *address){
|
||||
: "ar.ccv", "memory");
|
||||
} while (ret);
|
||||
}
|
||||
#define BLAS_LOCK_DEFINED
|
||||
|
||||
static __inline unsigned long rpcc(void) {
|
||||
unsigned long clocks;
|
||||
@@ -75,6 +76,7 @@ static __inline unsigned long rpcc(void) {
|
||||
__asm__ __volatile__ ("mov %0=ar.itc" : "=r"(clocks));
|
||||
return clocks;
|
||||
}
|
||||
#define RPCC_DEFINED
|
||||
|
||||
|
||||
static __inline unsigned long stmxcsr(void){
|
||||
@@ -99,10 +101,12 @@ static __inline void blas_lock(volatile unsigned long *address){
|
||||
while (*address || _InterlockedCompareExchange((volatile int *) address,1,0))
|
||||
;
|
||||
}
|
||||
#define BLAS_LOCK_DEFINED
|
||||
|
||||
static __inline unsigned int rpcc(void) {
|
||||
return __getReg(_IA64_REG_AR_ITC);
|
||||
}
|
||||
#define RPCC_DEFINED
|
||||
|
||||
static __inline unsigned int stmxcsr(void) {
|
||||
return __getReg(_IA64_REG_AR_FPSR);
|
||||
|
||||
@@ -47,12 +47,12 @@ double dsdot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
double ddot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
float _Complex cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
float _Complex cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
double _Complex zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
double _Complex zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
xdouble _Complex xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
xdouble _Complex xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
openblas_complex_float cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
openblas_complex_float cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
openblas_complex_double zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
openblas_complex_double zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
openblas_complex_xdouble xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
openblas_complex_xdouble xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
int saxpy_k (BLASLONG, BLASLONG, BLASLONG, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
|
||||
@@ -1736,31 +1736,55 @@ int somatcopy_k_cn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLAS
|
||||
int somatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int somatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int somatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int simatcopy_k_cn(BLASLONG, BLASLONG, float, float *, BLASLONG);
|
||||
int simatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG);
|
||||
int simatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG);
|
||||
int simatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG);
|
||||
|
||||
int domatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int domatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int domatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int domatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int dimatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG);
|
||||
int dimatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG);
|
||||
int dimatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG);
|
||||
int dimatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG);
|
||||
|
||||
int comatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int cimatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
|
||||
int cimatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
|
||||
int cimatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
|
||||
int cimatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
|
||||
|
||||
int comatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int cimatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
|
||||
int cimatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
|
||||
int cimatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
|
||||
int cimatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
|
||||
|
||||
int zomatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zimatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
|
||||
int zimatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
|
||||
int zimatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
|
||||
int zimatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
|
||||
|
||||
int zomatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zimatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
|
||||
int zimatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
|
||||
int zimatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
|
||||
int zimatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
|
||||
|
||||
int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG);
|
||||
int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG);
|
||||
|
||||
@@ -70,7 +70,7 @@ extern long int syscall (long int __sysno, ...);
|
||||
static inline int my_mbind(void *addr, unsigned long len, int mode,
|
||||
unsigned long *nodemask, unsigned long maxnode,
|
||||
unsigned flags) {
|
||||
#if defined (__LSB_VERSION__)
|
||||
#if defined (__LSB_VERSION__) || defined(ARCH_ZARCH)
|
||||
// So far, LSB (Linux Standard Base) don't support syscall().
|
||||
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
|
||||
return 0;
|
||||
@@ -90,7 +90,7 @@ static inline int my_mbind(void *addr, unsigned long len, int mode,
|
||||
}
|
||||
|
||||
static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) {
|
||||
#if defined (__LSB_VERSION__)
|
||||
#if defined (__LSB_VERSION__) || defined(ARCH_ZARCH)
|
||||
// So far, LSB (Linux Standard Base) don't support syscall().
|
||||
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
|
||||
return 0;
|
||||
|
||||
@@ -634,6 +634,11 @@
|
||||
#define OMATCOPY_K_RN DOMATCOPY_K_RN
|
||||
#define OMATCOPY_K_CT DOMATCOPY_K_CT
|
||||
#define OMATCOPY_K_RT DOMATCOPY_K_RT
|
||||
#define IMATCOPY_K_CN DIMATCOPY_K_CN
|
||||
#define IMATCOPY_K_RN DIMATCOPY_K_RN
|
||||
#define IMATCOPY_K_CT DIMATCOPY_K_CT
|
||||
#define IMATCOPY_K_RT DIMATCOPY_K_RT
|
||||
|
||||
#define GEADD_K DGEADD_K
|
||||
#else
|
||||
|
||||
@@ -931,6 +936,10 @@
|
||||
#define OMATCOPY_K_RN SOMATCOPY_K_RN
|
||||
#define OMATCOPY_K_CT SOMATCOPY_K_CT
|
||||
#define OMATCOPY_K_RT SOMATCOPY_K_RT
|
||||
#define IMATCOPY_K_CN SIMATCOPY_K_CN
|
||||
#define IMATCOPY_K_RN SIMATCOPY_K_RN
|
||||
#define IMATCOPY_K_CT SIMATCOPY_K_CT
|
||||
#define IMATCOPY_K_RT SIMATCOPY_K_RT
|
||||
|
||||
#define GEADD_K SGEADD_K
|
||||
#endif
|
||||
@@ -1747,6 +1756,15 @@
|
||||
#define OMATCOPY_K_RNC ZOMATCOPY_K_RNC
|
||||
#define OMATCOPY_K_CTC ZOMATCOPY_K_CTC
|
||||
#define OMATCOPY_K_RTC ZOMATCOPY_K_RTC
|
||||
#define IMATCOPY_K_CN ZIMATCOPY_K_CN
|
||||
#define IMATCOPY_K_RN ZIMATCOPY_K_RN
|
||||
#define IMATCOPY_K_CT ZIMATCOPY_K_CT
|
||||
#define IMATCOPY_K_RT ZIMATCOPY_K_RT
|
||||
#define IMATCOPY_K_CNC ZIMATCOPY_K_CNC
|
||||
#define IMATCOPY_K_RNC ZIMATCOPY_K_RNC
|
||||
#define IMATCOPY_K_CTC ZIMATCOPY_K_CTC
|
||||
#define IMATCOPY_K_RTC ZIMATCOPY_K_RTC
|
||||
|
||||
#define GEADD_K ZGEADD_K
|
||||
|
||||
#else
|
||||
@@ -2160,6 +2178,14 @@
|
||||
#define OMATCOPY_K_RNC COMATCOPY_K_RNC
|
||||
#define OMATCOPY_K_CTC COMATCOPY_K_CTC
|
||||
#define OMATCOPY_K_RTC COMATCOPY_K_RTC
|
||||
#define IMATCOPY_K_CN CIMATCOPY_K_CN
|
||||
#define IMATCOPY_K_RN CIMATCOPY_K_RN
|
||||
#define IMATCOPY_K_CT CIMATCOPY_K_CT
|
||||
#define IMATCOPY_K_RT CIMATCOPY_K_RT
|
||||
#define IMATCOPY_K_CNC CIMATCOPY_K_CNC
|
||||
#define IMATCOPY_K_RNC CIMATCOPY_K_RNC
|
||||
#define IMATCOPY_K_CTC CIMATCOPY_K_CTC
|
||||
#define IMATCOPY_K_RTC CIMATCOPY_K_RTC
|
||||
|
||||
#define GEADD_K CGEADD_K
|
||||
|
||||
@@ -2167,7 +2193,7 @@
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
|
||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)
|
||||
extern BLASLONG gemm_offset_a;
|
||||
extern BLASLONG gemm_offset_b;
|
||||
extern BLASLONG sgemm_p;
|
||||
|
||||
103
common_mips.h
Normal file
103
common_mips.h
Normal file
@@ -0,0 +1,103 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************************/
|
||||
|
||||
#ifndef COMMON_MIPS
|
||||
#define COMMON_MIPS
|
||||
|
||||
#define MB __sync_synchronize()
|
||||
#define WMB __sync_synchronize()
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
#define RETURN_BY_COMPLEX
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
/*
 * Read a hardware register via the MIPS `rdhwr` instruction and return it
 * as an unsigned int (the unsigned long result is narrowed on return).
 *
 * NOTE(review): the register number is hard-coded to $30; which counter that
 * selects is not visible here -- confirm against the target core's manual.
 */
static inline unsigned int rpcc(void){
  unsigned long counter;

  /* The asm is volatile and lists "memory" as clobbered, exactly as before. */
  __asm__ __volatile__(
      ".set push \n"
      "rdhwr %0, $30 \n"
      ".set pop"
      : "=r"(counter)
      :
      : "memory");

  return (unsigned int)counter;
}
|
||||
#define RPCC_DEFINED
|
||||
|
||||
/*
 * Return the integer quotient x / y using plain C division.
 *
 * The quotient is computed in blasint and then converted to the int return
 * type. No check is made for y == 0; the caller must pass a non-zero divisor.
 */
static inline int blas_quickdivide(blasint x, blasint y){
  const blasint quotient = x / y;

  return quotient;
}
|
||||
|
||||
#define GET_IMAGE(res)
|
||||
|
||||
#define GET_IMAGE_CANCEL
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef F_INTERFACE
|
||||
#define REALNAME ASMNAME
|
||||
#else
|
||||
#define REALNAME ASMFNAME
|
||||
#endif
|
||||
|
||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||
|
||||
/*
 * Prologue for assembly routines: export REALNAME as a global symbol and
 * emit its label.
 *
 * No instruction-set directive is emitted here: `.arm` is an ARM-only GNU as
 * directive and is not accepted when assembling for MIPS.
 */
#define PROLOGUE \
	.global	REALNAME ;\
REALNAME:
|
||||
|
||||
#define EPILOGUE
|
||||
|
||||
#define PROFCODE
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define SEEK_ADDRESS
|
||||
|
||||
#ifndef PAGESIZE
|
||||
#define PAGESIZE ( 4 << 10)
|
||||
#endif
|
||||
#define HUGE_PAGESIZE ( 4 << 20)
|
||||
|
||||
#define BUFFER_SIZE (16 << 20)
|
||||
|
||||
|
||||
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
|
||||
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
#endif
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user