Compare commits
731 Commits
v0.1alpha1
...
haswell
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d51868190e | ||
|
|
9b5be29886 | ||
|
|
768d2042d4 | ||
|
|
91eaea364b | ||
|
|
a136426624 | ||
|
|
2f0d2ce1ea | ||
|
|
0f6e79f918 | ||
|
|
ea74f331f4 | ||
|
|
53eaf41901 | ||
|
|
9423f980f6 | ||
|
|
c6156b2ef2 | ||
|
|
034a5b2083 | ||
|
|
27d4234d4d | ||
|
|
aae75b2461 | ||
|
|
a74ac84981 | ||
|
|
402d6e91db | ||
|
|
6d2c6b5c74 | ||
|
|
1b5a267cdd | ||
|
|
4a1575e748 | ||
|
|
339fef2649 | ||
|
|
b3254eecaf | ||
|
|
d910404f00 | ||
|
|
1cae4114a8 | ||
|
|
91ca041cc2 | ||
|
|
d2385f0d52 | ||
|
|
ec2dadde9b | ||
|
|
d56292e2e4 | ||
|
|
ffe70b1fdc | ||
|
|
ecbc85b954 | ||
|
|
9e38dbb658 | ||
|
|
d844901062 | ||
|
|
eab79631c3 | ||
|
|
d13aa79d26 | ||
|
|
dfaa489224 | ||
|
|
9fbf437177 | ||
|
|
38ce786754 | ||
|
|
2594728eb7 | ||
|
|
cc7876f660 | ||
|
|
51e5983599 | ||
|
|
65ebab0688 | ||
|
|
7594216412 | ||
|
|
fac07b0687 | ||
|
|
3169f524e4 | ||
|
|
d4833f1801 | ||
|
|
fe8b841c82 | ||
|
|
9b1b01a478 | ||
|
|
2a625447ea | ||
|
|
be18cd47f6 | ||
|
|
8191efc420 | ||
|
|
39dc69db4a | ||
|
|
04d51536a4 | ||
|
|
c947ab85dc | ||
|
|
0b6e13b689 | ||
|
|
e09dc279a2 | ||
|
|
1596ced242 | ||
|
|
4be4db590c | ||
|
|
5c648a8984 | ||
|
|
7581e2e9cb | ||
|
|
c44dc4dd3c | ||
|
|
ac7735e01f | ||
|
|
271ceeba15 | ||
|
|
71eecaaf37 | ||
|
|
9d3fae15a8 | ||
|
|
2d3c884294 | ||
|
|
d54a061713 | ||
|
|
86afb47e83 | ||
|
|
42a4dff056 | ||
|
|
5bc322a66c | ||
|
|
dec7ad0dfd | ||
|
|
274304bd03 | ||
|
|
5007a534c4 | ||
|
|
a537d7d8d7 | ||
|
|
b42145834f | ||
|
|
3d5e792c72 | ||
|
|
a9bd12da2c | ||
|
|
697e198e8a | ||
|
|
36b0f7fe1d | ||
|
|
d2b20c5c51 | ||
|
|
fd1d9fdb22 | ||
|
|
fe5f46c330 | ||
|
|
e25de3d182 | ||
|
|
25c6050593 | ||
|
|
12e02a00e0 | ||
|
|
29a3196f56 | ||
|
|
8776a73773 | ||
|
|
7e84acd3e8 | ||
|
|
33d3ab6e09 | ||
|
|
9a0f978929 | ||
|
|
7f210587f0 | ||
|
|
9f0a3a35b3 | ||
|
|
dbae93110b | ||
|
|
19cd5c64a2 | ||
|
|
9adf87495e | ||
|
|
440db4cdda | ||
|
|
cd93cae5a7 | ||
|
|
8565afb3c2 | ||
|
|
5bf7cf8d67 | ||
|
|
29a005c635 | ||
|
|
f1be3a168a | ||
|
|
410afda9b4 | ||
|
|
bf04544902 | ||
|
|
86283c0be1 | ||
|
|
f27cabfd08 | ||
|
|
23dd474cd0 | ||
|
|
f1b452e160 | ||
|
|
3dabd7e6e6 | ||
|
|
6f4a0ebe38 | ||
|
|
5048a80032 | ||
|
|
6e679266f8 | ||
|
|
f1db386211 | ||
|
|
6da558d2ab | ||
|
|
a2942456ef | ||
|
|
f750103336 | ||
|
|
00f33c0134 | ||
|
|
5b36cc0f47 | ||
|
|
c8f1aeb154 | ||
|
|
8fa93be06e | ||
|
|
1e8128f41c | ||
|
|
6d8095bcb9 | ||
|
|
2f5fdd2000 | ||
|
|
80a2e901b1 | ||
|
|
73770e60b8 | ||
|
|
ac50bccbd2 | ||
|
|
82015beaef | ||
|
|
6216ab8a7e | ||
|
|
370e3834a9 | ||
|
|
95aedfa0ff | ||
|
|
cba97daf3c | ||
|
|
5400a9f4e4 | ||
|
|
e31186efd4 | ||
|
|
2b801a00a5 | ||
|
|
b3eab8fcb7 | ||
|
|
6d9d70c55c | ||
|
|
dfd1064d7b | ||
|
|
02bc36ac79 | ||
|
|
5118a7f4d1 | ||
|
|
e172b70ea2 | ||
|
|
1cf4b974b2 | ||
|
|
7bccff1512 | ||
|
|
afe44b0241 | ||
|
|
a77c71eaf5 | ||
|
|
b2219b3478 | ||
|
|
f5a0038bad | ||
|
|
c937090121 | ||
|
|
fe8c5666f9 | ||
|
|
f6b50057e2 | ||
|
|
2840d56aeb | ||
|
|
2d49db2f5b | ||
|
|
04391e6d9c | ||
|
|
85484a42df | ||
|
|
3983011f0b | ||
|
|
2a1515c9dd | ||
|
|
31f51e78bc | ||
|
|
beffee7d91 | ||
|
|
a35f4343fa | ||
|
|
ce5626a384 | ||
|
|
e0b968c3a7 | ||
|
|
93f1074dd4 | ||
|
|
1c63180bb6 | ||
|
|
22a8fcc4b7 | ||
|
|
9965d48005 | ||
|
|
4a474ea7dc | ||
|
|
69ce737cc5 | ||
|
|
d13788d1b4 | ||
|
|
70411af888 | ||
|
|
16eb780e13 | ||
|
|
a746724e84 | ||
|
|
3f7b0cd994 | ||
|
|
cc6db2ecfe | ||
|
|
3175be4b3d | ||
|
|
a29e6592da | ||
|
|
212463dce9 | ||
|
|
037bd82bef | ||
|
|
eae4cfa3f6 | ||
|
|
6c4a7d0828 | ||
|
|
fe98de2f68 | ||
|
|
db389b5915 | ||
|
|
52f587db7f | ||
|
|
067e8417fd | ||
|
|
a82da3d069 | ||
|
|
1569bf14f8 | ||
|
|
df554aebd2 | ||
|
|
eae6920f2d | ||
|
|
c92ae012a6 | ||
|
|
f51a849d91 | ||
|
|
44ef70420c | ||
|
|
d488b1b1aa | ||
|
|
4070d9a123 | ||
|
|
0b90c0ec64 | ||
|
|
2b8ab8f55b | ||
|
|
1cb9579cd0 | ||
|
|
2638370844 | ||
|
|
89637f87c8 | ||
|
|
c0b1e41bec | ||
|
|
49faee1a51 | ||
|
|
c0159d44a3 | ||
|
|
c17a850c1c | ||
|
|
099853fff6 | ||
|
|
44d23881b5 | ||
|
|
2905042c6a | ||
|
|
32fb6b9bb2 | ||
|
|
673e453b3f | ||
|
|
143cca4dd5 | ||
|
|
aaeb8eaecd | ||
|
|
8aeec32ea0 | ||
|
|
87fc9de572 | ||
|
|
564aa60fec | ||
|
|
f645665dd6 | ||
|
|
e45a347cd2 | ||
|
|
99727ac013 | ||
|
|
6e0a2fbc0c | ||
|
|
0a22f99c58 | ||
|
|
79ba52115d | ||
|
|
835293cc1a | ||
|
|
b736aa8110 | ||
|
|
ae521ecc3e | ||
|
|
36adfe8d64 | ||
|
|
a07cc39571 | ||
|
|
cff70a666d | ||
|
|
b5c2ac4fd6 | ||
|
|
749f45ffc8 | ||
|
|
534c5ec919 | ||
|
|
bd2da90e13 | ||
|
|
84bd0aabaa | ||
|
|
5b504d6c23 | ||
|
|
72b1edaf1b | ||
|
|
a2930664f4 | ||
|
|
6e0db36373 | ||
|
|
1e1250b703 | ||
|
|
23186d9f21 | ||
|
|
e6ebbfd314 | ||
|
|
4471c77905 | ||
|
|
9f0fb6e662 | ||
|
|
f26b7a08aa | ||
|
|
63f14189e3 | ||
|
|
e39384432b | ||
|
|
c5437149c0 | ||
|
|
6f5b395009 | ||
|
|
d4f9571818 | ||
|
|
937d838619 | ||
|
|
a8f9b6a665 | ||
|
|
6209c8fc44 | ||
|
|
238ceb4ac0 | ||
|
|
77b572fa0b | ||
|
|
f69f89b846 | ||
|
|
c77032b0cc | ||
|
|
1b3b9e841d | ||
|
|
b67252c2e4 | ||
|
|
c69e73b868 | ||
|
|
b51e2ba1ee | ||
|
|
9c0a834f98 | ||
|
|
2a7503e563 | ||
|
|
fd0c388681 | ||
|
|
61a9582987 | ||
|
|
b681064c6c | ||
|
|
e80e285928 | ||
|
|
2ed0f6ab60 | ||
|
|
5448643557 | ||
|
|
824c3c4df3 | ||
|
|
c19a488af2 | ||
|
|
32d2ca3035 | ||
|
|
6df39ad9e7 | ||
|
|
3a96e4cbcb | ||
|
|
6f008abcef | ||
|
|
3eb5af1955 | ||
|
|
fbb75e58b1 | ||
|
|
f54f5bac9e | ||
|
|
5d3312142a | ||
|
|
886cbaf4e4 | ||
|
|
0c4074e10b | ||
|
|
cc522aa21d | ||
|
|
9c78fad721 | ||
|
|
6028232ad1 | ||
|
|
feb9a3889a | ||
|
|
32dbeb636d | ||
|
|
57944538b6 | ||
|
|
3ce2c62b0b | ||
|
|
50464997a3 | ||
|
|
8e7cad1650 | ||
|
|
590e6aeafc | ||
|
|
88ef307cef | ||
|
|
6e8501c8a1 | ||
|
|
fa916a0fac | ||
|
|
fb298b34ae | ||
|
|
16012767f4 | ||
|
|
bcbac31b47 | ||
|
|
8dc0c72583 | ||
|
|
89405a1a0b | ||
|
|
4f2b12b8a8 | ||
|
|
646e168d26 | ||
|
|
93dbbe1fb8 | ||
|
|
a135f5d9ed | ||
|
|
d0b6299b13 | ||
|
|
9e58dd509e | ||
|
|
7c8227101b | ||
|
|
f67fa62851 | ||
|
|
cd1d473ba0 | ||
|
|
56f160134d | ||
|
|
0ded1fcc1c | ||
|
|
a789b588cd | ||
|
|
8eaa04acbb | ||
|
|
d854b30ae6 | ||
|
|
d65bbec99b | ||
|
|
e4c39c7c26 | ||
|
|
ba800f0883 | ||
|
|
25491e42f9 | ||
|
|
960b0c88a7 | ||
|
|
65ffead0cf | ||
|
|
f2fb8c7035 | ||
|
|
9f59f384d8 | ||
|
|
23965f164c | ||
|
|
6a72840945 | ||
|
|
947457fb7c | ||
|
|
79120bf9a0 | ||
|
|
acb11905d5 | ||
|
|
109500178c | ||
|
|
e50a664865 | ||
|
|
357078b93e | ||
|
|
731220f870 | ||
|
|
69aa6c8fb1 | ||
|
|
60b263f3d2 | ||
|
|
7ac306e0da | ||
|
|
4cb454cdf2 | ||
|
|
19ad2fb128 | ||
|
|
5d96e4f224 | ||
|
|
6821677489 | ||
|
|
dbbda55e67 | ||
|
|
6c34a7f43c | ||
|
|
3326f3152c | ||
|
|
7641f6e253 | ||
|
|
48bdc1ad3b | ||
|
|
3ad29452d1 | ||
|
|
6e3f6f25a5 | ||
|
|
990efcab6e | ||
|
|
75a5dc3975 | ||
|
|
986d542acb | ||
|
|
6958c1a1aa | ||
|
|
a068d54981 | ||
|
|
d692ee07f7 | ||
|
|
1a57717b1a | ||
|
|
6b01d58712 | ||
|
|
35b943f17f | ||
|
|
e029242870 | ||
|
|
7a9b94b519 | ||
|
|
66b919d99f | ||
|
|
f4846afbad | ||
|
|
53588bc786 | ||
|
|
b47f13ee4c | ||
|
|
309f90e563 | ||
|
|
773c01f496 | ||
|
|
d831b2ff8b | ||
|
|
724ae159ce | ||
|
|
2c9a203bd1 | ||
|
|
f300ce3df5 | ||
|
|
e2c7c75715 | ||
|
|
66e64131ed | ||
|
|
5900b1462e | ||
|
|
9405f26f4b | ||
|
|
54e7b37630 | ||
|
|
529f1b5006 | ||
|
|
e5ac3007e0 | ||
|
|
0d0405b434 | ||
|
|
f1ce74ffdd | ||
|
|
d744c9590a | ||
|
|
3cc6ae793e | ||
|
|
4c2123c334 | ||
|
|
5155e3f509 | ||
|
|
5c8bf6ae0e | ||
|
|
6ae2f868fd | ||
|
|
a1ead62f28 | ||
|
|
0133580148 | ||
|
|
274246651d | ||
|
|
299b5a44dc | ||
|
|
a9500d0079 | ||
|
|
64ad8b9809 | ||
|
|
875d520ccf | ||
|
|
d311236dfd | ||
|
|
36e0982966 | ||
|
|
8cdb795438 | ||
|
|
4db6660de4 | ||
|
|
0b08f7479e | ||
|
|
200e4acf15 | ||
|
|
99d1978df7 | ||
|
|
08bf6674d5 | ||
|
|
8b122ff9dc | ||
|
|
69200884e1 | ||
|
|
0d1518add9 | ||
|
|
91ed4e4450 | ||
|
|
fd3046b32a | ||
|
|
a4ee6f3915 | ||
|
|
a0363e9b48 | ||
|
|
b471d52e61 | ||
|
|
9fb341a9f8 | ||
|
|
fba6b590f2 | ||
|
|
97f68f7f3a | ||
|
|
1138817dd2 | ||
|
|
13f8fc0b1a | ||
|
|
bdf8d9411e | ||
|
|
bb10cb8442 | ||
|
|
d48cff8cf1 | ||
|
|
f19af5ecc0 | ||
|
|
bfaaa975e6 | ||
|
|
b7c0fa6bd2 | ||
|
|
7110d17146 | ||
|
|
e01b3d4b54 | ||
|
|
cea1a885b5 | ||
|
|
f78eb335d6 | ||
|
|
2345bdec68 | ||
|
|
5f0117385e | ||
|
|
6caf1bab73 | ||
|
|
01e3c984ce | ||
|
|
6751f7b9a7 | ||
|
|
d5717a97ea | ||
|
|
b45d43d295 | ||
|
|
dcfb69c2b5 | ||
|
|
e85549ee11 | ||
|
|
789f205177 | ||
|
|
378acfe826 | ||
|
|
538c764d2b | ||
|
|
0f26a21624 | ||
|
|
5c1efa1149 | ||
|
|
ca4136cf41 | ||
|
|
3a26470fb7 | ||
|
|
6c5899dff5 | ||
|
|
2df2878dfc | ||
|
|
0b719945c5 | ||
|
|
b1a54a0107 | ||
|
|
08c177ca36 | ||
|
|
2573311308 | ||
|
|
1d72b8bf1b | ||
|
|
758e34efbb | ||
|
|
735ca38b8f | ||
|
|
f76a384841 | ||
|
|
9419a43a7f | ||
|
|
b695680a33 | ||
|
|
d0e731e8b8 | ||
|
|
48f075cfd5 | ||
|
|
3e87648de3 | ||
|
|
fe4ab95cd5 | ||
|
|
801383effe | ||
|
|
54cd65e47f | ||
|
|
a55821a2ec | ||
|
|
068861a927 | ||
|
|
d007cca61d | ||
|
|
a92895939e | ||
|
|
7bd1834d59 | ||
|
|
1b056c5328 | ||
|
|
e8306f623a | ||
|
|
3108a1853d | ||
|
|
25f1a573fd | ||
|
|
71d29fa3d0 | ||
|
|
50848e34ec | ||
|
|
4a5d08d0cf | ||
|
|
6fcdaa4387 | ||
|
|
699fc7641f | ||
|
|
3692b4d631 | ||
|
|
4b7677a916 | ||
|
|
5719b7a58d | ||
|
|
f22bfe6a55 | ||
|
|
551f478477 | ||
|
|
a430880729 | ||
|
|
a507b56ab1 | ||
|
|
f430e54daf | ||
|
|
47860cf002 | ||
|
|
13f5f18140 | ||
|
|
857a0fa0df | ||
|
|
0a958b6a02 | ||
|
|
b39c51195b | ||
|
|
853d16ed7e | ||
|
|
422359d09a | ||
|
|
544af1efec | ||
|
|
a6214c057e | ||
|
|
fe809c39f9 | ||
|
|
157cc54449 | ||
|
|
cbfacf9a10 | ||
|
|
fda5e0da8a | ||
|
|
98d004c50b | ||
|
|
037d995c4d | ||
|
|
e4a27d194e | ||
|
|
d34fce56e4 | ||
|
|
b8b922d334 | ||
|
|
88c272f6a7 | ||
|
|
6cfcb54a28 | ||
|
|
3ef96aa567 | ||
|
|
996dc6d1c8 | ||
|
|
61b1c2db5b | ||
|
|
34fd3b85a8 | ||
|
|
f76f952547 | ||
|
|
11b4a0e4b6 | ||
|
|
eefd30881c | ||
|
|
d3b67d0bd8 | ||
|
|
d6cab3f37e | ||
|
|
37edae1c90 | ||
|
|
90d6ad569d | ||
|
|
a4daa34db7 | ||
|
|
cfc2940412 | ||
|
|
a6adbb299d | ||
|
|
8cc7f86cf7 | ||
|
|
a53c6e2440 | ||
|
|
5199809bba | ||
|
|
a431042475 | ||
|
|
33941033e2 | ||
|
|
4e29b6ffc0 | ||
|
|
839b18aa26 | ||
|
|
a27339b244 | ||
|
|
10e25690b4 | ||
|
|
660d9e09f8 | ||
|
|
e9be1fdd2b | ||
|
|
44124d3055 | ||
|
|
14c3511e92 | ||
|
|
d647f751ee | ||
|
|
be1692d64f | ||
|
|
f404a17787 | ||
|
|
fc4927fa0f | ||
|
|
06e208c5c3 | ||
|
|
52485e5fd0 | ||
|
|
dee74174ff | ||
|
|
e7846547be | ||
|
|
7f89edee3e | ||
|
|
11cc9dc151 | ||
|
|
4236d0d938 | ||
|
|
8218cbea2a | ||
|
|
5b7f443cf4 | ||
|
|
0e39699c8c | ||
|
|
60a58e03ea | ||
|
|
e154920bc6 | ||
|
|
fcb89ad94d | ||
|
|
d48a1d1928 | ||
|
|
d02171b494 | ||
|
|
c8a5d4b86f | ||
|
|
006200c9a4 | ||
|
|
5d657c6e67 | ||
|
|
14428af879 | ||
|
|
5656cca4f3 | ||
|
|
08570c4248 | ||
|
|
b2bdb6f7c4 | ||
|
|
9037782a9c | ||
|
|
f93318a6c8 | ||
|
|
001c2c322b | ||
|
|
f4eee224d8 | ||
|
|
74306b54d7 | ||
|
|
fd2ee0c9e2 | ||
|
|
78914475ae | ||
|
|
435420d6d5 | ||
|
|
03b0eb19f7 | ||
|
|
910338f071 | ||
|
|
fad089ffff | ||
|
|
19a48b82cf | ||
|
|
09b2417848 | ||
|
|
5cbbc496b0 | ||
|
|
09f74f6d23 | ||
|
|
2b3eae6cc7 | ||
|
|
0b89a7a92d | ||
|
|
91ce66a0a8 | ||
|
|
ccdba3c771 | ||
|
|
3871b6a86d | ||
|
|
83ecfbb9b3 | ||
|
|
31c836ac25 | ||
|
|
3afedbf6f0 | ||
|
|
b4ec36debc | ||
|
|
1f15bee02a | ||
|
|
dff146e306 | ||
|
|
722dd08703 | ||
|
|
d047afe615 | ||
|
|
a7a7751be7 | ||
|
|
0bbf955d4c | ||
|
|
70abe10fc0 | ||
|
|
0a696bd4ce | ||
|
|
fda39c6cb0 | ||
|
|
875da22a43 | ||
|
|
363a563ec2 | ||
|
|
8da6fdc2ce | ||
|
|
0caa5616f2 | ||
|
|
727e6d83c0 | ||
|
|
da3f101a77 | ||
|
|
fe613de8e1 | ||
|
|
142e99d4e9 | ||
|
|
7af0139a09 | ||
|
|
8e53b57bb2 | ||
|
|
0d3647c395 | ||
|
|
0d76196a09 | ||
|
|
b281f3dee4 | ||
|
|
a4292976e9 | ||
|
|
c2dad58ad1 | ||
|
|
d5a6d789e6 | ||
|
|
875dde437d | ||
|
|
5be22ca80d | ||
|
|
66904fc4e8 | ||
|
|
8163ab7e55 | ||
|
|
ef6f7f32ae | ||
|
|
285e69e2d1 | ||
|
|
d1baf14a64 | ||
|
|
0884f6b78d | ||
|
|
2d78fb05c8 | ||
|
|
b95ad4cfaf | ||
|
|
3bbe3ddb31 | ||
|
|
a32e56500a | ||
|
|
c1e618ea2d | ||
|
|
19f5b5c132 | ||
|
|
c852ce3981 | ||
|
|
ba31b19c00 | ||
|
|
66a3c6df4e | ||
|
|
57658a8c14 | ||
|
|
9fe3049de6 | ||
|
|
831858b883 | ||
|
|
8de2ba67dd | ||
|
|
fe7a932ab8 | ||
|
|
1d31c79dc9 | ||
|
|
d40e5621e9 | ||
|
|
bcc7956216 | ||
|
|
821cbb2995 | ||
|
|
74fa790354 | ||
|
|
756477bfe3 | ||
|
|
864c68ffc5 | ||
|
|
68cae521df | ||
|
|
d0152ec8ca | ||
|
|
e08cfaf9ca | ||
|
|
ee4bb8bd25 | ||
|
|
7fa3d23dd9 | ||
|
|
9679dd077e | ||
|
|
048742f38f | ||
|
|
7b410b7f0e | ||
|
|
d238a768ab | ||
|
|
260db9fb9e | ||
|
|
e27b761d7c | ||
|
|
16fc083322 | ||
|
|
3c856c0c1a | ||
|
|
dc9c69db93 | ||
|
|
b1fe26c45a | ||
|
|
0389b631fa | ||
|
|
64fa709d1f | ||
|
|
4727fe8abf | ||
|
|
90481ce742 | ||
|
|
9fc6764fa7 | ||
|
|
74d4cdb81a | ||
|
|
7906146836 | ||
|
|
3274ff47b8 | ||
|
|
a059c553a1 | ||
|
|
23e182ca7c | ||
|
|
a15bc95824 | ||
|
|
74a3f63489 | ||
|
|
09f49fa891 | ||
|
|
b9d89f8aaa | ||
|
|
cb0214787b | ||
|
|
2e8cdd1542 | ||
|
|
b29d327d14 | ||
|
|
c8360e3ae5 | ||
|
|
19d2ab4853 | ||
|
|
12d77deeee | ||
|
|
043927c7db | ||
|
|
30947ea2d5 | ||
|
|
33313b0221 | ||
|
|
a5300420e2 | ||
|
|
9b46bf1eb4 | ||
|
|
c06b7be32f | ||
|
|
68532fa9ec | ||
|
|
708d2b6255 | ||
|
|
e72113f06a | ||
|
|
14f81da375 | ||
|
|
fc21f7ad28 | ||
|
|
ca8bf5abb0 | ||
|
|
4a73f5c5ea | ||
|
|
6a0762949d | ||
|
|
859b71645a | ||
|
|
078bfd0b4f | ||
|
|
1c96d345e2 | ||
|
|
82f5274828 | ||
|
|
e568df0dae | ||
|
|
c4efde7713 | ||
|
|
7a1e6202e1 | ||
|
|
32353a9d30 | ||
|
|
2e6e9272fe | ||
|
|
d978436c4b | ||
|
|
fab36f1adb | ||
|
|
7945919f22 | ||
|
|
c642b61d4d | ||
|
|
aeed8d6225 | ||
|
|
1a4181afd0 | ||
|
|
49742cb2d3 | ||
|
|
b3d1887745 | ||
|
|
8d50a9fd1a | ||
|
|
1496383224 | ||
|
|
4335bca2f7 | ||
|
|
31040e4d80 | ||
|
|
3d7e62eb8b | ||
|
|
88d94d0ec8 | ||
|
|
af40551c9f | ||
|
|
c30c22a76c | ||
|
|
cc09e6ef3a | ||
|
|
fc84909115 | ||
|
|
5ca4e51df0 | ||
|
|
fcb5ce011b | ||
|
|
a9320f896e | ||
|
|
830a823be1 | ||
|
|
b206fc7075 | ||
|
|
1d60510959 | ||
|
|
03272a606d | ||
|
|
0dc9eca36f | ||
|
|
8cc628a953 | ||
|
|
bbc517292a | ||
|
|
29dce62b8f | ||
|
|
fa8e4fd879 | ||
|
|
432c309f63 | ||
|
|
d2f351d819 | ||
|
|
5a991b7149 | ||
|
|
417b8ec792 | ||
|
|
7dcf4eeee7 | ||
|
|
1acf5ace29 | ||
|
|
fcf9b82f14 | ||
|
|
2aab238c61 | ||
|
|
b8d93812f0 | ||
|
|
ff6ae89d3e | ||
|
|
0a45e5495f | ||
|
|
9320933520 | ||
|
|
921caefa56 | ||
|
|
ecd4c1f3d9 | ||
|
|
ab9e4ce351 | ||
|
|
921e040b15 | ||
|
|
00ef0cd434 | ||
|
|
989c6f8b06 | ||
|
|
552f31dbbd | ||
|
|
5452ba3850 | ||
|
|
54745902b8 | ||
|
|
1aa9a298e1 | ||
|
|
782205a693 | ||
|
|
ac494c0d04 | ||
|
|
85f99d4769 | ||
|
|
5e7f29b19e | ||
|
|
141091f528 | ||
|
|
0edcdd470e | ||
|
|
d672491122 |
18
.gitignore
vendored
18
.gitignore
vendored
@@ -1,11 +1,29 @@
|
||||
*.obj
|
||||
*.lib
|
||||
*.dll
|
||||
*.dylib
|
||||
*.def
|
||||
*.o
|
||||
*.out
|
||||
lapack-3.1.1
|
||||
lapack-3.1.1.tgz
|
||||
lapack-3.4.1
|
||||
lapack-3.4.1.tgz
|
||||
lapack-3.4.2
|
||||
lapack-3.4.2.tgz
|
||||
lapack-netlib/make.inc
|
||||
lapack-netlib/lapacke/include/lapacke_mangling.h
|
||||
lapack-netlib/TESTING/testing_results.txt
|
||||
*.so
|
||||
*.a
|
||||
.svn
|
||||
*~
|
||||
lib.grd
|
||||
nohup.out
|
||||
config.h
|
||||
Makefile.conf
|
||||
Makefile.conf_last
|
||||
config_last.h
|
||||
getarch
|
||||
getarch_2nd
|
||||
utest/openblas_utest
|
||||
|
||||
24
.travis.yml
Normal file
24
.travis.yml
Normal file
@@ -0,0 +1,24 @@
|
||||
language: c
|
||||
compiler:
|
||||
- gcc
|
||||
|
||||
env:
|
||||
- TARGET_BOX=LINUX64 BTYPE="BINARY=64"
|
||||
- TARGET_BOX=LINUX64 BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
- TARGET_BOX=LINUX64 BTYPE="BINARY=64 INTERFACE64=1"
|
||||
- TARGET_BOX=LINUX32 BTYPE="BINARY=32"
|
||||
- TARGET_BOX=WIN64 BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
|
||||
|
||||
before_install:
|
||||
- sudo apt-get update -qq
|
||||
- sudo apt-get install -qq gfortran
|
||||
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
|
||||
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi
|
||||
|
||||
script: make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
|
||||
|
||||
# whitelist
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- develop
|
||||
39
BACKERS.md
Normal file
39
BACKERS.md
Normal file
@@ -0,0 +1,39 @@
|
||||
Thank you for the support.
|
||||
|
||||
### [2013.8] [Testbed for OpenBLAS project](https://www.bountysource.com/fundraisers/443-testbed-for-openblas-project)
|
||||
|
||||
https://www.bountysource.com/fundraisers/443-testbed-for-openblas-project/pledges
|
||||
|
||||
In chronological order:
|
||||
|
||||
* aeberspaecher
|
||||
* fmolina
|
||||
* saullocastro
|
||||
* xianyi
|
||||
* cuda
|
||||
* carter
|
||||
* StefanKarpinski
|
||||
* staticfloat
|
||||
* sebastien-villemot
|
||||
* JeffBezanson
|
||||
* ihnorton
|
||||
* simonp0420
|
||||
* andrioni
|
||||
* Tim Holy
|
||||
* ivarne
|
||||
* johnmyleswhite
|
||||
* traz
|
||||
* Jean-Francis Roy
|
||||
* bkalpert
|
||||
* Anirban
|
||||
* pgermain
|
||||
* alexandre.lacoste.18
|
||||
* foges
|
||||
* ssam
|
||||
* WestleyArgentum
|
||||
* daniebmariani
|
||||
* pjpuglia
|
||||
* albarrentine
|
||||
* Alexander Vogt
|
||||
|
||||
|
||||
90
CONTRIBUTORS.md
Normal file
90
CONTRIBUTORS.md
Normal file
@@ -0,0 +1,90 @@
|
||||
# Contributions to the OpenBLAS project
|
||||
|
||||
## Creator & Maintainer
|
||||
|
||||
* Zhang Xianyi <traits.zhang@gmail.com>
|
||||
|
||||
## Active Developers
|
||||
|
||||
* Wang Qian <traz0824@gmail.com>
|
||||
* Optimize BLAS3 on ICT Loongson 3A.
|
||||
* Optimize BLAS3 on Intel Sandy Bridge.
|
||||
|
||||
* Zaheer Chothia <zaheer.chothia@gmail.com>
|
||||
* Improve the compatibility about complex number
|
||||
* Build LAPACKE: C interface to LAPACK
|
||||
* Improve the windows build.
|
||||
|
||||
## Previous Developers
|
||||
|
||||
* Chen Shaohu <huhumartinwar@gmail.com>
|
||||
* Optimize GEMV on the Loongson 3A processor.
|
||||
|
||||
* Luo Wen
|
||||
* Intern. Test Level-2 BLAS.
|
||||
|
||||
## Contributors
|
||||
|
||||
In chronological order:
|
||||
|
||||
* pipping <http://page.mi.fu-berlin.de/pipping>
|
||||
* [2011-06-11] Make USE_OPENMP=0 disable openmp.
|
||||
|
||||
* Stefan Karpinski <stefan@karpinski.org>
|
||||
* [2011-12-28] Fix a bug about SystemStubs on Mac OS X.
|
||||
|
||||
* Alexander Eberspächer <https://github.com/aeberspaecher>
|
||||
* [2012-05-02] Add note on patch for segfaults on Linux kernel 2.6.32.
|
||||
|
||||
* Mike Nolta <mike@nolta.net>
|
||||
* [2012-05-19] Fix building bug on FreeBSD and NetBSD.
|
||||
|
||||
* Sylvestre Ledru <https://github.com/sylvestre>
|
||||
* [2012-07-01] Improve the detection of sparc. Fix building bug under
|
||||
Hurd and kfreebsd.
|
||||
|
||||
* Jameson Nash <https://github.com/vtjnash>
|
||||
* [2012-08-20] Provide support for passing CFLAGS, FFLAGS, PFLAGS, FPFLAGS to
|
||||
make on the command line.
|
||||
|
||||
* Alexander Nasonov <alnsn@yandex.ru>
|
||||
* [2012-11-10] Fix NetBSD build.
|
||||
|
||||
* Sébastien Villemot <sebastien@debian.org>
|
||||
* [2012-11-14] Fix compilation with TARGET=GENERIC. Patch applied to Debian package.
|
||||
|
||||
* Werner Saar <wernsaar@googlemail.com>
|
||||
* [2013-03-04] Optimize AVX and FMA4 DGEMM on AMD Bulldozer
|
||||
* [2013-04-27] Optimize AVX and FMA4 TRSM on AMD Bulldozer
|
||||
* [2013-06-09] Optimize AVX and FMA4 SGEMM on AMD Bulldozer
|
||||
* [2013-06-11] Optimize AVX and FMA4 ZGEMM on AMD Bulldozer
|
||||
* [2013-06-12] Optimize AVX and FMA4 CGEMM on AMD Bulldozer
|
||||
* [2013-06-16] Optimize dgemv_n kernel on AMD Bulldozer
|
||||
* [2013-06-20] Optimize ddot, daxpy kernel on AMD Bulldozer
|
||||
* [2013-06-21] Optimize dcopy kernel on AMD Bulldozer
|
||||
|
||||
* Kang-Che Sung <Explorer09@gmail.com>
|
||||
* [2013-05-17] Fix typo in the document. Re-order the architecture list in getarch.c.
|
||||
|
||||
* Kenneth Hoste <kenneth.hoste@gmail.com>
|
||||
* [2013-05-22] Adjust Makefile about downloading LAPACK source files.
|
||||
|
||||
* Lei WANG <https://github.com/wlbksy>
|
||||
* [2013-05-22] Fix a bug about wget.
|
||||
|
||||
* Dan Luu <http://www.linkedin.com/in/danluu>
|
||||
* [2013-06-30] Add Intel Haswell support (using sandybridge optimizations).
|
||||
|
||||
* grisuthedragon <https://github.com/grisuthedragon>
|
||||
* [2013-07-11] create openblas_get_parallel to retrieve information which parallelization
|
||||
model is used by OpenBLAS.
|
||||
|
||||
* Sébastien Fabbro <bicatali@gentoo.org>
|
||||
* [2013-07-24] Modify makefile to respect user's LDFLAGS
|
||||
* [2013-07-24] Add stack markings for GNU as arch-independent for assembler files
|
||||
|
||||
* carlkl <https://github.com/carlkl>
|
||||
* [2013-12-13] Fixed LAPACKE building bug on Windows
|
||||
|
||||
* [Your name or handle] <[email or website]>
|
||||
* [Date] [Brief summary of your changes]
|
||||
265
Changelog.txt
265
Changelog.txt
@@ -1,4 +1,269 @@
|
||||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.2.8
|
||||
01-Aug-2013
|
||||
common:
|
||||
* Support Open64 5.0. (#266)
|
||||
* Add executable stack markings. (#262, Thank Sébastien Fabbro)
|
||||
* Respect user's LDFLAGS (Thank Sébastien Fabbro)
|
||||
|
||||
x86/x86-64:
|
||||
* Rollback bulldozer and piledriver kernels to barcelona kernels (#263)
|
||||
We will fix the compuational error bug in bulldozer and piledriver kernels.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.7
|
||||
20-Jul-2013
|
||||
common:
|
||||
* Support LSB (Linux Standard Base) 4.1.
|
||||
e.g. make CC=lsbcc
|
||||
* Include LAPACK 3.4.2 source codes to the repo.
|
||||
Avoid downloading at compile time.
|
||||
* Add NO_PARALLEL_MAKE flag to disable parallel make.
|
||||
* Create openblas_get_parallel to retrieve information which
|
||||
parallelization model is used by OpenBLAS. (Thank grisuthedragon)
|
||||
* Detect LLVM/Clang compiler. The default compiler is Clang on Mac OS X.
|
||||
* Change LIBSUFFIX from .lib to .a on windows.
|
||||
* A walk round for dtrti_U single thread bug. Replace it with LAPACK codes. (#191)
|
||||
|
||||
x86/x86-64:
|
||||
* Optimize c/zgemm, trsm, dgemv_n, ddot, daxpy, dcopy on
|
||||
AMD Bulldozer. (Thank Werner Saar)
|
||||
* Add Intel Haswell support (using Sandybridge optimizations).
|
||||
(Thank Dan Luu)
|
||||
* Add AMD Piledriver support (using Bulldozer optimizations).
|
||||
* Fix the computational error in zgemm avx kernel on
|
||||
Sandybridge. (#237)
|
||||
* Fix the overflow bug in gemv.
|
||||
* Fix the overflow bug in multi-threaded BLAS3, getrf when NUM_THREADS
|
||||
is very large.(#214, #221, #246).
|
||||
MIPS64:
|
||||
* Support loongcc (Open64 based) compiler for ICT Loongson 3A/B.
|
||||
|
||||
Power:
|
||||
* Support Power7 by old Power6 kernels. (#220)
|
||||
|
||||
====================================================================
|
||||
Version 0.2.6
|
||||
2-Mar-2013
|
||||
common:
|
||||
* Improved OpenMP performance slightly. (d744c9)
|
||||
* Improved cblas.h compatibility with Intel MKL.(#185)
|
||||
* Fixed the overflowing bug in single thread cholesky factorization.
|
||||
* Fixed the overflowing buffer bug of multithreading hbmv and sbmv.(#174)
|
||||
|
||||
x86/x86-64:
|
||||
* Added AMD Bulldozer x86-64 S/DGEMM AVX kernels. (Thank Werner Saar)
|
||||
We will tune the performance in future.
|
||||
* Auto-detect Intel Xeon E7540.
|
||||
* Fixed the overflowing buffer bug of gemv. (#173)
|
||||
* Fixed the bug of s/cdot about invalid reading NAN on x86_64. (#189)
|
||||
|
||||
MIPS64:
|
||||
|
||||
====================================================================
|
||||
Version 0.2.5
|
||||
26-Nov-2012
|
||||
common:
|
||||
* Added NO_SHARED flag to disable generating the shared library.
|
||||
* Compile LAPACKE with ILP64 modle when INTERFACE64=1 (#158)
|
||||
* Export LAPACK 3.4.2 symbols in shared library. (#147)
|
||||
* Only detect the number of physical CPU cores on Mac OSX. (#157)
|
||||
* Fixed NetBSD build. (#155)
|
||||
* Fixed compilation with TARGET=GENERIC. (#160)
|
||||
x86/x86-64:
|
||||
* Restore the original CPU affinity when calling
|
||||
openblas_set_num_threads(1) (#153)
|
||||
* Fixed a SEGFAULT bug in dgemv_t when m is very large.(#154)
|
||||
MIPS64:
|
||||
|
||||
====================================================================
|
||||
Version 0.2.4
|
||||
8-Oct-2012
|
||||
common:
|
||||
* Upgraded LAPACK to 3.4.2 version. (#145)
|
||||
* Provided support for passing CFLAGS, FFLAGS, PFLAGS,
|
||||
FPFLAGS to make. (#137)
|
||||
* f77blas.h:compatibility for compilers without C99 complex
|
||||
number support. (#141)
|
||||
x86/x86-64:
|
||||
* Added NO_AVX flag. Check OS supporting AVX on runtime. (#139)
|
||||
* Fixed zdot incompatibility ABI issue with GCC 4.7 on
|
||||
Windows 32-bit. (#140)
|
||||
MIPS64:
|
||||
* Fixed the generation of shared library bug.
|
||||
* Fixed the detection bug on the Loongson 3A server.
|
||||
====================================================================
|
||||
Version 0.2.3
|
||||
20-Aug-2012
|
||||
common:
|
||||
* Fixed LAPACK unstable bug about ?laswp. (#130)
|
||||
* Fixed the shared library bug about unloading the library on
|
||||
Linux (#132).
|
||||
* Fixed the compilation failure on BlueGene/P (TARGET=PPC440FP2)
|
||||
Please use gcc and IBM xlf. (#134)
|
||||
x86/x86-64:
|
||||
* Supported goto_set_num_threads and openblas_set_num_threads
|
||||
APIs in Windows. They can set the number of threads on runtime.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.2
|
||||
6-July-2012
|
||||
common:
|
||||
* Fixed exporting DLL functions bug on Windows/MingW
|
||||
* Support GNU Hurd (Thank Sylvestre Ledru)
|
||||
* Support kfreebsd kernel (Thank Sylvestre Ledru)
|
||||
x86/x86-64:
|
||||
* Support Intel Sandy Bridge 22nm desktop/mobile CPU
|
||||
SPARC:
|
||||
* Improve the detection of SPARC (Thank Sylvestre Ledru)
|
||||
|
||||
====================================================================
|
||||
Version 0.2.1
|
||||
30-Jun-2012
|
||||
common:
|
||||
x86/x86-64:
|
||||
* Fixed the SEGFAULT bug about hyper-theading
|
||||
* Support AMD Bulldozer by using GotoBLAS2 AMD Barcelona codes
|
||||
|
||||
====================================================================
|
||||
Version 0.2.0
|
||||
26-Jun-2012
|
||||
common:
|
||||
* Removed the limitation (64) of numbers of CPU cores.
|
||||
Now, it supports 256 cores at max.
|
||||
* Supported clang compiler.
|
||||
* Fixed some build bugs on FreeBSD
|
||||
x86/x86-64:
|
||||
* Optimized Level-3 BLAS on Intel Sandy Bridge x86-64 by AVX instructions.
|
||||
Please use gcc >= 4.6 or clang >=3.1.
|
||||
* Support AMD Bobcat by using GotoBLAS2 AMD Barcelona codes.
|
||||
|
||||
====================================================================
|
||||
Version 0.1.1
|
||||
29-Apr-2012
|
||||
common:
|
||||
* Upgraded LAPACK to 3.4.1 version. (Thank Zaheer Chothia)
|
||||
* Supported LAPACKE, a C interface to LAPACKE. (Thank Zaheer Chothia)
|
||||
* Fixed the build bug (MD5 and download) on Mac OSX.
|
||||
* Auto download CUnit 2.1.2-2 from SF.net with UTEST_CHECK=1.
|
||||
* Fxied the compatibility issue for compilers without C99 complex number
|
||||
(e.g. Visual Studio)
|
||||
x86/x86_64:
|
||||
* Auto-detect Intel Sandy Bridge Core i7-3xxx & Xeon E7 Westmere-EX.
|
||||
* Test alpha=Nan in dscale.
|
||||
* Fixed a SEGFAULT bug in samax on x86 windows.
|
||||
|
||||
====================================================================
|
||||
Version 0.1.0
|
||||
23-Mar-2012
|
||||
common:
|
||||
* Set soname of shared library on Linux.
|
||||
* Added LIBNAMESUFFIX flag in Makefile.rule. The user can use
|
||||
this flag to control the library name, e.g. libopenblas.a,
|
||||
libopenblas_ifort.a or libopenblas_omp.a.
|
||||
* Added GEMM_MULTITHREAD_THRESHOLD flag in Makefile.rule.
|
||||
The lib use single thread in GEMM function with small matrices.
|
||||
x86/x86_64:
|
||||
* Used GEMV SSE/SSE2 kernels on x86 32-bit.
|
||||
* Exported CBLAS functions in Windows DLL.
|
||||
MIPS64:
|
||||
* Completed Level-3 BLAS optimization on Loongson 3A CPU.
|
||||
* Improved GEMV performance on Loongson 3A CPU.
|
||||
* Improved Level-3 BLAS performance on Loongson 3B CPU. (EXPERIMENT)
|
||||
|
||||
====================================================================
|
||||
Version 0.1 alpha2.5
|
||||
19-Feb-2012
|
||||
common:
|
||||
* Fixed missing "#include <sched.h>" bug on Mac OS X.
|
||||
Thank Mike Nolta for the patch.
|
||||
* Upgraded LAPACK to 3.4.0 version
|
||||
* Fixed a bug on Mac OS X. Don't require SystemStubs on OS X.
|
||||
SystemStubs does not exist on Lion. Thank Stefan Karpinski.
|
||||
* Improved README with using OpenMP. Check the internal threads
|
||||
count less than or equal to omp_get_max_threads()
|
||||
x86/x86_64:
|
||||
* Auto-detect Intel Core i6/i7 (Sandy Bridge) CPU with Nehalem assembly kernels
|
||||
* Fixed some bugs on MingW 64-bit including zgemv, cdot, zdot.
|
||||
|
||||
====================================================================
|
||||
Version 0.1 alpha2.4
|
||||
18-Sep-2011
|
||||
common:
|
||||
* Fixed a bug about installation. The header file "fblas77.h"
|
||||
works fine now.
|
||||
* Fixed #61 a building bug about setting TARGET and DYNAMIC_ARCH.
|
||||
* Try to handle absolute path of shared library in OSX. (#57)
|
||||
Thank Dr Kane O'Donnell.
|
||||
* Changed the installation folder layout to $(PREFIX)/include and
|
||||
$(PREFIX)/lib
|
||||
|
||||
x86/x86_64:
|
||||
* Fixed #58 zdot/xdot SEGFAULT bug with GCC-4.6 on x86. According
|
||||
to i386 calling convention, The callee should remove the first
|
||||
hidden parameter.Thank Mr. John for this patch.
|
||||
|
||||
====================================================================
|
||||
Version 0.1 alpha2.3
|
||||
5-Sep-2011
|
||||
|
||||
x86/x86_64:
|
||||
* Added DTB_ENTRIES into dynamic arch setting parameters. Now,
|
||||
it can read DTB_ENTRIES on runtime. (Refs issue #55 on github)
|
||||
|
||||
====================================================================
|
||||
Version 0.1 alpha2.2
|
||||
14-Jul-2011
|
||||
|
||||
common:
|
||||
* Fixed a building bug when DYNAMIC_ARCH=1 & INTERFACE64=1.
|
||||
(Refs issue #44 on github)
|
||||
|
||||
====================================================================
|
||||
Version 0.1 alpha2.1
|
||||
28-Jun-2011
|
||||
|
||||
common:
|
||||
* Stop the build and output the error message when detecting
|
||||
fortran compiler failed. (Refs issue #42 on github)
|
||||
|
||||
====================================================================
|
||||
Version 0.1 alpha2
|
||||
23-Jun-2011
|
||||
|
||||
common:
|
||||
* Fixed blasint undefined bug in <cblas.h> file. Other software
|
||||
could include this header successfully(Refs issue #13 on github)
|
||||
* Fixed the SEGFAULT bug on 64 cores. On SMP server, the number
|
||||
of CPUs or cores should be less than or equal to 64.(Refs issue #14
|
||||
on github)
|
||||
* Support "void goto_set_num_threads(int num_threads)" and "void
|
||||
openblas_set_num_threads(int num_threads)" when USE_OPENMP=1
|
||||
* Added extern "C" to support C++. Thank Tasio for the patch(Refs
|
||||
issue #21 on github)
|
||||
* Provided an error message when the arch is not supported.(Refs
|
||||
issue #19 on github)
|
||||
* Fixed issue #23. Fixed a bug of f_check script about generating link flags.
|
||||
* Added openblas_set_num_threads for Fortran.
|
||||
* Fixed #25 a wrong result of rotmg.
|
||||
* Fixed a bug about detecting underscore prefix in c_check.
|
||||
* Print the wall time (cycles) with enabling FUNCTION_PROFILE
|
||||
* Fixed #35 a build bug with NO_LAPACK=1 & DYNAMIC_ARCH=1
|
||||
* Added install target. You can use "make install". (Refs #20)
|
||||
|
||||
|
||||
x86/x86_64:
|
||||
* Fixed #28 a wrong result of dsdot on x86_64.
|
||||
* Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6.
|
||||
* Fixed #33 ztrmm bug on Nehalem.
|
||||
* Walk round #27 the low performance axpy issue with small imput size & multithreads.
|
||||
|
||||
MIPS64:
|
||||
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64.
|
||||
* Optimized single/double precision BLAS Level3 on Loongson3A/MIPS64. (Refs #2)
|
||||
* Optimized single/double precision axpy function on Loongson3A/MIPS64. (Refs #3)
|
||||
|
||||
====================================================================
|
||||
Version 0.1 alpha1
|
||||
20-Mar-2011
|
||||
|
||||
@@ -90,6 +90,15 @@
|
||||
number of threads will consume extra resource. I recommend you to
|
||||
specify minimum number of threads.
|
||||
|
||||
1.9 Q I have segfaults when I compile with USE_OPENMP=1. What's wrong?
|
||||
|
||||
A This may be related to a bug in the Linux kernel 2.6.32. Try applying
|
||||
the patch segaults.patch using
|
||||
|
||||
patch < segfaults.patch
|
||||
|
||||
and see if the crashes persist. Note that this patch will lead to many
|
||||
compiler warnings.
|
||||
|
||||
2. Architecture Specific issue or Implementation
|
||||
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -1,4 +1,4 @@
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
||||
225
Makefile
225
Makefile
@@ -3,7 +3,7 @@ include ./Makefile.system
|
||||
|
||||
BLASDIRS = interface driver/level2 driver/level3 driver/others
|
||||
|
||||
ifndef DYNAMIC_ARCH
|
||||
ifneq ($(DYNAMIC_ARCH), 1)
|
||||
BLASDIRS += kernel
|
||||
endif
|
||||
|
||||
@@ -22,12 +22,12 @@ endif
|
||||
|
||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
|
||||
|
||||
.PHONY : all libs netlib test ctest shared
|
||||
.NOTPARALLEL : all libs prof lapack-test
|
||||
.PHONY : all libs netlib test ctest shared install
|
||||
.NOTPARALLEL : all libs prof lapack-test install
|
||||
|
||||
all :: libs netlib tests shared
|
||||
@echo
|
||||
@echo " GotoBLAS build complete."
|
||||
@echo " OpenBLAS build complete. ($(LIB_COMPONENTS))"
|
||||
@echo
|
||||
@echo " OS ... $(OSNAME) "
|
||||
@echo " Architecture ... $(ARCH) "
|
||||
@@ -35,9 +35,14 @@ ifndef BINARY64
|
||||
@echo " BINARY ... 32bit "
|
||||
else
|
||||
@echo " BINARY ... 64bit "
|
||||
endif
|
||||
ifdef INTERFACE64
|
||||
@echo " Use 64 bits int (equivalent to \"-i8\" in Fortran) "
|
||||
endif
|
||||
@echo " C compiler ... $(C_COMPILER) (command line : $(CC))"
|
||||
ifndef NOFORTRAN
|
||||
@echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))"
|
||||
endif
|
||||
ifneq ($(OSNAME), AIX)
|
||||
@echo -n " Library Name ... $(LIBNAME)"
|
||||
else
|
||||
@@ -49,32 +54,54 @@ ifndef SMP
|
||||
else
|
||||
@echo " (Multi threaded; Max num-threads is $(NUM_THREADS))"
|
||||
endif
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
@echo
|
||||
@echo " Use OpenMP in the multithreading. Becasue of ignoring OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS flags, "
|
||||
@echo " you should use OMP_NUM_THREADS environment variable to control the number of threads."
|
||||
@echo
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@echo "WARNING: If you plan to use the dynamic library $(LIBDYNNAME), you must run:"
|
||||
@echo
|
||||
@echo "\"make PREFIX=/your_installation_path/ install\"."
|
||||
@echo
|
||||
@echo "(or set PREFIX in Makefile.rule and run make install."
|
||||
@echo "If you want to move the .dylib to a new location later, make sure you change"
|
||||
@echo "the internal name of the dylib with:"
|
||||
@echo
|
||||
@echo "install_name_tool -id /new/absolute/path/to/$(LIBDYNNAME) $(LIBDYNNAME)"
|
||||
endif
|
||||
@echo
|
||||
@echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"."
|
||||
@echo
|
||||
|
||||
shared :
|
||||
ifndef NO_SHARED
|
||||
ifeq ($(OSNAME), Linux)
|
||||
$(MAKE) -C exports so
|
||||
-ln -fs $(LIBSONAME) libopenblas.so
|
||||
@$(MAKE) -C exports so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
$(MAKE) -C exports so
|
||||
-ln -fs $(LIBSONAME) libopenblas.so
|
||||
@$(MAKE) -C exports so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
$(MAKE) -C exports so
|
||||
-ln -fs $(LIBSONAME) libopenblas.so
|
||||
@$(MAKE) -C exports so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
$(MAKE) -C exports dyn
|
||||
-ln -fs $(LIBDYNNAME) libopenblas.dylib
|
||||
@$(MAKE) -C exports dyn
|
||||
@-ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
$(MAKE) -C exports dll
|
||||
# -ln -fs $(LIBDLLNAME) libopenblas.dll
|
||||
@$(MAKE) -C exports dll
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
$(MAKE) -C exports dll
|
||||
-ln -fs $(LIBDLLNAME) libopenblas.dll
|
||||
@$(MAKE) -C exports dll
|
||||
endif
|
||||
endif
|
||||
|
||||
tests :
|
||||
@@ -96,34 +123,55 @@ endif
|
||||
endif
|
||||
|
||||
libs :
|
||||
-ln -fs $(LIBNAME) libopenblas.$(LIBSUFFIX)
|
||||
for d in $(SUBDIRS) ; \
|
||||
ifeq ($(CORE), UNKOWN)
|
||||
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
|
||||
endif
|
||||
ifeq ($(NOFORTRAN), 1)
|
||||
$(error OpenBLAS: Detecting fortran compiler failed. Please install fortran compiler, e.g. gfortran, ifort, openf90.)
|
||||
endif
|
||||
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
@for d in $(SUBDIRS) ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
fi; \
|
||||
done
|
||||
ifdef DYNAMIC_ARCH
|
||||
$(MAKE) -C kernel commonlibs || exit 1
|
||||
for d in $(DYNAMIC_CORE) ; \
|
||||
#Save the config files for installation
|
||||
@cp Makefile.conf Makefile.conf_last
|
||||
@cp config.h config_last.h
|
||||
ifdef QUAD_PRECISION
|
||||
@echo "#define QUAD_PRECISION">> config_last.h
|
||||
endif
|
||||
ifeq ($(EXPRECISION), 1)
|
||||
@echo "#define EXPRECISION">> config_last.h
|
||||
endif
|
||||
##
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
@$(MAKE) -C kernel commonlibs || exit 1
|
||||
@for d in $(DYNAMIC_CORE) ; \
|
||||
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
|
||||
done
|
||||
@echo DYNAMIC_ARCH=1 >> Makefile.conf_last
|
||||
endif
|
||||
ifdef USE_THREAD
|
||||
@echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
|
||||
endif
|
||||
@touch lib.grd
|
||||
|
||||
prof : prof_blas prof_lapack
|
||||
|
||||
prof_blas :
|
||||
ln -fs $(LIBNAME_P) libopenblas_p.$(LIBSUFFIX)
|
||||
ln -fs $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
|
||||
for d in $(SUBDIRS) ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d prof || exit 1 ; \
|
||||
fi; \
|
||||
done
|
||||
ifdef DYNAMIC_ARCH
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
$(MAKE) -C kernel commonprof || exit 1
|
||||
endif
|
||||
|
||||
blas :
|
||||
ln -fs $(LIBNAME) libopenblas.$(LIBSUFFIX)
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
for d in $(BLASDIRS) ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d libs || exit 1 ; \
|
||||
@@ -131,13 +179,13 @@ blas :
|
||||
done
|
||||
|
||||
hpl :
|
||||
ln -fs $(LIBNAME) libopenblas.$(LIBSUFFIX)
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
for d in $(BLASDIRS) ../laswp exports ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
fi; \
|
||||
done
|
||||
ifdef DYNAMIC_ARCH
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
$(MAKE) -C kernel commonlibs || exit 1
|
||||
for d in $(DYNAMIC_CORE) ; \
|
||||
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
|
||||
@@ -145,7 +193,7 @@ ifdef DYNAMIC_ARCH
|
||||
endif
|
||||
|
||||
hpl_p :
|
||||
ln -fs $(LIBNAME_P) libopenblas_p.$(LIBSUFFIX)
|
||||
ln -fs $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
|
||||
for d in $(SUBDIRS) ../laswp exports ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
@@ -156,90 +204,123 @@ ifeq ($(NO_LAPACK), 1)
|
||||
netlib :
|
||||
|
||||
else
|
||||
netlib : lapack-3.1.1 patch.for_lapack-3.1.1 lapack-3.1.1/make.inc
|
||||
netlib : lapack_prebuild
|
||||
ifndef NOFORTRAN
|
||||
-@$(MAKE) -C lapack-3.1.1 lapacklib
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
|
||||
endif
|
||||
ifndef NO_LAPACKE
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib
|
||||
endif
|
||||
endif
|
||||
|
||||
prof_lapack : lapack-3.1.1 lapack-3.1.1/make.inc
|
||||
-@$(MAKE) -C lapack-3.1.1 lapack_prof
|
||||
prof_lapack : lapack_prebuild
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapack_prof
|
||||
|
||||
lapack-3.1.1/make.inc :
|
||||
lapack_prebuild :
|
||||
ifndef NOFORTRAN
|
||||
-@echo "FORTRAN = $(FC)" > lapack-3.1.1/make.inc
|
||||
-@echo "OPTS = $(FFLAGS)" >> lapack-3.1.1/make.inc
|
||||
-@echo "POPTS = $(FPFLAGS)" >> lapack-3.1.1/make.inc
|
||||
-@echo "NOOPT = $(FFLAGS) -O0" >> lapack-3.1.1/make.inc
|
||||
-@echo "PNOOPT = $(FPFLAGS) -O0" >> lapack-3.1.1/make.inc
|
||||
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> lapack-3.1.1/make.inc
|
||||
-@echo "ARCH = $(AR)" >> lapack-3.1.1/make.inc
|
||||
-@echo "RANLIB = $(RANLIB)" >> lapack-3.1.1/make.inc
|
||||
-@echo "LAPACKLIB = ../$(LIBNAME)" >> lapack-3.1.1/make.inc
|
||||
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> lapack-3.1.1/make.inc
|
||||
-@echo "SUFFIX = $(SUFFIX)" >> lapack-3.1.1/make.inc
|
||||
-@echo "PSUFFIX = $(PSUFFIX)" >> lapack-3.1.1/make.inc
|
||||
# -@echo "CEXTRALIB = $(CEXTRALIB)" >> lapack-3.1.1/make.inc
|
||||
-@cat make.inc >> lapack-3.1.1/make.inc
|
||||
-@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "NOOPT = $(LAPACK_FFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKELIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
|
||||
lapack-3.1.1 : lapack-3.1.1.tgz
|
||||
lapack-3.4.2 : lapack-3.4.2.tgz
|
||||
ifndef NOFORTRAN
|
||||
@if test `$(MD5SUM) lapack-3.1.1.tgz | $(AWK) '{print $$1}'` = 00b21551a899bcfbaa7b8443e1faeef9; then \
|
||||
ifndef NO_LAPACK
|
||||
@if test `$(MD5SUM) $< | $(AWK) '{print $$1}'` = 61bf1a8a4469d4bdb7604f5897179478; then \
|
||||
echo $(TAR) zxf $< ;\
|
||||
$(TAR) zxf $< && (cd lapack-3.1.1; $(PATCH) -p1 < ../patch.for_lapack-3.1.1) ;\
|
||||
$(TAR) zxf $< && (cd $(NETLIB_LAPACK_DIR); $(PATCH) -p1 < ../patch.for_lapack-3.4.2) ;\
|
||||
rm -f $(NETLIB_LAPACK_DIR)/lapacke/make.inc ;\
|
||||
else \
|
||||
echo " lapack-3.1.1.tgz check sum is wrong (Please use orignal)." ;\
|
||||
rm -rf lapack-3.1.1 ;\
|
||||
rm -rf $(NETLIB_LAPACK_DIR) ;\
|
||||
echo " Cannot download lapack-3.4.2.tgz or the MD5 check sum is wrong (Please use orignal)."; \
|
||||
exit 1; \
|
||||
fi
|
||||
endif
|
||||
endif
|
||||
|
||||
lapack-3.1.1.tgz :
|
||||
LAPACK_URL=http://www.netlib.org/lapack/lapack-3.4.2.tgz
|
||||
|
||||
lapack-3.4.2.tgz :
|
||||
ifndef NOFORTRAN
|
||||
-wget http://www.netlib.org/lapack/lapack-3.1.1.tgz
|
||||
#http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Darwin NetBSD))
|
||||
curl -O $(LAPACK_URL);
|
||||
else
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
fetch $(LAPACK_URL);
|
||||
else
|
||||
wget -O $@ $(LAPACK_URL);
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
large.tgz :
|
||||
ifndef NOFORTRAN
|
||||
-wget http://www.netlib.org/lapack/timing/large.tgz
|
||||
if [ ! -a $< ]; then
|
||||
-wget http://www.netlib.org/lapack/timing/large.tgz;
|
||||
fi
|
||||
endif
|
||||
|
||||
timing.tgz :
|
||||
ifndef NOFORTRAN
|
||||
-wget http://www.netlib.org/lapack/timing/timing.tgz
|
||||
if [ ! -a $< ]; then
|
||||
-wget http://www.netlib.org/lapack/timing/timing.tgz;
|
||||
fi
|
||||
endif
|
||||
|
||||
lapack-timing : lapack-3.1.1 large.tgz timing.tgz
|
||||
lapack-timing : large.tgz timing.tgz
|
||||
ifndef NOFORTRAN
|
||||
(cd lapack-3.1.1; $(TAR) zxf ../timing.tgz TIMING)
|
||||
(cd lapack-3.1.1/TIMING; $(TAR) zxf ../../large.tgz )
|
||||
make -C lapack-3.1.1 tmglib
|
||||
make -C lapack-3.1.1/TIMING
|
||||
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
|
||||
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )
|
||||
make -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
make -C $(NETLIB_LAPACK_DIR)/TIMING
|
||||
endif
|
||||
|
||||
|
||||
lapack-test :
|
||||
$(MAKE) -C lapack-3.1.1 tmglib
|
||||
$(MAKE) -C lapack-3.1.1/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintsts xlintstz xlintstzc
|
||||
@rm -f lapack-3.1.1/TESTING/*.out
|
||||
$(MAKE) -j 1 -C lapack-3.1.1/TESTING
|
||||
$(GREP) failed lapack-3.1.1/TESTING/*.out
|
||||
$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintsts xlintstz xlintstzc
|
||||
@rm -f $(NETLIB_LAPACK_DIR)/TESTING/*.out
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING
|
||||
$(GREP) failed $(NETLIB_LAPACK_DIR)/TESTING/*.out
|
||||
|
||||
dummy :
|
||||
|
||||
install :
|
||||
$(MAKE) -f Makefile.install install
|
||||
|
||||
clean ::
|
||||
@for d in $(SUBDIRS_ALL) ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
fi; \
|
||||
done
|
||||
ifdef DYNAMIC_ARCH
|
||||
#ifdef DYNAMIC_ARCH
|
||||
@$(MAKE) -C kernel clean
|
||||
#endif
|
||||
@$(MAKE) -C reference clean
|
||||
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@rm -rf getarch.dSYM getarch_2nd.dSYM
|
||||
endif
|
||||
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf libopenblas.$(LIBSUFFIX) libopenblas_p.$(LIBSUFFIX) *.lnk myconfig.h
|
||||
@rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib
|
||||
@if test -d lapack-3.1.1; then \
|
||||
echo deleting lapack-3.1.1; \
|
||||
rm -rf lapack-3.1.1 ;\
|
||||
fi
|
||||
@echo Done.
|
||||
@touch $(NETLIB_LAPACK_DIR)/make.inc
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) clean
|
||||
@rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h
|
||||
@rm -f *.grd Makefile.conf_last config_last.h
|
||||
@echo Done.
|
||||
|
||||
12
Makefile.arm
Normal file
12
Makefile.arm
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
ifeq ($(CORE), ARMV7)
|
||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV6)
|
||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
endif
|
||||
|
||||
|
||||
7
Makefile.arm64
Normal file
7
Makefile.arm64
Normal file
@@ -0,0 +1,7 @@
|
||||
|
||||
ifeq ($(CORE), ARMV8)
|
||||
CCOMMON_OPT += -march=armv8-a
|
||||
FCOMMON_OPT += -march=armv8-a
|
||||
endif
|
||||
|
||||
|
||||
@@ -1,6 +1 @@
|
||||
COPT = -Wall -O2 # -DGEMMTEST
|
||||
ifdef BINARY64
|
||||
else
|
||||
# LDFLAGS = -m elf32ppc
|
||||
LDFLAGS = -m elf_i386
|
||||
endif
|
||||
|
||||
89
Makefile.install
Normal file
89
Makefile.install
Normal file
@@ -0,0 +1,89 @@
|
||||
TOPDIR = .
|
||||
export GOTOBLAS_MAKEFILE = 1
|
||||
-include $(TOPDIR)/Makefile.conf_last
|
||||
include ./Makefile.system
|
||||
|
||||
PREFIX ?= /opt/OpenBLAS
|
||||
|
||||
OPENBLAS_INCLUDE_DIR := $(PREFIX)/include
|
||||
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib
|
||||
OPENBLAS_BUILD_DIR := $(CURDIR)
|
||||
|
||||
.PHONY : install
|
||||
.NOTPARALLEL : install
|
||||
|
||||
lib.grd :
|
||||
$(error OpenBLAS: Please run "make" firstly)
|
||||
|
||||
install : lib.grd
|
||||
@-mkdir -p $(DESTDIR)$(PREFIX)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
#for inc
|
||||
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@awk '{print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
|
||||
@echo Generating f77blas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@echo \#ifndef OPENBLAS_F77BLAS_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#define OPENBLAS_F77BLAS_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#include \"openblas_config.h\" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@cat common_interface.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#endif >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
|
||||
ifndef NO_CBLAS
|
||||
@echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@sed 's/common/openblas_config/g' cblas.h > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h
|
||||
endif
|
||||
|
||||
ifndef NO_LAPACKE
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
|
||||
endif
|
||||
|
||||
#for install static library
|
||||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@install -pm644 $(LIBNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
#for install shared library
|
||||
ifndef NO_SHARED
|
||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
ifeq ($(OSNAME), Linux)
|
||||
@install -pm755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@-cp $(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-install_name_tool -id $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)
|
||||
@-ln -fs $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)
|
||||
endif
|
||||
endif
|
||||
|
||||
@echo Install OK!
|
||||
|
||||
@@ -17,13 +17,7 @@ endif
|
||||
endif
|
||||
|
||||
ifdef BINARY64
|
||||
ifeq ($(OSNAME), Linux)
|
||||
LDFLAGS = -m elf64ppc
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
LDFLAGS = -arch ppc64
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), AIX)
|
||||
CCOMMON_OPT += -mpowerpc64 -maix64
|
||||
@@ -34,17 +28,12 @@ ifeq ($(COMPILER_F77), xlf)
|
||||
FCOMMON_OPT += -q64
|
||||
endif
|
||||
ARFLAGS = -X 64
|
||||
LDFLAGS = -b64
|
||||
ASFLAGS = -a64
|
||||
endif
|
||||
else
|
||||
ifeq ($(OSNAME), Linux)
|
||||
LDFLAGS = -m elf32ppc
|
||||
endif
|
||||
ifeq ($(OSNAME), AIX)
|
||||
CCOMMON_OPT += -Wa,-a32
|
||||
ARFLAGS = -X 32
|
||||
LDFLAGS = -b32
|
||||
ASFLAGS = -a32
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
# This is triggered by Makefile.system and runs before any of the code is built.
|
||||
|
||||
export BINARY
|
||||
export USE_OPENMP
|
||||
|
||||
@@ -21,7 +23,17 @@ all: getarch_2nd
|
||||
|
||||
config.h : c_check f_check getarch
|
||||
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC)
|
||||
ifneq ($(ONLY_CBLAS), 1)
|
||||
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC)
|
||||
else
|
||||
#When we only build CBLAS, we set NOFORTRAN=2
|
||||
echo "NOFORTRAN=2" >> $(TARGET_MAKE)
|
||||
echo "NO_FBLAS=1" >> $(TARGET_MAKE)
|
||||
echo "F_COMPILER=GFORTRAN" >> $(TARGET_MAKE)
|
||||
echo "BU=_" >> $(TARGET_MAKE)
|
||||
echo "#define BUNDERSCORE _" >> $(TARGET_CONF)
|
||||
echo "#define NEEDBUNDERSCORE 1" >> $(TARGET_CONF)
|
||||
endif
|
||||
./getarch 0 >> $(TARGET_MAKE)
|
||||
./getarch 1 >> $(TARGET_CONF)
|
||||
|
||||
@@ -3,7 +3,12 @@
|
||||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.1alpha1
|
||||
VERSION = 0.2.8
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
# is libopenblas_$(LIBNAMESUFFIX).so.0.
|
||||
# LIBNAMESUFFIX = omp
|
||||
|
||||
# You can specify the target architecture, otherwise it's
|
||||
# automatically detected.
|
||||
@@ -19,10 +24,13 @@ VERSION = 0.1alpha1
|
||||
# Fortran compiler. Default is g77.
|
||||
# FC = gfortran
|
||||
|
||||
# Even you can specify cross compiler
|
||||
# Even you can specify cross compiler. Meanwhile, please set HOSTCC.
|
||||
# CC = x86_64-w64-mingw32-gcc
|
||||
# FC = x86_64-w64-mingw32-gfortran
|
||||
|
||||
# If you use the cross compiler, please set this host compiler.
|
||||
# HOSTCC = gcc
|
||||
|
||||
# If you need 32bit binary, define BINARY=32, otherwise define BINARY=64
|
||||
# BINARY=64
|
||||
|
||||
@@ -40,12 +48,23 @@ VERSION = 0.1alpha1
|
||||
# automatically detected by the the script.
|
||||
# NUM_THREADS = 24
|
||||
|
||||
# if you don't need generate the shared library, please comment it in.
|
||||
# NO_SHARED = 1
|
||||
|
||||
# If you don't need CBLAS interface, please comment it in.
|
||||
# NO_CBLAS = 1
|
||||
|
||||
# If you don't need LAPACK, please comment it in.
|
||||
# If you only want CBLAS interface without installing Fortran compiler,
|
||||
# please comment it in.
|
||||
# ONLY_CBLAS = 1
|
||||
|
||||
# If you don't need LAPACK, please comment it in.
|
||||
# If you set NO_LAPACK=1, the library automatically sets NO_LAPACKE=1.
|
||||
# NO_LAPACK = 1
|
||||
|
||||
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
|
||||
# NO_LAPACKE = 1
|
||||
|
||||
# If you want to use legacy threaded Level 3 implementation.
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
|
||||
@@ -62,6 +81,13 @@ VERSION = 0.1alpha1
|
||||
# If you want to disable CPU/Memory affinity on Linux.
|
||||
# NO_AFFINITY = 1
|
||||
|
||||
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
|
||||
# and OS. However, the performance is low.
|
||||
# NO_AVX = 1
|
||||
|
||||
# Don't use parallel make.
|
||||
# NO_PARALLEL_MAKE = 1
|
||||
|
||||
# If you would like to know minute performance report of GotoBLAS.
|
||||
# FUNCTION_PROFILE = 1
|
||||
|
||||
@@ -83,6 +109,11 @@ VERSION = 0.1alpha1
|
||||
# If you need to synchronize FP CSR between threads (for x86/x86_64 only).
|
||||
# CONSISTENT_FPCSR = 1
|
||||
|
||||
# If any gemm arguement m, n or k is less or equal this threshold, gemm will be execute
|
||||
# with single thread. You can use this flag to avoid the overhead of multi-threading
|
||||
# in small matrix sizes. The default value is 4.
|
||||
# GEMM_MULTITHREAD_THRESHOLD = 4
|
||||
|
||||
# If you need santy check by comparing reference BLAS. It'll be very
|
||||
# slow (Not implemented yet).
|
||||
# SANITY_CHECK = 1
|
||||
@@ -91,19 +122,19 @@ VERSION = 0.1alpha1
|
||||
# SANITY_CHECK to compare the result with reference BLAS.
|
||||
# UTEST_CHECK = 1
|
||||
|
||||
# Common Optimization Flag; -O2 is enough.
|
||||
# DEBUG = 1
|
||||
# The installation directory.
|
||||
# PREFIX = /opt/OpenBLAS
|
||||
|
||||
ifeq ($(DEBUG), 1)
|
||||
COMMON_OPT += -g
|
||||
# -DDEBUG
|
||||
else
|
||||
COMMON_OPT += -O2
|
||||
endif
|
||||
# Common Optimization Flag;
|
||||
# The default -O2 is enough.
|
||||
# COMMON_OPT = -O2
|
||||
|
||||
# Profiling flags
|
||||
COMMON_PROF = -pg
|
||||
|
||||
# Build Debug version
|
||||
# DEBUG = 1
|
||||
|
||||
#
|
||||
# End of user configuration
|
||||
#
|
||||
|
||||
@@ -10,7 +10,6 @@ endif
|
||||
ifeq ($(COMPILER_F77), f90)
|
||||
FCOMMON_OPT += -xarch=v9
|
||||
endif
|
||||
LDFLAGS = -64
|
||||
else
|
||||
|
||||
CCOMMON_OPT += -mcpu=v9
|
||||
|
||||
331
Makefile.system
331
Makefile.system
@@ -9,8 +9,24 @@ ifndef TOPDIR
|
||||
TOPDIR = .
|
||||
endif
|
||||
|
||||
NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib
|
||||
|
||||
# Default C compiler
|
||||
# - Only set if not specified on the command line or inherited from the environment.
|
||||
# - CC is an implicit variable so neither '?=' or 'ifndef' can be used.
|
||||
# http://stackoverflow.com/questions/4029274/mingw-and-make-variables
|
||||
# - Default value is 'cc' which is not always a valid command (e.g. MinGW).
|
||||
ifeq ($(origin CC),default)
|
||||
CC = gcc
|
||||
# Change the default compile to clang on Mac OSX.
|
||||
# http://stackoverflow.com/questions/714100/os-detecting-makefile
|
||||
UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
CC = clang
|
||||
endif
|
||||
endif
|
||||
|
||||
# Default Fortran compiler (FC) is selected by f_check.
|
||||
|
||||
ifndef MAKEFILE_RULE
|
||||
include $(TOPDIR)/Makefile.rule
|
||||
@@ -27,7 +43,50 @@ HOSTCC = $(CC)
|
||||
endif
|
||||
|
||||
ifdef TARGET
|
||||
GETARCH_FLAGS += -DFORCE_$(TARGET)
|
||||
GETARCH_FLAGS := -DFORCE_$(TARGET)
|
||||
endif
|
||||
|
||||
#TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1.
|
||||
#
|
||||
ifdef TARGET_CORE
|
||||
GETARCH_FLAGS := -DFORCE_$(TARGET_CORE)
|
||||
endif
|
||||
|
||||
ifdef INTERFACE64
|
||||
GETARCH_FLAGS += -DUSE64BITINT
|
||||
endif
|
||||
|
||||
ifndef GEMM_MULTITHREAD_THRESHOLD
|
||||
GEMM_MULTITHREAD_THRESHOLD=4
|
||||
endif
|
||||
GETARCH_FLAGS += -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD)
|
||||
|
||||
ifeq ($(NO_AVX), 1)
|
||||
GETARCH_FLAGS += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifeq ($(DEBUG), 1)
|
||||
GETARCH_FLAGS += -g
|
||||
endif
|
||||
|
||||
ifeq ($(QUIET_MAKE), 1)
|
||||
MAKE += -s
|
||||
endif
|
||||
|
||||
ifndef NO_PARALLEL_MAKE
|
||||
NO_PARALLEL_MAKE=0
|
||||
endif
|
||||
GETARCH_FLAGS += -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE)
|
||||
|
||||
ifeq ($(HOSTCC), loongcc)
|
||||
GETARCH_FLAGS += -static
|
||||
endif
|
||||
|
||||
#if don't use Fortran, it will only compile CBLAS.
|
||||
ifeq ($(ONLY_CBLAS), 1)
|
||||
NO_LAPACK = 1
|
||||
else
|
||||
ONLY_CBLAS = 0
|
||||
endif
|
||||
|
||||
# This operation is expensive, so execution should be once.
|
||||
@@ -35,7 +94,7 @@ ifndef GOTOBLAS_MAKEFILE
|
||||
export GOTOBLAS_MAKEFILE = 1
|
||||
|
||||
# Generating Makefile.conf and config.h
|
||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.getarch CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS=$(GETARCH_FLAGS) BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) all)
|
||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) all)
|
||||
|
||||
ifndef TARGET_CORE
|
||||
include $(TOPDIR)/Makefile.conf
|
||||
@@ -85,8 +144,16 @@ DLLWRAP = $(CROSS_SUFFIX)dllwrap
|
||||
#
|
||||
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
EXTRALIB += -lSystemStubs
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.2
|
||||
MD5SUM = md5 -r
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
MD5SUM = md5 -r
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
MD5SUM = md5 -n
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Linux)
|
||||
@@ -105,7 +172,39 @@ EXTRALIB += -defaultlib:advapi32
|
||||
|
||||
SUFFIX = obj
|
||||
PSUFFIX = pobj
|
||||
LIBSUFFIX = lib
|
||||
LIBSUFFIX = a
|
||||
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
CCOMMON_OPT += -DMS_ABI
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
#Test for supporting MS_ABI
|
||||
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
|
||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||
ifeq ($(GCCVERSIONGT4), 1)
|
||||
# GCC Majar version > 4
|
||||
# It is compatible with MSVC ABI.
|
||||
CCOMMON_OPT += -DMS_ABI
|
||||
endif
|
||||
|
||||
ifeq ($(GCCVERSIONGTEQ4), 1)
|
||||
ifeq ($(GCCMINORVERSIONGTEQ7), 1)
|
||||
# GCC Version >=4.7
|
||||
# It is compatible with MSVC ABI.
|
||||
CCOMMON_OPT += -DMS_ABI
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Ensure the correct stack alignment on Win32
|
||||
# http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
|
||||
ifeq ($(ARCH), x86)
|
||||
CCOMMON_OPT += -mincoming-stack-boundary=2
|
||||
FCOMMON_OPT += -mincoming-stack-boundary=2
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Interix)
|
||||
@@ -130,6 +229,11 @@ endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# ifeq logical or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix))
|
||||
OS_WINDOWS=1
|
||||
endif
|
||||
|
||||
ifdef QUAD_PRECISION
|
||||
CCOMMON_OPT += -DQUAD_PRECISION
|
||||
NO_EXPRECISION = 1
|
||||
@@ -160,11 +264,17 @@ NO_BINARY_MODE = 1
|
||||
endif
|
||||
ifndef NO_EXPRECISION
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# ifeq logical or. GCC or LSB
|
||||
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
|
||||
EXPRECISION = 1
|
||||
CCOMMON_OPT += -DEXPRECISION -m128bit-long-double
|
||||
FCOMMON_OPT += -m128bit-long-double
|
||||
endif
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
EXPRECISION = 1
|
||||
CCOMMON_OPT += -DEXPRECISION
|
||||
FCOMMON_OPT += -m128bit-long-double
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
@@ -172,11 +282,17 @@ endif
|
||||
ifeq ($(ARCH), x86_64)
|
||||
ifndef NO_EXPRECISION
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# ifeq logical or. GCC or LSB
|
||||
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
|
||||
EXPRECISION = 1
|
||||
CCOMMON_OPT += -DEXPRECISION -m128bit-long-double
|
||||
FCOMMON_OPT += -m128bit-long-double
|
||||
endif
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
EXPRECISION = 1
|
||||
CCOMMON_OPT += -DEXPRECISION
|
||||
FCOMMON_OPT += -m128bit-long-double
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
@@ -185,8 +301,14 @@ ifeq ($(C_COMPILER), INTEL)
|
||||
CCOMMON_OPT += -wd981
|
||||
endif
|
||||
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
# ifeq logical or. GCC or LSB
|
||||
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
|
||||
CCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
$(error OpenBLAS: Clang didn't support OpenMP yet.)
|
||||
CCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
|
||||
@@ -209,14 +331,20 @@ endif
|
||||
endif
|
||||
|
||||
|
||||
ifdef DYNAMIC_ARCH
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
ifeq ($(ARCH), x86)
|
||||
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
|
||||
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA ATOM NANO
|
||||
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
ifneq ($(NO_AVX), 1)
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86_64)
|
||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA ATOM NANO
|
||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
ifneq ($(NO_AVX), 1)
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef DYNAMIC_CORE
|
||||
@@ -245,15 +373,35 @@ NO_BINARY_MODE = 1
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), arm)
|
||||
NO_BINARY_MODE = 1
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), arm64)
|
||||
NO_BINARY_MODE = 1
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
|
||||
|
||||
|
||||
|
||||
#
|
||||
# C Compiler dependent settings
|
||||
#
|
||||
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
|
||||
# ifeq logical or. GCC or CLANG or LSB
|
||||
# http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or
|
||||
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC CLANG LSB))
|
||||
CCOMMON_OPT += -Wall
|
||||
COMMON_PROF += -fno-inline
|
||||
NO_UNINITIALIZED_WARN = -Wno-uninitialized
|
||||
|
||||
ifeq ($(QUIET_MAKE), 1)
|
||||
CCOMMON_OPT += $(NO_UNINITIALIZED_WARN) -Wno-unused
|
||||
endif
|
||||
|
||||
ifdef NO_BINARY_MODE
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
@@ -265,7 +413,12 @@ endif
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
CCOMMON_OPT += -march=mips64
|
||||
FCOMMON_OPT += -march=mips64
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), LOONGSON3B)
|
||||
CCOMMON_OPT += -march=mips64
|
||||
FCOMMON_OPT += -march=mips64
|
||||
endif
|
||||
@@ -332,7 +485,11 @@ endif
|
||||
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
CCOMMON_OPT += -DF_INTERFACE_GFORT
|
||||
FCOMMON_OPT += -Wall
|
||||
FCOMMON_OPT += -Wall
|
||||
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
EXTRALIB += -lgfortran
|
||||
endif
|
||||
ifdef NO_BINARY_MODE
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifdef BINARY64
|
||||
@@ -439,11 +596,28 @@ ifdef INTERFACE64
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifndef BINARY64
|
||||
FCOMMON_OPT += -n32
|
||||
else
|
||||
FCOMMON_OPT += -n64
|
||||
endif
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
FCOMMON_OPT += -loongson3 -static
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), LOONGSON3B)
|
||||
FCOMMON_OPT += -loongson3 -static
|
||||
endif
|
||||
|
||||
else
|
||||
ifndef BINARY64
|
||||
FCOMMON_OPT += -m32
|
||||
else
|
||||
FCOMMON_OPT += -m64
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef USE_OPENMP
|
||||
FEXTRALIB += -lstdc++
|
||||
@@ -452,12 +626,30 @@ endif
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), OPEN64)
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifndef BINARY64
|
||||
CCOMMON_OPT += -n32
|
||||
else
|
||||
CCOMMON_OPT += -n64
|
||||
endif
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
CCOMMON_OPT += -loongson3 -static
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), LOONGSON3B)
|
||||
CCOMMON_OPT += -loongson3 -static
|
||||
endif
|
||||
|
||||
else
|
||||
|
||||
ifndef BINARY64
|
||||
CCOMMON_OPT += -m32
|
||||
else
|
||||
CCOMMON_OPT += -m64
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), SUN)
|
||||
CCOMMON_OPT += -w
|
||||
@@ -489,7 +681,8 @@ endif
|
||||
|
||||
ifdef BINARY64
|
||||
ifdef INTERFACE64
|
||||
CCOMMON_OPT += -DUSE64BITINT
|
||||
CCOMMON_OPT +=
|
||||
#-DUSE64BITINT
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -510,12 +703,28 @@ ifeq ($(DYNAMIC_ARCH), 1)
|
||||
CCOMMON_OPT += -DDYNAMIC_ARCH
|
||||
endif
|
||||
|
||||
ifeq ($(NO_LAPACK), 1)
|
||||
CCOMMON_OPT += -DNO_LAPACK
|
||||
#Disable LAPACK C interface
|
||||
NO_LAPACKE = 1
|
||||
endif
|
||||
|
||||
ifeq ($(NO_LAPACKE), 1)
|
||||
CCOMMON_OPT += -DNO_LAPACKE
|
||||
endif
|
||||
|
||||
ifeq ($(NO_AVX), 1)
|
||||
CCOMMON_OPT += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifdef SMP
|
||||
CCOMMON_OPT += -DSMP_SERVER
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifneq ($(CORE), LOONGSON3B)
|
||||
USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
@@ -554,7 +763,11 @@ ifdef USE_SIMPLE_THREADED_LEVEL3
|
||||
CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3
|
||||
endif
|
||||
|
||||
ifndef LIBNAMESUFFIX
|
||||
LIBPREFIX = libopenblas
|
||||
else
|
||||
LIBPREFIX = libopenblas_$(LIBNAMESUFFIX)
|
||||
endif
|
||||
|
||||
KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
|
||||
|
||||
@@ -576,9 +789,11 @@ endif
|
||||
|
||||
ifneq ($(ARCH), x86_64)
|
||||
ifneq ($(ARCH), x86)
|
||||
ifneq ($(CORE), LOONGSON3B)
|
||||
NO_AFFINITY = 1
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef NO_AFFINITY
|
||||
CCOMMON_OPT += -DNO_AFFINITY
|
||||
@@ -618,16 +833,59 @@ PATCH = patch
|
||||
GREP = grep
|
||||
endif
|
||||
|
||||
ifndef MD5SUM
|
||||
MD5SUM = md5sum
|
||||
endif
|
||||
|
||||
AWK = awk
|
||||
|
||||
REVISION = -r$(VERSION)
|
||||
MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION)))
|
||||
|
||||
CFLAGS = $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
|
||||
PFLAGS = $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
|
||||
ifeq ($(DEBUG), 1)
|
||||
COMMON_OPT += -g
|
||||
endif
|
||||
|
||||
FFLAGS = $(COMMON_OPT) $(FCOMMON_OPT)
|
||||
FPFLAGS = $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
|
||||
ifndef COMMON_OPT
|
||||
ifeq ($(ARCH), arm)
|
||||
COMMON_OPT = -O3
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef COMMON_OPT
|
||||
ifeq ($(ARCH), arm64)
|
||||
COMMON_OPT = -O3
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
ifndef COMMON_OPT
|
||||
COMMON_OPT = -O2
|
||||
endif
|
||||
|
||||
|
||||
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
|
||||
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
|
||||
|
||||
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
|
||||
override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
|
||||
#MAKEOVERRIDES =
|
||||
|
||||
#For LAPACK Fortran codes.
|
||||
LAPACK_FFLAGS := $(filter-out -fopenmp -mp -openmp -xopenmp=parallel,$(FFLAGS))
|
||||
LAPACK_FPFLAGS := $(filter-out -fopenmp -mp -openmp -xopenmp=parallel,$(FPFLAGS))
|
||||
|
||||
LAPACK_CFLAGS = $(CFLAGS)
|
||||
LAPACK_CFLAGS += -DHAVE_LAPACK_CONFIG_H
|
||||
ifdef INTERFACE64
|
||||
LAPACK_CFLAGS += -DLAPACK_ILP64
|
||||
endif
|
||||
ifdef OS_WINDOWS
|
||||
LAPACK_CFLAGS += -DOPENBLAS_OS_WINDOWS
|
||||
endif
|
||||
ifeq ($(C_COMPILER), LSB)
|
||||
LAPACK_CFLAGS += -DLAPACK_COMPLEX_STRUCTURE
|
||||
endif
|
||||
|
||||
ifndef SUFFIX
|
||||
SUFFIX = o
|
||||
@@ -641,7 +899,7 @@ ifndef LIBSUFFIX
|
||||
LIBSUFFIX = a
|
||||
endif
|
||||
|
||||
ifndef DYNAMIC_ARCH
|
||||
ifneq ($(DYNAMIC_ARCH), 1)
|
||||
ifndef SMP
|
||||
LIBNAME = $(LIBPREFIX)_$(LIBCORE)$(REVISION).$(LIBSUFFIX)
|
||||
LIBNAME_P = $(LIBPREFIX)_$(LIBCORE)$(REVISION)_p.$(LIBSUFFIX)
|
||||
@@ -660,8 +918,8 @@ endif
|
||||
endif
|
||||
|
||||
|
||||
LIBDLLNAME = $(LIBPREFIX).dll
|
||||
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.so)
|
||||
LIBDLLNAME = $(LIBNAME:.$(LIBSUFFIX)=.dll)
|
||||
LIBDYNNAME = $(LIBNAME:.$(LIBSUFFIX)=.dylib)
|
||||
LIBDEFNAME = $(LIBNAME:.$(LIBSUFFIX)=.def)
|
||||
LIBEXPNAME = $(LIBNAME:.$(LIBSUFFIX)=.exp)
|
||||
@@ -670,6 +928,23 @@ LIBZIPNAME = $(LIBNAME:.$(LIBSUFFIX)=.zip)
|
||||
LIBS = $(TOPDIR)/$(LIBNAME)
|
||||
LIBS_P = $(TOPDIR)/$(LIBNAME_P)
|
||||
|
||||
|
||||
LIB_COMPONENTS = BLAS
|
||||
ifneq ($(NO_CBLAS), 1)
|
||||
LIB_COMPONENTS += CBLAS
|
||||
endif
|
||||
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
LIB_COMPONENTS += LAPACK
|
||||
ifneq ($(NO_LAPACKE), 1)
|
||||
LIB_COMPONENTS += LAPACKE
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ONLY_CBLAS), 1)
|
||||
LIB_COMPONENTS = CBLAS
|
||||
endif
|
||||
|
||||
export OSNAME
|
||||
export ARCH
|
||||
export CORE
|
||||
@@ -680,6 +955,7 @@ export CC
|
||||
export FC
|
||||
export BU
|
||||
export FU
|
||||
export NEED2UNDERSCORES
|
||||
export USE_THREAD
|
||||
export NUM_THREADS
|
||||
export NUM_CORES
|
||||
@@ -695,6 +971,7 @@ export USE_OPENMP
|
||||
export CROSS
|
||||
export CROSS_SUFFIX
|
||||
export NOFORTRAN
|
||||
export NO_FBLAS
|
||||
export EXTRALIB
|
||||
export CEXTRALIB
|
||||
export FEXTRALIB
|
||||
@@ -706,6 +983,11 @@ export HAVE_SSE4_1
|
||||
export HAVE_SSE4_2
|
||||
export HAVE_SSE4A
|
||||
export HAVE_SSE5
|
||||
export HAVE_AVX
|
||||
export HAVE_VFP
|
||||
export HAVE_VFPV3
|
||||
export HAVE_VFPV4
|
||||
export HAVE_NEON
|
||||
export KERNELDIR
|
||||
export FUNCTION_PROFILE
|
||||
export TARGET_CORE
|
||||
@@ -722,6 +1004,13 @@ export ZGEMM_UNROLL_M
|
||||
export ZGEMM_UNROLL_N
|
||||
export XGEMM_UNROLL_M
|
||||
export XGEMM_UNROLL_N
|
||||
export CGEMM3M_UNROLL_M
|
||||
export CGEMM3M_UNROLL_N
|
||||
export ZGEMM3M_UNROLL_M
|
||||
export ZGEMM3M_UNROLL_N
|
||||
export XGEMM3M_UNROLL_M
|
||||
export XGEMM3M_UNROLL_N
|
||||
|
||||
|
||||
ifdef USE_CUDA
|
||||
export CUDADIR
|
||||
|
||||
@@ -22,19 +22,19 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
|
||||
endif
|
||||
|
||||
$(SBLASOBJS) $(SBLASOBJS_P) : CFLAGS += -UDOUBLE -UCOMPLEX
|
||||
$(DBLASOBJS) $(DBLASOBJS_P) : CFLAGS += -DDOUBLE -UCOMPLEX
|
||||
$(QBLASOBJS) $(QBLASOBJS_P) : CFLAGS += -DXDOUBLE -UCOMPLEX
|
||||
$(CBLASOBJS) $(CBLASOBJS_P) : CFLAGS += -UDOUBLE -DCOMPLEX
|
||||
$(ZBLASOBJS) $(ZBLASOBJS_P) : CFLAGS += -DDOUBLE -DCOMPLEX
|
||||
$(XBLASOBJS) $(XBLASOBJS_P) : CFLAGS += -DXDOUBLE -DCOMPLEX
|
||||
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
|
||||
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
|
||||
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
|
||||
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
|
||||
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
|
||||
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
|
||||
|
||||
$(SBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(DBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(QBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(CBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(ZBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(XBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
|
||||
libs :: $(BLASOBJS) $(COMMONOBJS)
|
||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||
@@ -606,7 +606,8 @@ clean ::
|
||||
@if test -d $(ARCH); then \
|
||||
(cd $(ARCH) && $(MAKE) clean) \
|
||||
fi
|
||||
@rm -rf *.a *.s *.o *.po *.obj *.i *.so core core.* gmon.out *.cso \
|
||||
@find . -name '*.o' | xargs rm -rf
|
||||
@rm -rf *.a *.s *.po *.obj *.i *.so core core.* gmon.out *.cso \
|
||||
*.csx *.is *~ *.exe *.flame *.pdb *.dwf \
|
||||
gen_insn_flash.c gen_insn_flash *.stackdump *.dll *.exp *.lib \
|
||||
*.pc *.pcl *.def *.i *.prof linktest.c \
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
# COMPILER_PREFIX = mingw32-
|
||||
|
||||
ifeq ($(OSNAME), Linux)
|
||||
LDFLAGS = -melf_i386
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Interix)
|
||||
ARFLAGS = -m x86
|
||||
|
||||
@@ -2,25 +2,12 @@
|
||||
|
||||
ifeq ($(OSNAME), SunOS)
|
||||
ifdef BINARY64
|
||||
LDFLAGS = -64
|
||||
ifeq ($(F_COMPILER), SUN)
|
||||
FCOMMON_OPT += -m64
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
LDFLAGS = -m elf_x86_64_fbsd
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Linux)
|
||||
LDFLAGS = -m elf_x86_64
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
LDFLAGS =
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Interix)
|
||||
ARFLAGS = -m x64
|
||||
endif
|
||||
|
||||
59
README
59
README
@@ -1,59 +0,0 @@
|
||||
OpenBLAS Readme
|
||||
|
||||
1.Introduction
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. OpenBLAS is an open source project supported by Lab of Parallel Software and Computational Science, ISCAS.(http://www.rdcps.ac.cn)
|
||||
|
||||
2.Intallation
|
||||
Download from project homepage. http://xianyi.github.com/OpenBLAS/
|
||||
Or,
|
||||
check out codes from git://github.com/xianyi/OpenBLAS.git
|
||||
1)Normal compile
|
||||
Please read GotoBLAS_02QuickInstall.txt or type "make"
|
||||
|
||||
2)Cross compile
|
||||
Please set CC and FC with the cross toolchains. Then, set HOSTCC with your host C compiler. At last, set TARGET explicitly.
|
||||
|
||||
examples:
|
||||
On X86 box, compile this library for loongson3a CPU.
|
||||
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
|
||||
|
||||
3)Debug version
|
||||
make DEBUG=1
|
||||
|
||||
3.Support CPU & OS
|
||||
Please read GotoBLAS_01Readme.txt
|
||||
|
||||
Additional support CPU:
|
||||
x86_64:
|
||||
Intel Xeon 56xx (Westmere) //Used GotoBLAS2 Nehalem codes.
|
||||
MIPS64:
|
||||
ICT Loongson 3A //The initial version used GotoBLAS2 MIPS64 kernels. Thus, the performance is not good.
|
||||
|
||||
4.Usages
|
||||
Link with libopenblas.a or -lopenblas for shared library.
|
||||
|
||||
4.1 Set the number of threads with environment variables. for example,
|
||||
export OPENBLAS_NUM_THREADS=4
|
||||
or
|
||||
export GOTO_NUM_THREADS=4
|
||||
or
|
||||
export OMP_NUM_THREADS=4
|
||||
|
||||
The priorities are OPENBLAS_NUM_THREAD > GOTO_NUM_THREADS > OMP_NUM_THREADS.
|
||||
|
||||
4.2 Set the number of threads with calling functions. for example,
|
||||
void goto_set_num_threads(int num_threads);
|
||||
or
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
|
||||
5.Report Bugs
|
||||
Please add a issue in https://github.com/xianyi/OpenBLAS/issues
|
||||
|
||||
6.To-Do List:
|
||||
Optimization on ICT Loongson 3A CPU
|
||||
|
||||
7.Contact
|
||||
OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas
|
||||
|
||||
8.ChangeLog
|
||||
Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version.
|
||||
130
README.md
Normal file
130
README.md
Normal file
@@ -0,0 +1,130 @@
|
||||
# OpenBLAS
|
||||
|
||||
[](https://travis-ci.org/xianyi/OpenBLAS)
|
||||
|
||||
## Introduction
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
|
||||
Please read the documents on OpenBLAS wiki pages <http://github.com/xianyi/OpenBLAS/wiki>.
|
||||
|
||||
## Binary Packages
|
||||
We provide binary packages for the following platform.
|
||||
|
||||
* Windows x86/x86_64
|
||||
|
||||
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/).
|
||||
|
||||
## Installation from Source
|
||||
Download from project homepage. http://xianyi.github.com/OpenBLAS/
|
||||
|
||||
Or, check out codes from git://github.com/xianyi/OpenBLAS.git
|
||||
### Normal compile
|
||||
* type "make" to detect the CPU automatically.
|
||||
or
|
||||
* type "make TARGET=xxx" to set target CPU, e.g. "make TARGET=NEHALEM". The full target list is in file TargetList.txt.
|
||||
|
||||
### Cross compile
|
||||
Please set CC and FC with the cross toolchains. Then, set HOSTCC with your host C compiler. At last, set TARGET explicitly.
|
||||
|
||||
Examples:
|
||||
|
||||
On X86 box, compile this library for loongson3a CPU.
|
||||
|
||||
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
|
||||
|
||||
On X86 box, compile this library for loongson3a CPU with loongcc (based on Open64) compiler.
|
||||
|
||||
make CC=loongcc FC=loongf95 HOSTCC=gcc TARGET=LOONGSON3A CROSS=1 CROSS_SUFFIX=mips64el-st-linux-gnu- NO_LAPACKE=1 NO_SHARED=1 BINARY=32
|
||||
|
||||
### Debug version
|
||||
|
||||
make DEBUG=1
|
||||
|
||||
### Install to the directory (optional)
|
||||
|
||||
Example:
|
||||
|
||||
make install PREFIX=your_installation_directory
|
||||
|
||||
The default directory is /opt/OpenBLAS
|
||||
|
||||
## Support CPU & OS
|
||||
Please read GotoBLAS_01Readme.txt
|
||||
|
||||
### Additional support CPU:
|
||||
|
||||
#### x86/x86-64:
|
||||
- **Intel Xeon 56xx (Westmere)**: Used GotoBLAS2 Nehalem codes.
|
||||
- **Intel Sandy Bridge**: Optimized Level-3 BLAS with AVX on x86-64.
|
||||
- **Intel Haswell**: Optimized Level-3 BLAS with AVX on x86-64 (identical to Sandy Bridge).
|
||||
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
|
||||
- **AMD Bulldozer**: x86-64 S/DGEMM AVX kernels. (Thank Werner Saar)
|
||||
- **AMD PILEDRIVER**: Used Bulldozer codes.
|
||||
|
||||
#### MIPS64:
|
||||
- **ICT Loongson 3A**: Optimized Level-3 BLAS and the part of Level-1,2.
|
||||
- **ICT Loongson 3B**: Experimental
|
||||
|
||||
### Support OS:
|
||||
- **GNU/Linux**
|
||||
- **MingWin/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
|
||||
- **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are the beginner on Mac OS X.
|
||||
- **FreeBSD**: Supported by community. We didn't test the library on this OS.
|
||||
|
||||
## Usages
|
||||
Link with libopenblas.a or -lopenblas for shared library.
|
||||
|
||||
### Set the number of threads with environment variables.
|
||||
|
||||
Examples:
|
||||
|
||||
export OPENBLAS_NUM_THREADS=4
|
||||
|
||||
or
|
||||
|
||||
export GOTO_NUM_THREADS=4
|
||||
|
||||
or
|
||||
|
||||
export OMP_NUM_THREADS=4
|
||||
|
||||
The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS.
|
||||
|
||||
If you compile this lib with USE_OPENMP=1, you should set OMP_NUM_THREADS environment variable. OpenBLAS ignores OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS with USE_OPENMP=1.
|
||||
|
||||
### Set the number of threads on runtime.
|
||||
|
||||
We provided the below functions to control the number of threads on runtime.
|
||||
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
|
||||
If you compile this lib with USE_OPENMP=1, you should use the above functions, too.
|
||||
|
||||
## Report Bugs
|
||||
Please add a issue in https://github.com/xianyi/OpenBLAS/issues
|
||||
|
||||
## Contact
|
||||
* OpenBLAS users mailing list: https://groups.google.com/forum/#!forum/openblas-users
|
||||
* OpenBLAS developers mailing list: https://groups.google.com/forum/#!forum/openblas-dev
|
||||
|
||||
## ChangeLog
|
||||
Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version.
|
||||
|
||||
## Troubleshooting
|
||||
* Please read [Faq](https://github.com/xianyi/OpenBLAS/wiki/Faq) at first.
|
||||
* Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD.
|
||||
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture. The Clang 3.0 will generate the wrong AVX binary code.
|
||||
* The number of CPUs/Cores should less than or equal to 256.
|
||||
* On Linux, OpenBLAS sets the processor affinity by default. This may cause [the conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html). You can build the library with NO_AFFINITY=1.
|
||||
* On Loongson 3A. make test would be failed because of pthread_create error. The error code is EAGAIN. However, it will be OK when you run the same testcase on shell.
|
||||
|
||||
## Contributing
|
||||
1. [Check for open issues](https://github.com/xianyi/OpenBLAS/issues) or open a fresh issue to start a discussion around a feature idea or a bug.
|
||||
1. Fork the [OpenBLAS](https://github.com/xianyi/OpenBLAS) repository to start making your changes.
|
||||
1. Write a test which shows that the bug was fixed or that the feature works as expected.
|
||||
1. Send a pull request. Make sure to add yourself to `CONTRIBUTORS.md`.
|
||||
|
||||
## Donation
|
||||
Please read [this wiki page](https://github.com/xianyi/OpenBLAS/wiki/Donation).
|
||||
61
TargetList.txt
Normal file
61
TargetList.txt
Normal file
@@ -0,0 +1,61 @@
|
||||
Force Target Examples:
|
||||
|
||||
make TARGET=NEHALEM
|
||||
make TARGET=LOONGSON3A BINARY=64
|
||||
make TARGET=ISTANBUL
|
||||
|
||||
Supported List:
|
||||
1.X86/X86_64
|
||||
a)Intel CPU:
|
||||
P2
|
||||
KATMAI
|
||||
COPPERMINE
|
||||
NORTHWOOD
|
||||
PRESCOTT
|
||||
BANIAS
|
||||
YONAH
|
||||
CORE2
|
||||
PENRYN
|
||||
DUNNINGTON
|
||||
NEHALEM
|
||||
SANDYBRIDGE
|
||||
ATOM
|
||||
|
||||
b)AMD CPU:
|
||||
ATHLON
|
||||
OPTERON
|
||||
OPTERON_SSE3
|
||||
BARCELONA
|
||||
SHANGHAI
|
||||
ISTANBUL
|
||||
BOBCAT
|
||||
BULLDOZER
|
||||
|
||||
c)VIA CPU:
|
||||
SSE_GENERIC
|
||||
VIAC3
|
||||
NANO
|
||||
|
||||
2.Power CPU:
|
||||
POWER4
|
||||
POWER5
|
||||
POWER6
|
||||
PPCG4
|
||||
PPC970
|
||||
PPC970MP
|
||||
PPC440
|
||||
PPC440FP2
|
||||
CELL
|
||||
|
||||
3.MIPS64 CPU:
|
||||
SICORTEX
|
||||
LOONGSON3A
|
||||
LOONGSON3B
|
||||
|
||||
4.IA64 CPU:
|
||||
ITANIUM2
|
||||
|
||||
5.SPARC CPU:
|
||||
SPARC
|
||||
SPARCV7
|
||||
|
||||
36
c_check
36
c_check
@@ -33,6 +33,8 @@ if ($ARGV[0] =~ /(.*)(-[.\d]+)/) {
|
||||
}
|
||||
|
||||
$compiler = "";
|
||||
$compiler = LSB if ($data =~ /COMPILER_LSB/);
|
||||
$compiler = CLANG if ($data =~ /COMPILER_CLANG/);
|
||||
$compiler = PGI if ($data =~ /COMPILER_PGI/);
|
||||
$compiler = PATHSCALE if ($data =~ /COMPILER_PATHSCALE/);
|
||||
$compiler = INTEL if ($data =~ /COMPILER_INTEL/);
|
||||
@@ -43,14 +45,14 @@ $compiler = DEC if ($data =~ /COMPILER_DEC/);
|
||||
$compiler = GCC if ($compiler eq "");
|
||||
|
||||
$os = Linux if ($data =~ /OS_LINUX/);
|
||||
$os = FreeBSD if ($data =~ /OS_FreeBSD/);
|
||||
$os = NetBSD if ($data =~ /OS_NetBSD/);
|
||||
$os = Darwin if ($data =~ /OS_Darwin/);
|
||||
$os = SunOS if ($data =~ /OS_SunOS/);
|
||||
$os = FreeBSD if ($data =~ /OS_FREEBSD/);
|
||||
$os = NetBSD if ($data =~ /OS_NETBSD/);
|
||||
$os = Darwin if ($data =~ /OS_DARWIN/);
|
||||
$os = SunOS if ($data =~ /OS_SUNOS/);
|
||||
$os = AIX if ($data =~ /OS_AIX/);
|
||||
$os = osf if ($data =~ /OS_OSF/);
|
||||
$os = WINNT if ($data =~ /OS_WINNT/);
|
||||
$os = CYGWIN_NT if ($data =~ /OS_CYGWIN/);
|
||||
$os = CYGWIN_NT if ($data =~ /OS_CYGWIN_NT/);
|
||||
$os = Interix if ($data =~ /OS_INTERIX/);
|
||||
|
||||
$architecture = x86 if ($data =~ /ARCH_X86/);
|
||||
@@ -61,6 +63,8 @@ $architecture = mips64 if ($data =~ /ARCH_MIPS64/);
|
||||
$architecture = alpha if ($data =~ /ARCH_ALPHA/);
|
||||
$architecture = sparc if ($data =~ /ARCH_SPARC/);
|
||||
$architecture = ia64 if ($data =~ /ARCH_IA64/);
|
||||
$architecture = arm if ($data =~ /ARCH_ARM/);
|
||||
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
||||
|
||||
$defined = 0;
|
||||
|
||||
@@ -117,7 +121,11 @@ if ($compiler eq "OPEN64") {
|
||||
$openmp = "-mp";
|
||||
}
|
||||
|
||||
if ($compiler eq "GCC") {
|
||||
if ($compiler eq "CLANG") {
|
||||
$openmp = "-fopenmp";
|
||||
}
|
||||
|
||||
if ($compiler eq "GCC" || $compiler eq "LSB") {
|
||||
$openmp = "-fopenmp";
|
||||
}
|
||||
|
||||
@@ -143,13 +151,15 @@ $architecture = mips64 if ($data =~ /ARCH_MIPS64/);
|
||||
$architecture = alpha if ($data =~ /ARCH_ALPHA/);
|
||||
$architecture = sparc if ($data =~ /ARCH_SPARC/);
|
||||
$architecture = ia64 if ($data =~ /ARCH_IA64/);
|
||||
$architecture = arm if ($data =~ /ARCH_ARM/);
|
||||
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
||||
|
||||
$binformat = bin32;
|
||||
$binformat = bin64 if ($data =~ /BINARY_64/);
|
||||
|
||||
$data = `$compiler_name -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`;
|
||||
|
||||
$data =~ /globl\ ([_\.]*)(.*)/;
|
||||
$data =~ /globl\s([_\.]*)(.*)/;
|
||||
|
||||
$need_fu = $1;
|
||||
|
||||
@@ -174,6 +184,8 @@ $linker_a = "";
|
||||
$link =~ s/\-Y\sP\,/\-Y/g;
|
||||
|
||||
@flags = split(/[\s\,\n]/, $link);
|
||||
# remove leading and trailing quotes from each flag.
|
||||
@flags = map {s/^['"]|['"]$//g; $_} @flags;
|
||||
|
||||
foreach $flags (@flags) {
|
||||
if (
|
||||
@@ -239,13 +251,13 @@ print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
|
||||
|
||||
if ($os eq "LINUX") {
|
||||
|
||||
@pthread = split(/\s+/, `nm /lib/libpthread.so* | grep _pthread_create`);
|
||||
# @pthread = split(/\s+/, `nm /lib/libpthread.so* | grep _pthread_create`);
|
||||
|
||||
if ($pthread[2] ne "") {
|
||||
print CONFFILE "#define PTHREAD_CREATE_FUNC $pthread[2]\n";
|
||||
} else {
|
||||
# if ($pthread[2] ne "") {
|
||||
# print CONFFILE "#define PTHREAD_CREATE_FUNC $pthread[2]\n";
|
||||
# } else {
|
||||
print CONFFILE "#define PTHREAD_CREATE_FUNC pthread_create\n";
|
||||
}
|
||||
# }
|
||||
} else {
|
||||
print CONFFILE "#define PTHREAD_CREATE_FUNC pthread_create\n";
|
||||
}
|
||||
|
||||
477
cblas.h
477
cblas.h
@@ -1,273 +1,312 @@
|
||||
#ifndef CBLAS_H
|
||||
#define CBLAS_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include "common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
/* Assume C declarations for C++ */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/*Set the number of threads on runtime.*/
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
/*Get the build configure on runtime.*/
|
||||
char* openblas_get_config(void);
|
||||
|
||||
/* Get the parallelization type which is used by OpenBLAS */
|
||||
int openblas_get_parallel(void);
|
||||
/* OpenBLAS is compiled for sequential use */
|
||||
#define OPENBLAS_SEQUENTIAL 0
|
||||
/* OpenBLAS is compiled using normal threading model */
|
||||
#define OPENBLAS_THREAD 1
|
||||
/* OpenBLAS is compiled using OpenMP threading model */
|
||||
#define OPENBLAS_OPENMP 2
|
||||
|
||||
|
||||
/*
|
||||
* Since all of GotoBlas was written without const,
|
||||
* we disable it at build time.
|
||||
*/
|
||||
#ifndef OPENBLAS_CONST
|
||||
# define OPENBLAS_CONST const
|
||||
#endif
|
||||
|
||||
|
||||
#define CBLAS_INDEX size_t
|
||||
|
||||
enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102};
|
||||
enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114};
|
||||
enum CBLAS_UPLO {CblasUpper=121, CblasLower=122};
|
||||
enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132};
|
||||
enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
|
||||
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
|
||||
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;
|
||||
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
|
||||
typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
|
||||
typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
|
||||
|
||||
float cblas_sdsdot(blasint n, float, float *x, blasint incx, float *y, blasint incy);
|
||||
double cblas_dsdot (blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
float cblas_sdot(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
double cblas_ddot(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
float cblas_sdsdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
double cblas_dsdot (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
float cblas_sdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
double cblas_ddot(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
float _Complex cblas_cdotu(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
float _Complex cblas_cdotc(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
double _Complex cblas_zdotu(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
double _Complex cblas_zdotc(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
openblas_complex_float cblas_cdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_float cblas_cdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_double cblas_zdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_double cblas_zdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_cdotu_sub(blasint n, float *x, blasint incx, float *y, blasint incy, float _Complex *ret);
|
||||
void cblas_cdotc_sub(blasint n, float *x, blasint incx, float *y, blasint incy, float _Complex *ret);
|
||||
void cblas_zdotu_sub(blasint n, double *x, blasint incx, double *y, blasint incy, double _Complex *ret);
|
||||
void cblas_zdotc_sub(blasint n, double *x, blasint incx, double *y, blasint incy, double _Complex *ret);
|
||||
void cblas_cdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy, openblas_complex_float *ret);
|
||||
void cblas_cdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy, openblas_complex_float *ret);
|
||||
void cblas_zdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy, openblas_complex_double *ret);
|
||||
void cblas_zdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy, openblas_complex_double *ret);
|
||||
|
||||
float cblas_sasum (blasint n, float *x, blasint incx);
|
||||
double cblas_dasum (blasint n, double *x, blasint incx);
|
||||
float cblas_scasum(blasint n, float *x, blasint incx);
|
||||
double cblas_dzasum(blasint n, double *x, blasint incx);
|
||||
float cblas_sasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
float cblas_scasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dzasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
float cblas_snrm2 (blasint N, float *X, blasint incX);
|
||||
double cblas_dnrm2 (blasint N, double *X, blasint incX);
|
||||
float cblas_scnrm2(blasint N, float *X, blasint incX);
|
||||
double cblas_dznrm2(blasint N, double *X, blasint incX);
|
||||
float cblas_snrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX);
|
||||
double cblas_dnrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX);
|
||||
float cblas_scnrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX);
|
||||
double cblas_dznrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
CBLAS_INDEX cblas_isamax(blasint n, float *x, blasint incx);
|
||||
CBLAS_INDEX cblas_idamax(blasint n, double *x, blasint incx);
|
||||
CBLAS_INDEX cblas_icamax(blasint n, float *x, blasint incx);
|
||||
CBLAS_INDEX cblas_izamax(blasint n, double *x, blasint incx);
|
||||
CBLAS_INDEX cblas_isamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_idamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_icamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_izamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
void cblas_saxpy(blasint n, float, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_daxpy(blasint n, double, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_caxpy(blasint n, float *, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zaxpy(blasint n, double *, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_saxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_daxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zaxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_scopy(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_dcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_ccopy(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_scopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_dcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_ccopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_sswap(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_dswap(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_cswap(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zswap(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_sswap(OPENBLAS_CONST blasint n, float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_dswap(OPENBLAS_CONST blasint n, double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_cswap(OPENBLAS_CONST blasint n, float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zswap(OPENBLAS_CONST blasint n, double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_srot(blasint N, float *X, blasint incX, float *Y, blasint incY, float c, float s);
|
||||
void cblas_drot(blasint N, double *X, blasint incX, double *Y, blasint incY, double c, double s);
|
||||
void cblas_srot(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float c, OPENBLAS_CONST float s);
|
||||
void cblas_drot(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double c, OPENBLAS_CONST double s);
|
||||
|
||||
void cblas_srotg(float *a, float *b, float *c, float *s);
|
||||
void cblas_drotg(double *a, double *b, double *c, double *s);
|
||||
|
||||
void cblas_srotm(blasint N, float *X, blasint incX, float *Y, blasint incY, float *P);
|
||||
void cblas_drotm(blasint N, double *X, blasint incX, double *Y, blasint incY, double *P);
|
||||
void cblas_srotm(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float *P);
|
||||
void cblas_drotm(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double *P);
|
||||
|
||||
void cblas_srotmg(float *d1, float *d2, float *b1, float b2, float *P);
|
||||
void cblas_drotmg(double *d1, double *d2, double *b1, double b2, double *P);
|
||||
void cblas_srotmg(float *d1, float *d2, float *b1, OPENBLAS_CONST float b2, float *P);
|
||||
void cblas_drotmg(double *d1, double *d2, double *b1, OPENBLAS_CONST double b2, double *P);
|
||||
|
||||
void cblas_sscal(blasint N, float alpha, float *X, blasint incX);
|
||||
void cblas_dscal(blasint N, double alpha, double *X, blasint incX);
|
||||
void cblas_cscal(blasint N, float *alpha, float *X, blasint incX);
|
||||
void cblas_zscal(blasint N, double *alpha, double *X, blasint incX);
|
||||
void cblas_csscal(blasint N, float alpha, float *X, blasint incX);
|
||||
void cblas_zdscal(blasint N, double alpha, double *X, blasint incX);
|
||||
void cblas_sscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_cscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_zscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_csscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_zdscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_sgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
float alpha, float *a, blasint lda, float *x, blasint incx, float beta, float *y, blasint incy);
|
||||
void cblas_dgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
double alpha, double *a, blasint lda, double *x, blasint incx, double beta, double *y, blasint incy);
|
||||
void cblas_cgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
float *alpha, float *a, blasint lda, float *x, blasint incx, float *beta, float *y, blasint incy);
|
||||
void cblas_zgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
double *alpha, double *a, blasint lda, double *x, blasint incx, double *beta, double *y, blasint incy);
|
||||
void cblas_sgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_dgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_cgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_sger (enum CBLAS_ORDER order, blasint M, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_dger (enum CBLAS_ORDER order, blasint M, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_cgeru(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_cgerc(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_zgeru(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_zgerc(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_sger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_dger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
|
||||
void cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_strsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_strmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_strmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
|
||||
void cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
|
||||
void cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
|
||||
void cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
|
||||
void cblas_ssyr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_dsyr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *A, OPENBLAS_CONST blasint lda);
|
||||
|
||||
void cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,blasint N, float alpha, float *X,
|
||||
blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,
|
||||
blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX,
|
||||
float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX,
|
||||
double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_ssyr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo,OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X,
|
||||
OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_dsyr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X,
|
||||
OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX,
|
||||
OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX,
|
||||
OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
|
||||
void cblas_sgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, float alpha, float *A, blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, double alpha, double *A, blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_cgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
void cblas_sgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_cgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, float alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dsbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, double alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_ssbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dsbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
|
||||
void cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_stbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_stbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_stbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_stpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_stpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_ssymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dsymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
void cblas_ssymv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dsymv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_chemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
|
||||
void cblas_sspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *Ap,
|
||||
float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *Ap,
|
||||
double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_sspmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *Ap,
|
||||
OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dspmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *Ap,
|
||||
OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_sspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Ap);
|
||||
void cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Ap);
|
||||
void cblas_sspr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *Ap);
|
||||
void cblas_dspr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *Ap);
|
||||
|
||||
void cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A);
|
||||
void cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,blasint incX, double *A);
|
||||
void cblas_chpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A);
|
||||
void cblas_zhpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X,OPENBLAS_CONST blasint incX, double *A);
|
||||
|
||||
void cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A);
|
||||
void cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A);
|
||||
void cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *Ap);
|
||||
void cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *Ap);
|
||||
void cblas_sspr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A);
|
||||
void cblas_dspr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A);
|
||||
void cblas_chpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *Ap);
|
||||
void cblas_zhpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *Ap);
|
||||
|
||||
void cblas_chbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
void cblas_chbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_chpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
|
||||
float *alpha, float *Ap, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
|
||||
double *alpha, double *Ap, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
void cblas_chpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *Ap, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *Ap, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
void cblas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_cgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_csymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
|
||||
void cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
|
||||
void cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *beta, double *C, blasint ldc);
|
||||
void cblas_ssyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dsyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_csyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zsyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
void cblas_ssyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dsyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_csyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zsyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_strmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_dtrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ctrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ztrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
|
||||
void cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_strsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_dtrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ctrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ztrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
|
||||
void cblas_chemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zhemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
void cblas_chemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zhemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
|
||||
void cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
|
||||
void cblas_cherk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zherk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_cher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
||||
|
||||
303
cblas_noconst.h
Normal file
303
cblas_noconst.h
Normal file
@@ -0,0 +1,303 @@
|
||||
#ifndef CBLAS_H
|
||||
#define CBLAS_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include "common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
/* Assume C declarations for C++ */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/*Set the number of threads on runtime.*/
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
/*Get the build configure on runtime.*/
|
||||
char* openblas_get_config(void);
|
||||
|
||||
/* Get the parallelization type which is used by OpenBLAS */
|
||||
int openblas_get_parallel(void);
|
||||
/* OpenBLAS is compiled for sequential use */
|
||||
#define OPENBLAS_SEQUENTIAL 0
|
||||
/* OpenBLAS is compiled using normal threading model */
|
||||
#define OPENBLAS_THREAD 1
|
||||
/* OpenBLAS is compiled using OpenMP threading model */
|
||||
#define OPENBLAS_OPENMP 2
|
||||
|
||||
|
||||
#define CBLAS_INDEX size_t
|
||||
|
||||
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
|
||||
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;
|
||||
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
|
||||
typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
|
||||
typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
|
||||
|
||||
float cblas_sdsdot(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy);
|
||||
double cblas_dsdot (blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
float cblas_sdot(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
double cblas_ddot(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
openblas_complex_float cblas_cdotu(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
openblas_complex_float cblas_cdotc(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
openblas_complex_double cblas_zdotu(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
openblas_complex_double cblas_zdotc(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
void cblas_cdotu_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret);
|
||||
void cblas_cdotc_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret);
|
||||
void cblas_zdotu_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
|
||||
void cblas_zdotc_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
|
||||
|
||||
float cblas_sasum (blasint n, float *x, blasint incx);
|
||||
double cblas_dasum (blasint n, double *x, blasint incx);
|
||||
float cblas_scasum(blasint n, float *x, blasint incx);
|
||||
double cblas_dzasum(blasint n, double *x, blasint incx);
|
||||
|
||||
float cblas_snrm2 (blasint N, float *X, blasint incX);
|
||||
double cblas_dnrm2 (blasint N, double *X, blasint incX);
|
||||
float cblas_scnrm2(blasint N, float *X, blasint incX);
|
||||
double cblas_dznrm2(blasint N, double *X, blasint incX);
|
||||
|
||||
CBLAS_INDEX cblas_isamax(blasint n, float *x, blasint incx);
|
||||
CBLAS_INDEX cblas_idamax(blasint n, double *x, blasint incx);
|
||||
CBLAS_INDEX cblas_icamax(blasint n, float *x, blasint incx);
|
||||
CBLAS_INDEX cblas_izamax(blasint n, double *x, blasint incx);
|
||||
|
||||
void cblas_saxpy(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_daxpy(blasint n, double alpha, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_caxpy(blasint n, float *alpha, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zaxpy(blasint n, double *alpha, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
void cblas_scopy(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_dcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_ccopy(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
void cblas_sswap(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_dswap(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_cswap(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zswap(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
void cblas_srot(blasint N, float *X, blasint incX, float *Y, blasint incY, float c, float s);
|
||||
void cblas_drot(blasint N, double *X, blasint incX, double *Y, blasint incY, double c, double s);
|
||||
|
||||
void cblas_srotg(float *a, float *b, float *c, float *s);
|
||||
void cblas_drotg(double *a, double *b, double *c, double *s);
|
||||
|
||||
void cblas_srotm(blasint N, float *X, blasint incX, float *Y, blasint incY, float *P);
|
||||
void cblas_drotm(blasint N, double *X, blasint incX, double *Y, blasint incY, double *P);
|
||||
|
||||
void cblas_srotmg(float *d1, float *d2, float *b1, float b2, float *P);
|
||||
void cblas_drotmg(double *d1, double *d2, double *b1, double b2, double *P);
|
||||
|
||||
void cblas_sscal(blasint N, float alpha, float *X, blasint incX);
|
||||
void cblas_dscal(blasint N, double alpha, double *X, blasint incX);
|
||||
void cblas_cscal(blasint N, float *alpha, float *X, blasint incX);
|
||||
void cblas_zscal(blasint N, double *alpha, double *X, blasint incX);
|
||||
void cblas_csscal(blasint N, float alpha, float *X, blasint incX);
|
||||
void cblas_zdscal(blasint N, double alpha, double *X, blasint incX);
|
||||
|
||||
void cblas_sgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
float alpha, float *a, blasint lda, float *x, blasint incx, float beta, float *y, blasint incy);
|
||||
void cblas_dgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
double alpha, double *a, blasint lda, double *x, blasint incx, double beta, double *y, blasint incy);
|
||||
void cblas_cgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
float *alpha, float *a, blasint lda, float *x, blasint incx, float *beta, float *y, blasint incy);
|
||||
void cblas_zgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
double *alpha, double *a, blasint lda, double *x, blasint incx, double *beta, double *y, blasint incy);
|
||||
|
||||
void cblas_sger (enum CBLAS_ORDER order, blasint M, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_dger (enum CBLAS_ORDER order, blasint M, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_cgeru(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_cgerc(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_zgeru(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_zgerc(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
|
||||
void cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
void cblas_strmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
void cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
|
||||
void cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
|
||||
void cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
|
||||
void cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
|
||||
|
||||
void cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,blasint N, float alpha, float *X,
|
||||
blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,
|
||||
blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX,
|
||||
float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX,
|
||||
double *Y, blasint incY, double *A, blasint lda);
|
||||
|
||||
void cblas_sgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, float alpha, float *A, blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, double alpha, double *A, blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_cgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, float alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dsbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, double alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
|
||||
|
||||
void cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
void cblas_stbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
void cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
|
||||
void cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
|
||||
void cblas_ssymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dsymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
|
||||
void cblas_sspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *Ap,
|
||||
float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *Ap,
|
||||
double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_sspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Ap);
|
||||
void cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Ap);
|
||||
|
||||
void cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A);
|
||||
void cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,blasint incX, double *A);
|
||||
|
||||
void cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A);
|
||||
void cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A);
|
||||
void cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *Ap);
|
||||
void cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *Ap);
|
||||
|
||||
void cblas_chbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_chpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
|
||||
float *alpha, float *Ap, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
|
||||
double *alpha, double *Ap, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
|
||||
void cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
|
||||
void cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
|
||||
void cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
|
||||
void cblas_chemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zhemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
|
||||
void cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
||||
85
common.h
85
common.h
@@ -39,6 +39,11 @@
|
||||
#ifndef COMMON_H
|
||||
#define COMMON_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
/* Assume C declarations for C++ */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
@@ -63,7 +68,7 @@
|
||||
#define SMP
|
||||
#endif
|
||||
|
||||
#if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_Interix)
|
||||
#if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INTERIX)
|
||||
#define WINDOWS_ABI
|
||||
#define OS_WINDOWS
|
||||
|
||||
@@ -84,6 +89,10 @@
|
||||
#include <sched.h>
|
||||
#endif
|
||||
|
||||
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD)
|
||||
#include <sched.h>
|
||||
#endif
|
||||
|
||||
#ifdef OS_WINDOWS
|
||||
#ifdef ATOM
|
||||
#define GOTO_ATOM ATOM
|
||||
@@ -301,10 +310,36 @@ typedef int blasint;
|
||||
#define YIELDING SwitchToThread()
|
||||
#endif
|
||||
|
||||
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8)
|
||||
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
|
||||
#endif
|
||||
|
||||
#ifdef PILEDRIVER
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef YIELDING
|
||||
#define YIELDING sched_yield()
|
||||
#endif
|
||||
|
||||
/***
|
||||
To alloc job_t on heap or statck.
|
||||
please https://github.com/xianyi/OpenBLAS/issues/246
|
||||
***/
|
||||
#if defined(OS_WINDOWS)
|
||||
#define GETRF_MEM_ALLOC_THRESHOLD 32
|
||||
#define BLAS3_MEM_ALLOC_THRESHOLD 32
|
||||
#endif
|
||||
|
||||
#ifndef GETRF_MEM_ALLOC_THRESHOLD
|
||||
#define GETRF_MEM_ALLOC_THRESHOLD 80
|
||||
#endif
|
||||
|
||||
#ifndef BLAS3_MEM_ALLOC_THRESHOLD
|
||||
#define BLAS3_MEM_ALLOC_THRESHOLD 160
|
||||
#endif
|
||||
|
||||
#ifdef QUAD_PRECISION
|
||||
#include "common_quad.h"
|
||||
#endif
|
||||
@@ -337,12 +372,26 @@ typedef int blasint;
|
||||
#include "common_mips64.h"
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_ARM
|
||||
#include "common_arm.h"
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_ARM64
|
||||
#include "common_arm64.h"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#include "common_linux.h"
|
||||
#endif
|
||||
|
||||
#define MMAP_ACCESS (PROT_READ | PROT_WRITE)
|
||||
|
||||
#ifdef __NetBSD__
|
||||
#define MMAP_POLICY (MAP_PRIVATE | MAP_ANON)
|
||||
#else
|
||||
#define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
|
||||
#endif
|
||||
|
||||
#include "param.h"
|
||||
#include "common_param.h"
|
||||
@@ -365,6 +414,31 @@ typedef int blasint;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
#ifndef NOINCLUDE
|
||||
/* Inclusion of a standard header file is needed for definition of __STDC_*
|
||||
predefined macros with some compilers (e.g. GCC 4.7 on Linux). This occurs
|
||||
as a side effect of including either <features.h> or <stdc-predef.h>. */
|
||||
#include <stdio.h>
|
||||
#endif // NOINCLUDE
|
||||
|
||||
/* C99 supports complex floating numbers natively, which GCC also offers as an
|
||||
extension since version 3.0. If neither are available, use a compatible
|
||||
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
||||
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
|
||||
(__GNUC__ >= 3 && !defined(__cplusplus)))
|
||||
#define OPENBLAS_COMPLEX_C99
|
||||
typedef float _Complex openblas_complex_float;
|
||||
typedef double _Complex openblas_complex_double;
|
||||
typedef xdouble _Complex openblas_complex_xdouble;
|
||||
#else
|
||||
#define OPENBLAS_COMPLEX_STRUCT
|
||||
typedef struct { float real, imag; } openblas_complex_float;
|
||||
typedef struct { double real, imag; } openblas_complex_double;
|
||||
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
|
||||
#endif
|
||||
#endif // ASSEMBLER
|
||||
|
||||
#ifndef IFLUSH
|
||||
#define IFLUSH
|
||||
#endif
|
||||
@@ -518,8 +592,10 @@ typedef struct {
|
||||
#include "common_level2.h"
|
||||
#include "common_level3.h"
|
||||
#include "common_lapack.h"
|
||||
|
||||
#ifdef CBLAS
|
||||
#include "cblas.h"
|
||||
# define OPENBLAS_CONST /* see comment in cblas.h */
|
||||
# include "cblas.h"
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
@@ -607,4 +683,9 @@ extern int gotoblas_profile;
|
||||
#define PRINT_DEBUG_NAME if (readenv("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
||||
|
||||
@@ -150,9 +150,17 @@ REALNAME:
|
||||
#define PROFCODE .prologue 0
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
#define GNUSTACK .section .note.GNU-stack,"",@progbits
|
||||
#else
|
||||
#define GNUSTACK
|
||||
#endif
|
||||
|
||||
#define EPILOGUE \
|
||||
.end REALNAME; \
|
||||
.ident VERSION
|
||||
.ident VERSION; \
|
||||
GNUSTACK
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef DOUBLE
|
||||
|
||||
169
common_arm.h
Normal file
169
common_arm.h
Normal file
@@ -0,0 +1,169 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#ifndef COMMON_ARM
|
||||
#define COMMON_ARM
|
||||
|
||||
#define MB
|
||||
#define WMB
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
#define RETURN_BY_COMPLEX
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
int register ret;
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
__asm__ __volatile__(
|
||||
"ldrex r2, [%1] \n\t"
|
||||
"mov r2, #0 \n\t"
|
||||
"strex r3, r2, [%1] \n\t"
|
||||
"mov %0 , r3 \n\t"
|
||||
: "=r"(ret), "=r"(address)
|
||||
: "1"(address)
|
||||
: "memory", "r2" , "r3"
|
||||
|
||||
|
||||
);
|
||||
|
||||
} while (ret);
|
||||
|
||||
}
|
||||
|
||||
|
||||
static inline unsigned long long rpcc(void){
|
||||
unsigned long long ret=0;
|
||||
double v;
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv,NULL);
|
||||
v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
|
||||
ret = (unsigned long long) ( v * 1000.0d );
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int blas_quickdivide(blasint x, blasint y){
|
||||
return x / y;
|
||||
}
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
|
||||
#else
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
|
||||
#endif
|
||||
|
||||
#define GET_IMAGE_CANCEL
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef F_INTERFACE
|
||||
#define REALNAME ASMNAME
|
||||
#else
|
||||
#define REALNAME ASMFNAME
|
||||
#endif
|
||||
|
||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||
|
||||
#define PROLOGUE \
|
||||
.arm ;\
|
||||
.global REALNAME ;\
|
||||
.func REALNAME ;\
|
||||
REALNAME:
|
||||
|
||||
#define EPILOGUE
|
||||
|
||||
#define PROFCODE
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define SEEK_ADDRESS
|
||||
|
||||
#ifndef PAGESIZE
|
||||
#define PAGESIZE ( 4 << 10)
|
||||
#endif
|
||||
#define HUGE_PAGESIZE ( 4 << 20)
|
||||
|
||||
#define BUFFER_SIZE (16 << 20)
|
||||
|
||||
|
||||
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
|
||||
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
#endif
|
||||
169
common_arm64.h
Normal file
169
common_arm64.h
Normal file
@@ -0,0 +1,169 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#ifndef COMMON_ARM64
|
||||
#define COMMON_ARM64
|
||||
|
||||
#define MB
|
||||
#define WMB
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
#define RETURN_BY_COMPLEX
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
/*
|
||||
int register ret;
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
__asm__ __volatile__(
|
||||
"ldrex r2, [%1] \n\t"
|
||||
"mov r2, #0 \n\t"
|
||||
"strex r3, r2, [%1] \n\t"
|
||||
"mov %0 , r3 \n\t"
|
||||
: "=r"(ret), "=r"(address)
|
||||
: "1"(address)
|
||||
: "memory", "r2" , "r3"
|
||||
|
||||
|
||||
);
|
||||
|
||||
} while (ret);
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
static inline unsigned long long rpcc(void){
|
||||
unsigned long long ret=0;
|
||||
double v;
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv,NULL);
|
||||
v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
|
||||
ret = (unsigned long long) ( v * 1000.0d );
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int blas_quickdivide(blasint x, blasint y){
|
||||
return x / y;
|
||||
}
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
|
||||
#else
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
|
||||
#endif
|
||||
|
||||
#define GET_IMAGE_CANCEL
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef F_INTERFACE
|
||||
#define REALNAME ASMNAME
|
||||
#else
|
||||
#define REALNAME ASMFNAME
|
||||
#endif
|
||||
|
||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||
|
||||
#define PROLOGUE \
|
||||
.arm ;\
|
||||
.global REALNAME ;\
|
||||
.func REALNAME ;\
|
||||
REALNAME:
|
||||
|
||||
#define EPILOGUE
|
||||
|
||||
#define PROFCODE
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define SEEK_ADDRESS
|
||||
|
||||
#ifndef PAGESIZE
|
||||
#define PAGESIZE ( 4 << 10)
|
||||
#endif
|
||||
#define HUGE_PAGESIZE ( 4 << 20)
|
||||
|
||||
#define BUFFER_SIZE (16 << 20)
|
||||
|
||||
|
||||
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
|
||||
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -379,8 +379,15 @@ REALNAME:
|
||||
#define PROFCODE
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
#define GNUSTACK .section .note.GNU-stack,"",@progbits
|
||||
#else
|
||||
#define GNUSTACK
|
||||
#endif
|
||||
|
||||
#define EPILOGUE \
|
||||
.endp REALNAME
|
||||
.endp REALNAME ; \
|
||||
GNUSTACK
|
||||
|
||||
#define START_ADDRESS 0x20000fc800000000UL
|
||||
|
||||
|
||||
@@ -38,8 +38,15 @@
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
/* Assume C declarations for C++ */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
int BLASFUNC(xerbla)(char *, blasint *info, blasint);
|
||||
|
||||
void openblas_set_num_threads_(int *);
|
||||
|
||||
FLOATRET BLASFUNC(sdot) (blasint *, float *, blasint *, float *, blasint *);
|
||||
FLOATRET BLASFUNC(sdsdot)(blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
|
||||
@@ -69,19 +76,19 @@ myxcomplex_t BLASFUNC(xdotu) (blasint *, xdouble *, blasint *, xdouble *,
|
||||
myxcomplex_t BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
|
||||
#elif defined RETURN_BY_STACK
|
||||
void BLASFUNC(cdotu) (float _Complex *, blasint *, float * , blasint *, float *, blasint *);
|
||||
void BLASFUNC(cdotc) (float _Complex *, blasint *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(zdotu) (double _Complex *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(zdotc) (double _Complex *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(xdotu) (xdouble _Complex *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
void BLASFUNC(xdotc) (xdouble _Complex *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
void BLASFUNC(cdotu) (openblas_complex_float *, blasint *, float * , blasint *, float *, blasint *);
|
||||
void BLASFUNC(cdotc) (openblas_complex_float *, blasint *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(zdotu) (openblas_complex_double *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(zdotc) (openblas_complex_double *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(xdotu) (openblas_complex_xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
void BLASFUNC(xdotc) (openblas_complex_xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
#else
|
||||
float _Complex BLASFUNC(cdotu) (blasint *, float *, blasint *, float *, blasint *);
|
||||
float _Complex BLASFUNC(cdotc) (blasint *, float *, blasint *, float *, blasint *);
|
||||
double _Complex BLASFUNC(zdotu) (blasint *, double *, blasint *, double *, blasint *);
|
||||
double _Complex BLASFUNC(zdotc) (blasint *, double *, blasint *, double *, blasint *);
|
||||
xdouble _Complex BLASFUNC(xdotu) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
xdouble _Complex BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
openblas_complex_float BLASFUNC(cdotu) (blasint *, float *, blasint *, float *, blasint *);
|
||||
openblas_complex_float BLASFUNC(cdotc) (blasint *, float *, blasint *, float *, blasint *);
|
||||
openblas_complex_double BLASFUNC(zdotu) (blasint *, double *, blasint *, double *, blasint *);
|
||||
openblas_complex_double BLASFUNC(zdotc) (blasint *, double *, blasint *, double *, blasint *);
|
||||
openblas_complex_xdouble BLASFUNC(xdotu) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
openblas_complex_xdouble BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
#endif
|
||||
|
||||
void BLASFUNC(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
@@ -635,6 +642,8 @@ int BLASFUNC(zgemc)(char *, char *, blasint *, blasint *, blasint *, double *,
|
||||
int BLASFUNC(xgemc)(char *, char *, blasint *, blasint *, blasint *, xdouble *,
|
||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
|
||||
/* Lapack routines */
|
||||
|
||||
int BLASFUNC(sgetf2)(blasint *, blasint *, float *, blasint *, blasint *, blasint *);
|
||||
int BLASFUNC(dgetf2)(blasint *, blasint *, double *, blasint *, blasint *, blasint *);
|
||||
int BLASFUNC(qgetf2)(blasint *, blasint *, xdouble *, blasint *, blasint *, blasint *);
|
||||
@@ -670,6 +679,13 @@ int BLASFUNC(cgesv)(blasint *, blasint *, float *, blasint *, blasint *, float
|
||||
int BLASFUNC(zgesv)(blasint *, blasint *, double *, blasint *, blasint *, double*, blasint *, blasint *);
|
||||
int BLASFUNC(xgesv)(blasint *, blasint *, xdouble *, blasint *, blasint *, xdouble*, blasint *, blasint *);
|
||||
|
||||
int BLASFUNC(sgesvd)(char *, char *, blasint *, blasint *, float *, blasint *, float *, float *, blasint *, float *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(dgesvd)(char *, char *, blasint *, blasint *, double *, blasint *, double *, double *, blasint *, double *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(qgesvd)(char *, char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *, blasint *);
|
||||
int BLASFUNC(cgesvd)(char *, char *, blasint *, blasint *, float *, blasint *, float *, float *, blasint *, float *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(zgesvd)(char *, char *, blasint *, blasint *, double *, blasint *, double *, double *, blasint *, double *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(xgesvd)(char *, char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *, blasint *);
|
||||
|
||||
int BLASFUNC(spotf2)(char *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(dpotf2)(char *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(qpotf2)(char *, blasint *, xdouble *, blasint *, blasint *);
|
||||
@@ -684,6 +700,13 @@ int BLASFUNC(cpotrf)(char *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(zpotrf)(char *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(xpotrf)(char *, blasint *, xdouble *, blasint *, blasint *);
|
||||
|
||||
int BLASFUNC(spotrs)(char *, blasint *, blasint *, float *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(dpotrs)(char *, blasint *, blasint *, double *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(qpotrs)(char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, blasint *, blasint *);
|
||||
int BLASFUNC(cpotrs)(char *, blasint *, blasint *, float *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(zpotrs)(char *, blasint *, blasint *, double *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(xpotrs)(char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, blasint *, blasint *);
|
||||
|
||||
int BLASFUNC(slauu2)(char *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(dlauu2)(char *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(qlauu2)(char *, blasint *, xdouble *, blasint *, blasint *);
|
||||
@@ -733,4 +756,10 @@ xdouble BLASFUNC(qlamch)(char *);
|
||||
FLOATRET BLASFUNC(slamc3)(float *, float *);
|
||||
double BLASFUNC(dlamc3)(double *, double *);
|
||||
xdouble BLASFUNC(qlamc3)(xdouble *, xdouble *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
||||
|
||||
@@ -65,20 +65,47 @@ extern long int syscall (long int __sysno, ...);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
static inline int my_mbind(void *addr, unsigned long len, int mode,
|
||||
unsigned long *nodemask, unsigned long maxnode,
|
||||
unsigned flags) {
|
||||
#if defined (__LSB_VERSION__)
|
||||
// So far, LSB (Linux Standard Base) don't support syscall().
|
||||
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
|
||||
return 0;
|
||||
#else
|
||||
#if defined (LOONGSON3B)
|
||||
#if defined (__64BIT__)
|
||||
return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags);
|
||||
#else
|
||||
return 0; //NULL Implementation on Loongson 3B 32bit.
|
||||
#endif
|
||||
#else
|
||||
//Fixed randomly SEGFAULT when nodemask==NULL with above Linux 2.6.34
|
||||
unsigned long null_nodemask=0;
|
||||
return syscall(SYS_mbind, addr, len, mode, &null_nodemask, maxnode, flags);
|
||||
// unsigned long null_nodemask=0;
|
||||
return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) {
|
||||
|
||||
#if defined (__LSB_VERSION__)
|
||||
// So far, LSB (Linux Standard Base) don't support syscall().
|
||||
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
|
||||
return 0;
|
||||
#else
|
||||
return syscall(SYS_set_mempolicy, mode, addr, flag);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline int my_gettid(void) { return syscall(SYS_gettid); }
|
||||
static inline int my_gettid(void) {
|
||||
#ifdef SYS_gettid
|
||||
return syscall(SYS_gettid);
|
||||
#else
|
||||
return getpid();
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -2127,7 +2127,9 @@
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64)
|
||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
|
||||
extern BLASLONG gemm_offset_a;
|
||||
extern BLASLONG gemm_offset_b;
|
||||
extern BLASLONG sgemm_p;
|
||||
extern BLASLONG sgemm_q;
|
||||
extern BLASLONG sgemm_r;
|
||||
|
||||
@@ -101,10 +101,15 @@ static void INLINE blas_lock(volatile unsigned long *address){
|
||||
|
||||
static inline unsigned int rpcc(void){
|
||||
unsigned long ret;
|
||||
#if defined(LOONGSON3A)
|
||||
unsigned long long tmp;
|
||||
__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory");
|
||||
ret=tmp;
|
||||
#if defined(LOONGSON3A) || defined(LOONGSON3B)
|
||||
// unsigned long long tmp;
|
||||
//__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory");
|
||||
//ret=tmp;
|
||||
__asm__ __volatile__(".set push \n"
|
||||
".set mips32r2\n"
|
||||
"rdhwr %0, $2\n"
|
||||
".set pop": "=r"(ret):: "memory");
|
||||
|
||||
#else
|
||||
__asm__ __volatile__(".set push \n"
|
||||
".set mips32r2\n"
|
||||
@@ -114,6 +119,21 @@ static inline unsigned int rpcc(void){
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if defined(LOONGSON3A) || defined(LOONGSON3B)
|
||||
#ifndef NO_AFFINITY
|
||||
#define WHEREAMI
|
||||
static inline int WhereAmI(void){
|
||||
int ret=0;
|
||||
__asm__ __volatile__(".set push \n"
|
||||
".set mips32r2\n"
|
||||
"rdhwr %0, $0\n"
|
||||
".set pop": "=r"(ret):: "memory");
|
||||
return ret;
|
||||
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static inline int blas_quickdivide(blasint x, blasint y){
|
||||
return x / y;
|
||||
}
|
||||
@@ -152,6 +172,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||
#define CMPEQ c.eq.d
|
||||
#define CMPLE c.le.d
|
||||
#define CMPLT c.lt.d
|
||||
#define NEG neg.d
|
||||
#else
|
||||
#define LD lwc1
|
||||
#define ST swc1
|
||||
@@ -170,6 +191,14 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||
#define CMPEQ c.eq.s
|
||||
#define CMPLE c.le.s
|
||||
#define CMPLT c.lt.s
|
||||
#define PLU plu.ps
|
||||
#define PLL pll.ps
|
||||
#define PUU puu.ps
|
||||
#define PUL pul.ps
|
||||
#define MADPS madd.ps
|
||||
#define CVTU cvt.s.pu
|
||||
#define CVTL cvt.s.pl
|
||||
#define NEG neg.s
|
||||
#endif
|
||||
|
||||
#if defined(__64BIT__) && defined(USE64BITINT)
|
||||
@@ -206,10 +235,17 @@ REALNAME: ;\
|
||||
.set noreorder ;\
|
||||
.set nomacro
|
||||
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
#define GNUSTACK .section .note.GNU-stack,"",@progbits
|
||||
#else
|
||||
#define GNUSTACK
|
||||
#endif
|
||||
|
||||
#define EPILOGUE \
|
||||
.set macro ;\
|
||||
.set reorder ;\
|
||||
.end REALNAME
|
||||
.end REALNAME ;\
|
||||
GNUSTACK
|
||||
|
||||
#define PROFCODE
|
||||
#endif
|
||||
@@ -218,7 +254,17 @@ REALNAME: ;\
|
||||
|
||||
#define SEEK_ADDRESS
|
||||
|
||||
#define BUFFER_SIZE ( 8 << 20)
|
||||
#define BUFFER_SIZE ( 32 << 20)
|
||||
|
||||
#if defined(LOONGSON3A)
|
||||
#define PAGESIZE (16UL << 10)
|
||||
#define FIXED_PAGESIZE (16UL << 10)
|
||||
#endif
|
||||
|
||||
#if defined(LOONGSON3B)
|
||||
#define PAGESIZE (16UL << 10)
|
||||
#define FIXED_PAGESIZE (16UL << 10)
|
||||
#endif
|
||||
|
||||
#ifndef PAGESIZE
|
||||
#define PAGESIZE (64UL << 10)
|
||||
@@ -231,7 +277,7 @@ REALNAME: ;\
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
#if defined(LOONGSON3A)
|
||||
#if defined(LOONGSON3A) || defined(LOONGSON3B)
|
||||
#define PREFETCHD_(x) ld $0, x
|
||||
#define PREFETCHD(x) PREFETCHD_(x)
|
||||
#else
|
||||
|
||||
@@ -44,6 +44,7 @@
|
||||
#ifdef DYNAMIC_ARCH
|
||||
|
||||
typedef struct {
|
||||
int dtb_entries;
|
||||
int offsetA, offsetB, align;
|
||||
|
||||
int sgemm_p, sgemm_q, sgemm_r;
|
||||
@@ -813,6 +814,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
extern gotoblas_t *gotoblas;
|
||||
|
||||
#define DTB_ENTRIES gotoblas -> dtb_entries
|
||||
#define GEMM_OFFSET_A gotoblas -> offsetA
|
||||
#define GEMM_OFFSET_B gotoblas -> offsetB
|
||||
#define GEMM_ALIGN gotoblas -> align
|
||||
@@ -863,6 +865,8 @@ extern gotoblas_t *gotoblas;
|
||||
|
||||
#else
|
||||
|
||||
#define DTB_ENTRIES DTB_DEFAULT_ENTRIES
|
||||
|
||||
#define GEMM_OFFSET_A GEMM_DEFAULT_OFFSET_A
|
||||
#define GEMM_OFFSET_B GEMM_DEFAULT_OFFSET_B
|
||||
#define GEMM_ALIGN GEMM_DEFAULT_ALIGN
|
||||
@@ -997,14 +1001,14 @@ extern gotoblas_t *gotoblas;
|
||||
#endif
|
||||
|
||||
#ifdef XDOUBLE
|
||||
#define GEMM3M_UNROLL_M QGEMM_DEFAULT_UNROLL_M
|
||||
#define GEMM3M_UNROLL_N QGEMM_DEFAULT_UNROLL_N
|
||||
#define GEMM3M_UNROLL_M QGEMM_UNROLL_M
|
||||
#define GEMM3M_UNROLL_N QGEMM_UNROLL_N
|
||||
#elif defined(DOUBLE)
|
||||
#define GEMM3M_UNROLL_M DGEMM_DEFAULT_UNROLL_M
|
||||
#define GEMM3M_UNROLL_N DGEMM_DEFAULT_UNROLL_N
|
||||
#define GEMM3M_UNROLL_M DGEMM_UNROLL_M
|
||||
#define GEMM3M_UNROLL_N DGEMM_UNROLL_N
|
||||
#else
|
||||
#define GEMM3M_UNROLL_M SGEMM_DEFAULT_UNROLL_M
|
||||
#define GEMM3M_UNROLL_N SGEMM_DEFAULT_UNROLL_N
|
||||
#define GEMM3M_UNROLL_M SGEMM_UNROLL_M
|
||||
#define GEMM3M_UNROLL_N SGEMM_UNROLL_N
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@@ -60,4 +60,10 @@ float _Complex BLASFUNC_REF(cdotc) (blasint *, float *, blasint *, float *,
|
||||
double _Complex BLASFUNC_REF(zdotu) (blasint *, double *, blasint *, double *, blasint *);
|
||||
double _Complex BLASFUNC_REF(zdotc) (blasint *, double *, blasint *, double *, blasint *);
|
||||
|
||||
void BLASFUNC_REF(drotmg)(double *, double *, double *, double *, double *);
|
||||
|
||||
double BLASFUNC_REF(dsdot)(blasint *, float *, blasint *, float *, blasint*);
|
||||
|
||||
FLOATRET BLASFUNC_REF(samax) (blasint *, float *, blasint *);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -199,8 +199,17 @@ static __inline int blas_quickdivide(blasint x, blasint y){
|
||||
.type REALNAME, #function; \
|
||||
.proc 07; \
|
||||
REALNAME:;
|
||||
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
#define GNUSTACK .section .note.GNU-stack,"",@progbits
|
||||
#else
|
||||
#define GNUSTACK
|
||||
#endif
|
||||
|
||||
#define EPILOGUE \
|
||||
.size REALNAME, .-REALNAME
|
||||
.size REALNAME, .-REALNAME; \
|
||||
GNUSTACK
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -39,6 +39,11 @@
|
||||
#ifndef COMMON_THREAD
|
||||
#define COMMON_THREAD
|
||||
|
||||
#ifdef USE_OPENMP
|
||||
#include <omp.h>
|
||||
extern void goto_set_num_threads(int nthreads);
|
||||
#endif
|
||||
|
||||
/* Basic Thread Debugging */
|
||||
#undef SMP_DEBUG
|
||||
|
||||
@@ -98,7 +103,7 @@ typedef struct blas_queue {
|
||||
|
||||
struct blas_queue *next;
|
||||
|
||||
#if defined( __WIN32__) || defined(__CYGWIN32__)
|
||||
#if defined( __WIN32__) || defined(__CYGWIN32__) || defined(_WIN32) || defined(__CYGWIN__)
|
||||
CRITICAL_SECTION lock;
|
||||
HANDLE finish;
|
||||
#else
|
||||
@@ -126,13 +131,24 @@ extern int blas_server_avail;
|
||||
|
||||
static __inline int num_cpu_avail(int level) {
|
||||
|
||||
if ((blas_cpu_number == 1)
|
||||
#ifdef USE_OPENMP
|
||||
int openmp_nthreads=0;
|
||||
#endif
|
||||
|
||||
if (blas_cpu_number == 1
|
||||
|
||||
#ifdef USE_OPENMP
|
||||
|| omp_in_parallel()
|
||||
#endif
|
||||
) return 1;
|
||||
|
||||
#ifdef USE_OPENMP
|
||||
openmp_nthreads=omp_get_max_threads();
|
||||
if (blas_cpu_number != openmp_nthreads) {
|
||||
goto_set_num_threads(openmp_nthreads);
|
||||
}
|
||||
#endif
|
||||
|
||||
return blas_cpu_number;
|
||||
|
||||
}
|
||||
|
||||
22
common_x86.h
22
common_x86.h
@@ -171,6 +171,11 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
#define MMXSTORE movd
|
||||
#endif
|
||||
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER)
|
||||
//Enable some optimazation for barcelona.
|
||||
#define BARCELONA_OPTIMIZATION
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_3DNOW)
|
||||
#define EMMS femms
|
||||
#elif defined(HAVE_MMX)
|
||||
@@ -254,7 +259,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
#define PROFCODE
|
||||
#endif
|
||||
|
||||
#if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INERIX)
|
||||
#if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INTERIX)
|
||||
#define SAVEREGISTERS \
|
||||
subl $32, %esp;\
|
||||
movups %xmm6, 0(%esp);\
|
||||
@@ -269,7 +274,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
#define RESTOREREGISTERS
|
||||
#endif
|
||||
|
||||
#if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INERIX)
|
||||
#if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INTERIX)
|
||||
#define PROLOGUE \
|
||||
.text; \
|
||||
.align 16; \
|
||||
@@ -282,7 +287,7 @@ REALNAME:
|
||||
#define EPILOGUE .end REALNAME
|
||||
#endif
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_FreeBSD) || defined(OS_NetBSD) || defined(__ELF__)
|
||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__)
|
||||
#define PROLOGUE \
|
||||
.text; \
|
||||
.align 16; \
|
||||
@@ -296,7 +301,9 @@ REALNAME:
|
||||
#define PROFCODE
|
||||
#endif
|
||||
|
||||
#define EPILOGUE .size REALNAME, .-REALNAME
|
||||
#define EPILOGUE \
|
||||
.size REALNAME, .-REALNAME; \
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
||||
#endif
|
||||
|
||||
@@ -335,6 +342,7 @@ REALNAME:
|
||||
#define ALIGN_2 .align 2
|
||||
#define ALIGN_3 .align 3
|
||||
#define ALIGN_4 .align 4
|
||||
#define ALIGN_5 .align 5
|
||||
#define ffreep fstp
|
||||
#endif
|
||||
|
||||
@@ -357,3 +365,9 @@ REALNAME:
|
||||
#ifndef ALIGN_6
|
||||
#define ALIGN_6 .align 64
|
||||
#endif
|
||||
// ffreep %st(0).
|
||||
// Because Clang didn't support ffreep, we directly use the opcode.
|
||||
// Please check out http://www.sandpile.org/x86/opc_fpu.htm
|
||||
#ifndef ffreep
|
||||
#define ffreep .byte 0xdf, 0xc0 #
|
||||
#endif
|
||||
|
||||
@@ -218,6 +218,11 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
|
||||
#ifdef ASSEMBLER
|
||||
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER)
|
||||
//Enable some optimazation for barcelona.
|
||||
#define BARCELONA_OPTIMIZATION
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_3DNOW)
|
||||
#define EMMS femms
|
||||
#elif defined(HAVE_MMX)
|
||||
@@ -353,7 +358,7 @@ REALNAME:
|
||||
#define EPILOGUE .end REALNAME
|
||||
#endif
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_FreeBSD) || defined(OS_NetBSD) || defined(__ELF__) || defined(C_PGI)
|
||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) || defined(C_PGI)
|
||||
#define PROLOGUE \
|
||||
.text; \
|
||||
.align 512; \
|
||||
@@ -367,7 +372,10 @@ REALNAME:
|
||||
#define PROFCODE
|
||||
#endif
|
||||
|
||||
#define EPILOGUE .size REALNAME, .-REALNAME
|
||||
#define EPILOGUE \
|
||||
.size REALNAME, .-REALNAME; \
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -425,6 +433,7 @@ REALNAME:
|
||||
#define ALIGN_2 .align 2
|
||||
#define ALIGN_3 .align 3
|
||||
#define ALIGN_4 .align 4
|
||||
#define ALIGN_5 .align 5
|
||||
#define ffreep fstp
|
||||
#endif
|
||||
|
||||
@@ -448,4 +457,10 @@ REALNAME:
|
||||
#define ALIGN_6 .align 64
|
||||
#endif
|
||||
|
||||
// ffreep %st(0).
|
||||
// Because Clang didn't support ffreep, we directly use the opcode.
|
||||
// Please check out http://www.sandpile.org/x86/opc_fpu.htm
|
||||
#ifndef ffreep
|
||||
#define ffreep .byte 0xdf, 0xc0 #
|
||||
#endif
|
||||
#endif
|
||||
|
||||
14
cpuid.h
14
cpuid.h
@@ -103,6 +103,11 @@
|
||||
#define CORE_NEHALEM 17
|
||||
#define CORE_ATOM 18
|
||||
#define CORE_NANO 19
|
||||
#define CORE_SANDYBRIDGE 20
|
||||
#define CORE_BOBCAT 21
|
||||
#define CORE_BULLDOZER 22
|
||||
#define CORE_PILEDRIVER 23
|
||||
#define CORE_HASWELL 24
|
||||
|
||||
#define HAVE_SSE (1 << 0)
|
||||
#define HAVE_SSE2 (1 << 1)
|
||||
@@ -122,6 +127,9 @@
|
||||
#define HAVE_MISALIGNSSE (1 << 15)
|
||||
#define HAVE_128BITFPU (1 << 16)
|
||||
#define HAVE_FASTMOVU (1 << 17)
|
||||
#define HAVE_AVX (1 << 18)
|
||||
#define HAVE_FMA4 (1 << 19)
|
||||
#define HAVE_FMA3 (1 << 20)
|
||||
|
||||
#define CACHE_INFO_L1_I 1
|
||||
#define CACHE_INFO_L1_D 2
|
||||
@@ -188,4 +196,10 @@ typedef struct {
|
||||
#define CPUTYPE_NSGEODE 41
|
||||
#define CPUTYPE_VIAC3 42
|
||||
#define CPUTYPE_NANO 43
|
||||
#define CPUTYPE_SANDYBRIDGE 44
|
||||
#define CPUTYPE_BOBCAT 45
|
||||
#define CPUTYPE_BULLDOZER 46
|
||||
#define CPUTYPE_PILEDRIVER 47
|
||||
#define CPUTYPE_HASWELL 48
|
||||
|
||||
#endif
|
||||
|
||||
@@ -72,7 +72,7 @@ void get_cpuconfig(void){
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 2097152\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_ENTRIES 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 32\n");
|
||||
printf("#define DTB_SIZE 8192\n");
|
||||
break;
|
||||
|
||||
@@ -81,7 +81,7 @@ void get_cpuconfig(void){
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 2097152\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define DTB_ENTRIES 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 8192\n");
|
||||
break;
|
||||
|
||||
@@ -90,7 +90,7 @@ void get_cpuconfig(void){
|
||||
printf("#define L1_DATA_LINESIZE 64\n");
|
||||
printf("#define L2_SIZE 4194304\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define DTB_ENTRIES 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 8192\n");
|
||||
break;
|
||||
}
|
||||
|
||||
262
cpuid_arm.c
Normal file
262
cpuid_arm.c
Normal file
@@ -0,0 +1,262 @@
|
||||
/**************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_ARMV6 1
|
||||
#define CPU_ARMV7 2
|
||||
#define CPU_CORTEXA15 3
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKOWN",
|
||||
"ARMV6",
|
||||
"ARMV7",
|
||||
"CORTEXA15"
|
||||
};
|
||||
|
||||
|
||||
int get_feature(char *search)
|
||||
{
|
||||
|
||||
#ifdef linux
|
||||
FILE *infile;
|
||||
char buffer[2048], *p,*t;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if (!strncmp("Features", buffer, 8))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
|
||||
|
||||
if( p == NULL ) return;
|
||||
|
||||
t = strtok(p," ");
|
||||
while( t = strtok(NULL," "))
|
||||
{
|
||||
if (!strcmp(t, search)) { return(1); }
|
||||
}
|
||||
|
||||
#endif
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
||||
int detect(void)
|
||||
{
|
||||
|
||||
#ifdef linux
|
||||
|
||||
FILE *infile;
|
||||
char buffer[512], *p;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if (!strncmp("model name", buffer, 10))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
|
||||
if(p != NULL)
|
||||
{
|
||||
|
||||
if (strstr(p, "ARMv7"))
|
||||
{
|
||||
if ( get_feature("vfpv4"))
|
||||
return CPU_ARMV7;
|
||||
|
||||
if ( get_feature("vfpv3"))
|
||||
return CPU_ARMV7;
|
||||
|
||||
if ( get_feature("vfp"))
|
||||
return CPU_ARMV6;
|
||||
|
||||
|
||||
}
|
||||
|
||||
if (strstr(p, "ARMv6"))
|
||||
{
|
||||
if ( get_feature("vfp"))
|
||||
return CPU_ARMV6;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
return CPU_UNKNOWN;
|
||||
}
|
||||
|
||||
char *get_corename(void)
|
||||
{
|
||||
return cpuname[detect()];
|
||||
}
|
||||
|
||||
void get_architecture(void)
|
||||
{
|
||||
printf("ARM");
|
||||
}
|
||||
|
||||
void get_subarchitecture(void)
|
||||
{
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV7:
|
||||
printf("ARMV7");
|
||||
break;
|
||||
|
||||
case CPU_ARMV6:
|
||||
printf("ARMV6");
|
||||
break;
|
||||
|
||||
default:
|
||||
printf("UNKNOWN");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void get_subdirname(void)
|
||||
{
|
||||
printf("arm");
|
||||
}
|
||||
|
||||
void get_cpuconfig(void)
|
||||
{
|
||||
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV7:
|
||||
printf("#define ARMV7\n");
|
||||
printf("#define HAVE_VFP\n");
|
||||
printf("#define HAVE_VFPV3\n");
|
||||
if ( get_feature("neon")) printf("#define HAVE_NEON\n");
|
||||
if ( get_feature("vfpv4")) printf("#define HAVE_VFPV4\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
break;
|
||||
|
||||
case CPU_ARMV6:
|
||||
printf("#define ARMV6\n");
|
||||
printf("#define HAVE_VFP\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void get_libname(void)
|
||||
{
|
||||
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV7:
|
||||
printf("armv7\n");
|
||||
break;
|
||||
|
||||
case CPU_ARMV6:
|
||||
printf("armv6\n");
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void get_features(void)
|
||||
{
|
||||
|
||||
#ifdef linux
|
||||
FILE *infile;
|
||||
char buffer[2048], *p,*t;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if (!strncmp("Features", buffer, 8))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
|
||||
|
||||
if( p == NULL ) return;
|
||||
|
||||
t = strtok(p," ");
|
||||
while( t = strtok(NULL," "))
|
||||
{
|
||||
if (!strcmp(t, "vfp")) { printf("HAVE_VFP=1\n"); continue; }
|
||||
if (!strcmp(t, "vfpv3")) { printf("HAVE_VFPV3=1\n"); continue; }
|
||||
if (!strcmp(t, "vfpv4")) { printf("HAVE_VFPV4=1\n"); continue; }
|
||||
if (!strcmp(t, "neon")) { printf("HAVE_NEON=1\n"); continue; }
|
||||
}
|
||||
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -133,6 +133,6 @@ void get_cpuconfig(void){
|
||||
printf("#define L2_SIZE 1572864\n");
|
||||
printf("#define L2_LINESIZE 128\n");
|
||||
printf("#define DTB_SIZE 16384\n");
|
||||
printf("#define DTB_ENTRIES 128\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 128\n");
|
||||
}
|
||||
|
||||
|
||||
45
cpuid_mips.c
45
cpuid_mips.c
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -72,11 +72,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_SICORTEX 1
|
||||
#define CPU_LOONGSON3A 2
|
||||
#define CPU_LOONGSON3B 3
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKOWN",
|
||||
"SICORTEX",
|
||||
"LOONGSON3A"
|
||||
"LOONGSON3A",
|
||||
"LOONGSON3B"
|
||||
};
|
||||
|
||||
int detect(void){
|
||||
@@ -99,10 +101,14 @@ int detect(void){
|
||||
|
||||
fclose(infile);
|
||||
|
||||
if(p != NULL){
|
||||
if (strstr(p, "Loongson-3A")){
|
||||
return CPU_LOONGSON3A;
|
||||
}else if(strstr(p, "Loongson-3B")){
|
||||
return CPU_LOONGSON3B;
|
||||
}else if (strstr(p, "Loongson-3")){
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
p = (char *)NULL;
|
||||
while (fgets(buffer, sizeof(buffer), infile)){
|
||||
if (!strncmp("system type", buffer, 11)){
|
||||
p = strchr(buffer, ':') + 2;
|
||||
@@ -115,6 +121,24 @@ int detect(void){
|
||||
}else{
|
||||
return CPU_SICORTEX;
|
||||
}
|
||||
}
|
||||
//Check model name for Loongson3
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
p = (char *)NULL;
|
||||
while (fgets(buffer, sizeof(buffer), infile)){
|
||||
if (!strncmp("model name", buffer, 10)){
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(infile);
|
||||
if(p != NULL){
|
||||
if (strstr(p, "Loongson-3A")){
|
||||
return CPU_LOONGSON3A;
|
||||
}else if(strstr(p, "Loongson-3B")){
|
||||
return CPU_LOONGSON3B;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return CPU_UNKNOWN;
|
||||
}
|
||||
@@ -130,6 +154,8 @@ void get_architecture(void){
|
||||
void get_subarchitecture(void){
|
||||
if(detect()==CPU_LOONGSON3A) {
|
||||
printf("LOONGSON3A");
|
||||
}else if(detect()==CPU_LOONGSON3B){
|
||||
printf("LOONGSON3B");
|
||||
}else{
|
||||
printf("SICORTEX");
|
||||
}
|
||||
@@ -146,7 +172,16 @@ void get_cpuconfig(void){
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_ENTRIES 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
}else if(detect()==CPU_LOONGSON3B){
|
||||
printf("#define LOONGSON3B\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
}else{
|
||||
@@ -155,7 +190,7 @@ void get_cpuconfig(void){
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_ENTRIES 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 32\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 8\n");
|
||||
}
|
||||
@@ -164,6 +199,8 @@ void get_cpuconfig(void){
|
||||
void get_libname(void){
|
||||
if(detect()==CPU_LOONGSON3A) {
|
||||
printf("loongson3a\n");
|
||||
}else if(detect()==CPU_LOONGSON3B) {
|
||||
printf("loongson3b\n");
|
||||
}else{
|
||||
#ifdef __mips64
|
||||
printf("mips64\n");
|
||||
|
||||
@@ -114,6 +114,7 @@ int detect(void){
|
||||
if (!strncasecmp(p, "PPC970", 6)) return CPUTYPE_PPC970;
|
||||
if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
|
||||
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
|
||||
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
|
||||
|
||||
@@ -165,7 +166,7 @@ void get_cpuconfig(void){
|
||||
printf("#define L1_DATA_LINESIZE 128\n");
|
||||
printf("#define L2_SIZE 524288\n");
|
||||
printf("#define L2_LINESIZE 128 \n");
|
||||
printf("#define DTB_ENTRIES 128\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 128\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 8\n");
|
||||
|
||||
|
||||
@@ -50,7 +50,7 @@ void get_subdirname(void){
|
||||
|
||||
void get_cpuconfig(void){
|
||||
printf("#define V9\n");
|
||||
printf("#define DTB_ENTRIES 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 32\n");
|
||||
}
|
||||
|
||||
void get_libname(void){
|
||||
|
||||
250
cpuid_x86.c
250
cpuid_x86.c
@@ -40,6 +40,17 @@
|
||||
#include <string.h>
|
||||
#include "cpuid.h"
|
||||
|
||||
#ifdef NO_AVX
|
||||
#define CPUTYPE_HASWELL CPUTYPE_NEHALEM
|
||||
#define CORE_HASWELL CORE_NEHALEM
|
||||
#define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM
|
||||
#define CORE_SANDYBRIDGE CORE_NEHALEM
|
||||
#define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA
|
||||
#define CORE_BULLDOZER CORE_BARCELONA
|
||||
#define CPUTYPE_PILEDRIVER CPUTYPE_BARCELONA
|
||||
#define CORE_PILEDRIVER CORE_BARCELONA
|
||||
#endif
|
||||
|
||||
#ifndef CPUIDEMU
|
||||
|
||||
#if defined(__APPLE__) && defined(__i386__)
|
||||
@@ -109,6 +120,33 @@ static inline int have_excpuid(void){
|
||||
return eax & 0xffff;
|
||||
}
|
||||
|
||||
#ifndef NO_AVX
|
||||
static inline void xgetbv(int op, int * eax, int * edx){
|
||||
//Use binary code for xgetbv
|
||||
__asm__ __volatile__
|
||||
(".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
|
||||
}
|
||||
#endif
|
||||
|
||||
int support_avx(){
|
||||
#ifndef NO_AVX
|
||||
int eax, ebx, ecx, edx;
|
||||
int ret=0;
|
||||
|
||||
cpuid(1, &eax, &ebx, &ecx, &edx);
|
||||
if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){
|
||||
xgetbv(0, &eax, &edx);
|
||||
if((eax & 6) == 6){
|
||||
ret=1; //OS support AVX
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
int get_vendor(void){
|
||||
int eax, ebx, ecx, edx;
|
||||
char vendor[13];
|
||||
@@ -189,11 +227,18 @@ int get_cputype(int gettype){
|
||||
if ((ecx & (1 << 9)) != 0) feature |= HAVE_SSSE3;
|
||||
if ((ecx & (1 << 19)) != 0) feature |= HAVE_SSE4_1;
|
||||
if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
|
||||
#ifndef NO_AVX
|
||||
if (support_avx()) feature |= HAVE_AVX;
|
||||
if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3;
|
||||
#endif
|
||||
|
||||
if (have_excpuid() >= 0x01) {
|
||||
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
|
||||
if ((ecx & (1 << 6)) != 0) feature |= HAVE_SSE4A;
|
||||
if ((ecx & (1 << 7)) != 0) feature |= HAVE_MISALIGNSSE;
|
||||
#ifndef NO_AVX
|
||||
if ((ecx & (1 << 16)) != 0) feature |= HAVE_FMA4;
|
||||
#endif
|
||||
if ((edx & (1 << 30)) != 0) feature |= HAVE_3DNOWEX;
|
||||
if ((edx & (1 << 31)) != 0) feature |= HAVE_3DNOW;
|
||||
}
|
||||
@@ -974,18 +1019,58 @@ int get_cpuname(void){
|
||||
return CPUTYPE_DUNNINGTON;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
switch (model) {
|
||||
case 5:
|
||||
//Intel Core (Clarkdale) / Core (Arrandale)
|
||||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
||||
// Xeon (Clarkdale), 32nm
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 12:
|
||||
//Xeon Processor 5600 (Westmere-EP)
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
switch (model) {
|
||||
case 5:
|
||||
//Intel Core (Clarkdale) / Core (Arrandale)
|
||||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
||||
// Xeon (Clarkdale), 32nm
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 10:
|
||||
//Intel Core i5-2000 /i7-2000 (Sandy Bridge)
|
||||
if(support_avx())
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
else
|
||||
return CPUTYPE_NEHALEM; //OS doesn't support AVX
|
||||
case 12:
|
||||
//Xeon Processor 5600 (Westmere-EP)
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 13:
|
||||
//Intel Core i7-3000 / Xeon E5 (Sandy Bridge)
|
||||
if(support_avx())
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 14:
|
||||
// Xeon E7540
|
||||
case 15:
|
||||
//Xeon Processor E7 (Westmere-EX)
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
switch (model) {
|
||||
case 10:
|
||||
if(support_avx())
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 12:
|
||||
if(support_avx())
|
||||
return CPUTYPE_HASWELL;
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
switch (model) {
|
||||
case 5:
|
||||
if(support_avx())
|
||||
return CPUTYPE_HASWELL;
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x7:
|
||||
@@ -1018,6 +1103,23 @@ int get_cpuname(void){
|
||||
case 1:
|
||||
case 10:
|
||||
return CPUTYPE_BARCELONA;
|
||||
case 6:
|
||||
switch (model) {
|
||||
case 1:
|
||||
//AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
|
||||
if(support_avx())
|
||||
return CPUTYPE_BULLDOZER;
|
||||
else
|
||||
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||
case 2:
|
||||
if(support_avx())
|
||||
return CPUTYPE_PILEDRIVER;
|
||||
else
|
||||
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
return CPUTYPE_BOBCAT;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -1137,6 +1239,11 @@ static char *cpuname[] = {
|
||||
"NSGEODE",
|
||||
"VIAC3",
|
||||
"NANO",
|
||||
"SANDYBRIDGE",
|
||||
"BOBCAT",
|
||||
"BULLDOZER",
|
||||
"PILEDRIVER",
|
||||
"HASWELL",
|
||||
};
|
||||
|
||||
static char *lowercpuname[] = {
|
||||
@@ -1183,6 +1290,11 @@ static char *lowercpuname[] = {
|
||||
"tms3x00",
|
||||
"nsgeode",
|
||||
"nano",
|
||||
"sandybridge",
|
||||
"bobcat",
|
||||
"bulldozer",
|
||||
"piledriver",
|
||||
"haswell",
|
||||
};
|
||||
|
||||
static char *corename[] = {
|
||||
@@ -1206,6 +1318,11 @@ static char *corename[] = {
|
||||
"NEHALEM",
|
||||
"ATOM",
|
||||
"NANO",
|
||||
"SANDYBRIDGE",
|
||||
"BOBCAT",
|
||||
"BULLDOZER",
|
||||
"PILEDRIVER",
|
||||
"HASWELL",
|
||||
};
|
||||
|
||||
static char *corename_lower[] = {
|
||||
@@ -1229,6 +1346,11 @@ static char *corename_lower[] = {
|
||||
"nehalem",
|
||||
"atom",
|
||||
"nano",
|
||||
"sandybridge",
|
||||
"bobcat",
|
||||
"bulldozer",
|
||||
"piledriver",
|
||||
"haswell",
|
||||
};
|
||||
|
||||
|
||||
@@ -1302,24 +1424,65 @@ int get_coretype(void){
|
||||
case 13:
|
||||
return CORE_DUNNINGTON;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
switch (model) {
|
||||
case 5:
|
||||
//Intel Core (Clarkdale) / Core (Arrandale)
|
||||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
||||
// Xeon (Clarkdale), 32nm
|
||||
return CORE_NEHALEM;
|
||||
case 12:
|
||||
//Xeon Processor 5600 (Westmere-EP)
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
|
||||
break;
|
||||
case 2:
|
||||
switch (model) {
|
||||
case 5:
|
||||
//Intel Core (Clarkdale) / Core (Arrandale)
|
||||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
||||
// Xeon (Clarkdale), 32nm
|
||||
return CORE_NEHALEM;
|
||||
case 10:
|
||||
//Intel Core i5-2000 /i7-2000 (Sandy Bridge)
|
||||
if(support_avx())
|
||||
return CORE_SANDYBRIDGE;
|
||||
else
|
||||
return CORE_NEHALEM; //OS doesn't support AVX
|
||||
case 12:
|
||||
//Xeon Processor 5600 (Westmere-EP)
|
||||
return CORE_NEHALEM;
|
||||
case 13:
|
||||
//Intel Core i7-3000 / Xeon E5 (Sandy Bridge)
|
||||
if(support_avx())
|
||||
return CORE_SANDYBRIDGE;
|
||||
else
|
||||
return CORE_NEHALEM; //OS doesn't support AVX
|
||||
case 14:
|
||||
//Xeon E7540
|
||||
case 15:
|
||||
//Xeon Processor E7 (Westmere-EX)
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
switch (model) {
|
||||
case 10:
|
||||
if(support_avx())
|
||||
return CORE_SANDYBRIDGE;
|
||||
else
|
||||
return CORE_NEHALEM; //OS doesn't support AVX
|
||||
case 12:
|
||||
if(support_avx())
|
||||
return CORE_HASWELL;
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
switch (model) {
|
||||
case 5:
|
||||
if(support_avx())
|
||||
return CORE_HASWELL;
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case 15:
|
||||
if (model <= 0x2) return CORE_NORTHWOOD;
|
||||
return CORE_PRESCOTT;
|
||||
if (model <= 0x2) return CORE_NORTHWOOD;
|
||||
else return CORE_PRESCOTT;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1327,7 +1490,23 @@ int get_coretype(void){
|
||||
if (family <= 0x5) return CORE_80486;
|
||||
if (family <= 0xe) return CORE_ATHLON;
|
||||
if (family == 0xf){
|
||||
if ((exfamily == 0) || (exfamily == 2)) return CORE_OPTERON; else return CORE_BARCELONA;
|
||||
if ((exfamily == 0) || (exfamily == 2)) return CORE_OPTERON;
|
||||
else if (exfamily == 5) return CORE_BOBCAT;
|
||||
else if (exfamily == 6) {
|
||||
switch (model) {
|
||||
case 1:
|
||||
//AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
|
||||
if(support_avx())
|
||||
return CORE_BULLDOZER;
|
||||
else
|
||||
return CORE_BARCELONA; //OS don't support AVX.
|
||||
case 2:
|
||||
if(support_avx())
|
||||
return CORE_PILEDRIVER;
|
||||
else
|
||||
return CORE_BARCELONA; //OS don't support AVX.
|
||||
}
|
||||
}else return CORE_BARCELONA;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1392,7 +1571,10 @@ void get_cpuconfig(void){
|
||||
if (info.size > 0) {
|
||||
printf("#define DTB_SIZE %d\n", info.size * 1024);
|
||||
printf("#define DTB_ASSOCIATIVE %d\n", info.associative);
|
||||
printf("#define DTB_ENTRIES %d\n", info.linesize);
|
||||
printf("#define DTB_DEFAULT_ENTRIES %d\n", info.linesize);
|
||||
} else {
|
||||
//fall back for some virtual machines.
|
||||
printf("#define DTB_DEFAULT_ENTRIES 32\n");
|
||||
}
|
||||
|
||||
features = get_cputype(GET_FEATURE);
|
||||
@@ -1407,8 +1589,11 @@ void get_cpuconfig(void){
|
||||
if (features & HAVE_SSE4_2) printf("#define HAVE_SSE4_2\n");
|
||||
if (features & HAVE_SSE4A) printf("#define HAVE_SSE4A\n");
|
||||
if (features & HAVE_SSE5 ) printf("#define HAVE_SSSE5\n");
|
||||
if (features & HAVE_AVX ) printf("#define HAVE_AVX\n");
|
||||
if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n");
|
||||
if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n");
|
||||
if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n");
|
||||
if (features & HAVE_FMA3 ) printf("#define HAVE_FMA3\n");
|
||||
if (features & HAVE_CFLUSH) printf("#define HAVE_CFLUSH\n");
|
||||
if (features & HAVE_HIT) printf("#define HAVE_HIT 1\n");
|
||||
if (features & HAVE_MISALIGNSSE) printf("#define HAVE_MISALIGNSSE\n");
|
||||
@@ -1421,7 +1606,7 @@ void get_cpuconfig(void){
|
||||
features = get_coretype();
|
||||
if (features > 0) printf("#define CORE_%s\n", corename[features]);
|
||||
} else {
|
||||
printf("#define DTB_ENTRIES 16\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 16\n");
|
||||
printf("#define L1_CODE_SIZE 8192\n");
|
||||
printf("#define L1_DATA_SIZE 8192\n");
|
||||
printf("#define L2_SIZE 0\n");
|
||||
@@ -1472,7 +1657,10 @@ void get_sse(void){
|
||||
if (features & HAVE_SSE4_2) printf("HAVE_SSE4_2=1\n");
|
||||
if (features & HAVE_SSE4A) printf("HAVE_SSE4A=1\n");
|
||||
if (features & HAVE_SSE5 ) printf("HAVE_SSSE5=1\n");
|
||||
if (features & HAVE_AVX ) printf("HAVE_AVX=1\n");
|
||||
if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n");
|
||||
if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n");
|
||||
if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n");
|
||||
if (features & HAVE_FMA3 ) printf("HAVE_FMA3=1\n");
|
||||
|
||||
}
|
||||
|
||||
40
ctest.c
40
ctest.c
@@ -1,3 +1,17 @@
|
||||
//LSB (Linux Standard Base) compiler
|
||||
//only support lsbc++
|
||||
#if defined (__LSB_VERSION__)
|
||||
#if !defined (__cplusplus)
|
||||
COMPILER_LSB
|
||||
#else
|
||||
#error "OpenBLAS only supports lsbcc."
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
COMPILER_CLANG
|
||||
#endif
|
||||
|
||||
#if defined(__PGI) || defined(__PGIC__)
|
||||
COMPILER_PGI
|
||||
#endif
|
||||
@@ -34,20 +48,20 @@ COMPILER_GNU
|
||||
OS_LINUX
|
||||
#endif
|
||||
|
||||
#if defined(__FreeBSD__)
|
||||
OS_FreeBSD
|
||||
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
|
||||
OS_FREEBSD
|
||||
#endif
|
||||
|
||||
#if defined(__NetBSD__)
|
||||
OS_NetBSD
|
||||
OS_NETBSD
|
||||
#endif
|
||||
|
||||
#if defined(__sun)
|
||||
OS_SunOS
|
||||
OS_SUNOS
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__)
|
||||
OS_Darwin
|
||||
OS_DARWIN
|
||||
#endif
|
||||
|
||||
#if defined(_AIX)
|
||||
@@ -63,13 +77,18 @@ OS_WINNT
|
||||
#endif
|
||||
|
||||
#if defined(__CYGWIN__)
|
||||
OS_CYGWIN
|
||||
OS_CYGWIN_NT
|
||||
#endif
|
||||
|
||||
#if defined(__INTERIX)
|
||||
OS_INTERIX
|
||||
#endif
|
||||
|
||||
#if defined(__gnu_hurd__)
|
||||
/* Hurd is very similar to GNU/Linux, it should work out of the box */
|
||||
OS_LINUX
|
||||
#endif
|
||||
|
||||
#if defined(__i386) || defined(_X86)
|
||||
ARCH_X86
|
||||
#endif
|
||||
@@ -105,3 +124,12 @@ ARCH_IA64
|
||||
#if defined(__LP64) || defined(__LP64__) || defined(__ptr64) || defined(__x86_64__) || defined(__amd64__) || defined(__64BIT__)
|
||||
BINARY_64
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_ARCH) || defined(__ARM_ARCH_7A__)
|
||||
ARCH_ARM
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__)
|
||||
ARCH_ARM64
|
||||
#endif
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
TOPDIR = ..
|
||||
include $(TOPDIR)/Makefile.system
|
||||
|
||||
CFLAGS += -DADD$(BU) -DCBLAS
|
||||
override CFLAGS += -DADD$(BU) -DCBLAS
|
||||
|
||||
LIB = $(TOPDIR)/$(LIBNAME)
|
||||
|
||||
@@ -36,27 +36,48 @@ ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o constant.o
|
||||
all :: all1 all2 all3
|
||||
|
||||
all1: xscblat1 xdcblat1 xccblat1 xzcblat1
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
OMP_NUM_THREADS=2 ./xscblat1
|
||||
OMP_NUM_THREADS=2 ./xdcblat1
|
||||
OMP_NUM_THREADS=2 ./xccblat1
|
||||
OMP_NUM_THREADS=2 ./xzcblat1
|
||||
else
|
||||
OPENBLAS_NUM_THREADS=2 ./xscblat1
|
||||
OPENBLAS_NUM_THREADS=2 ./xdcblat1
|
||||
OPENBLAS_NUM_THREADS=2 ./xccblat1
|
||||
OPENBLAS_NUM_THREADS=2 ./xzcblat1
|
||||
endif
|
||||
|
||||
all2: xscblat2 xdcblat2 xccblat2 xzcblat2
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
OMP_NUM_THREADS=2 ./xscblat2 < sin2
|
||||
OMP_NUM_THREADS=2 ./xdcblat2 < din2
|
||||
OMP_NUM_THREADS=2 ./xccblat2 < cin2
|
||||
OMP_NUM_THREADS=2 ./xzcblat2 < zin2
|
||||
else
|
||||
OPENBLAS_NUM_THREADS=2 ./xscblat2 < sin2
|
||||
OPENBLAS_NUM_THREADS=2 ./xdcblat2 < din2
|
||||
OPENBLAS_NUM_THREADS=2 ./xccblat2 < cin2
|
||||
OPENBLAS_NUM_THREADS=2 ./xzcblat2 < zin2
|
||||
endif
|
||||
|
||||
all3: xscblat3 xdcblat3 xccblat3 xzcblat3
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
OMP_NUM_THREADS=2 ./xscblat3 < sin3
|
||||
OMP_NUM_THREADS=2 ./xdcblat3 < din3
|
||||
OMP_NUM_THREADS=2 ./xccblat3 < cin3
|
||||
OMP_NUM_THREADS=2 ./xzcblat3 < zin3
|
||||
else
|
||||
OPENBLAS_NUM_THREADS=2 ./xscblat3 < sin3
|
||||
OPENBLAS_NUM_THREADS=2 ./xdcblat3 < din3
|
||||
OPENBLAS_NUM_THREADS=2 ./xccblat3 < cin3
|
||||
OPENBLAS_NUM_THREADS=2 ./xzcblat3 < zin3
|
||||
endif
|
||||
|
||||
clean ::
|
||||
rm -f x*
|
||||
|
||||
FLDFLAGS = $(FFLAGS:-fPIC=)
|
||||
FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS)
|
||||
CEXTRALIB =
|
||||
|
||||
# Single real
|
||||
|
||||
@@ -65,7 +65,6 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||
|
||||
a = (FLOAT *)args -> a;
|
||||
x = (FLOAT *)args -> b;
|
||||
y = (FLOAT *)args -> c;
|
||||
|
||||
lda = args -> lda;
|
||||
incx = args -> ldb;
|
||||
@@ -76,6 +75,10 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||
n_from = 0;
|
||||
n_to = n;
|
||||
|
||||
//Use y as each thread's n* COMPSIZE elements in sb buffer
|
||||
y = buffer;
|
||||
buffer += ((COMPSIZE * n + 1023) & ~1023);
|
||||
|
||||
if (range_m) {
|
||||
n_from = *(range_m + 0);
|
||||
n_to = *(range_m + 1);
|
||||
@@ -83,7 +86,6 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||
a += n_from * lda * COMPSIZE;
|
||||
}
|
||||
|
||||
if (range_n) y += *range_n * COMPSIZE;
|
||||
|
||||
if (incx != 1) {
|
||||
COPY_K(n, x, incx, buffer, 1);
|
||||
@@ -331,7 +333,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
|
||||
|
||||
if (num_cpu) {
|
||||
queue[0].sa = NULL;
|
||||
queue[0].sb = buffer + num_cpu * (((n + 255) & ~255) + 16) * COMPSIZE;
|
||||
queue[0].sb = buffer;
|
||||
queue[num_cpu - 1].next = NULL;
|
||||
|
||||
exec_blas(num_cpu, queue);
|
||||
@@ -344,7 +346,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
|
||||
#else
|
||||
ONE, ZERO,
|
||||
#endif
|
||||
buffer + range_n[i] * COMPSIZE, 1, buffer, 1, NULL, 0);
|
||||
(FLOAT*)(queue[i].sb), 1, buffer, 1, NULL, 0);
|
||||
}
|
||||
|
||||
AXPYU_K(n, 0, 0,
|
||||
|
||||
@@ -77,8 +77,8 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
|
||||
range_M[0] = 0;
|
||||
i = arg -> m;
|
||||
} else {
|
||||
range_M[0] = range_M[0];
|
||||
i = range_M[1] - range_M[0];
|
||||
range_M[0] = range_m[0];
|
||||
i = range_m[1] - range_m[0];
|
||||
}
|
||||
|
||||
num_cpu_m = 0;
|
||||
|
||||
@@ -71,16 +71,25 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
|
||||
queue[num_cpu].args = arg;
|
||||
queue[num_cpu].range_m = range_m;
|
||||
queue[num_cpu].range_n = &range[num_cpu];
|
||||
queue[num_cpu].sa = NULL;
|
||||
#if 0 //defined(LOONGSON3A)
|
||||
queue[num_cpu].sa = sa + GEMM_OFFSET_A1 * num_cpu;
|
||||
queue[num_cpu].sb = queue[num_cpu].sa + GEMM_OFFSET_A1 * 5;
|
||||
#else
|
||||
queue[num_cpu].sa = NULL;
|
||||
queue[num_cpu].sb = NULL;
|
||||
#endif
|
||||
queue[num_cpu].next = &queue[num_cpu + 1];
|
||||
num_cpu ++;
|
||||
}
|
||||
|
||||
if (num_cpu) {
|
||||
#if 0 //defined(LOONGSON3A)
|
||||
queue[0].sa = sa;
|
||||
queue[0].sb = sb;
|
||||
|
||||
queue[0].sb = sa + GEMM_OFFSET_A1 * 5;
|
||||
#else
|
||||
queue[0].sa = sa;
|
||||
queue[0].sb = sb;
|
||||
#endif
|
||||
queue[num_cpu - 1].next = NULL;
|
||||
|
||||
exec_blas(num_cpu,
|
||||
|
||||
@@ -55,8 +55,8 @@ int CNAME(int mode,
|
||||
range_M[0] = 0;
|
||||
i = arg -> m;
|
||||
} else {
|
||||
range_M[0] = range_M[0];
|
||||
i = range_M[1] - range_M[0];
|
||||
range_M[0] = range_m[0];
|
||||
i = range_m[1] - range_m[0];
|
||||
}
|
||||
|
||||
num_cpu_m = 0;
|
||||
|
||||
@@ -332,7 +332,18 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||
#else
|
||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
||||
min_jj = min_j + js - jjs;
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
|
||||
#if ( defined(BULLDOZER) || defined(PILEDRIVER) || defined(HASWELL) ) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX)
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
else
|
||||
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
|
||||
else
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
#else
|
||||
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
#endif
|
||||
|
||||
|
||||
START_RPCC();
|
||||
|
||||
|
||||
@@ -48,6 +48,12 @@
|
||||
#define SWITCH_RATIO 2
|
||||
#endif
|
||||
|
||||
//The array of job_t may overflow the stack.
|
||||
//Instead, use malloc to alloc job_t.
|
||||
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
|
||||
#define USE_ALLOC_HEAP
|
||||
#endif
|
||||
|
||||
#ifndef GEMM3M_LOCAL
|
||||
#if defined(NN)
|
||||
#define GEMM3M_LOCAL GEMM3M_NN
|
||||
@@ -836,7 +842,11 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
||||
BLASLONG range_M[MAX_CPU_NUMBER + 1];
|
||||
BLASLONG range_N[MAX_CPU_NUMBER + 1];
|
||||
|
||||
job_t job[MAX_CPU_NUMBER];
|
||||
#ifndef USE_ALLOC_HEAP
|
||||
job_t job[MAX_CPU_NUMBER];
|
||||
#else
|
||||
job_t * job = NULL;
|
||||
#endif
|
||||
|
||||
BLASLONG num_cpu_m, num_cpu_n;
|
||||
|
||||
@@ -866,6 +876,15 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
||||
newarg.alpha = args -> alpha;
|
||||
newarg.beta = args -> beta;
|
||||
newarg.nthreads = args -> nthreads;
|
||||
|
||||
#ifdef USE_ALLOC_HEAP
|
||||
job = (job_t*)malloc(MAX_CPU_NUMBER * sizeof(job_t));
|
||||
if(job==NULL){
|
||||
fprintf(stderr, "OpenBLAS: malloc failed in %s\n", __func__);
|
||||
exit(1);
|
||||
}
|
||||
#endif
|
||||
|
||||
newarg.common = (void *)job;
|
||||
|
||||
if (!range_m) {
|
||||
@@ -945,6 +964,10 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
||||
exec_blas(num_cpu_m, queue);
|
||||
}
|
||||
|
||||
#ifdef USE_ALLOC_HEAP
|
||||
free(job);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -48,6 +48,12 @@
|
||||
#define SWITCH_RATIO 2
|
||||
#endif
|
||||
|
||||
//The array of job_t may overflow the stack.
|
||||
//Instead, use malloc to alloc job_t.
|
||||
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
|
||||
#define USE_ALLOC_HEAP
|
||||
#endif
|
||||
|
||||
#ifndef SYRK_LOCAL
|
||||
#if !defined(LOWER) && !defined(TRANS)
|
||||
#define SYRK_LOCAL SYRK_UN
|
||||
@@ -502,7 +508,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||
|
||||
blas_arg_t newarg;
|
||||
|
||||
#ifndef USE_ALLOC_HEAP
|
||||
job_t job[MAX_CPU_NUMBER];
|
||||
#else
|
||||
job_t * job = NULL;
|
||||
#endif
|
||||
|
||||
blas_queue_t queue[MAX_CPU_NUMBER];
|
||||
|
||||
BLASLONG range[MAX_CPU_NUMBER + 100];
|
||||
@@ -556,6 +567,15 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||
newarg.ldc = args -> ldc;
|
||||
newarg.alpha = args -> alpha;
|
||||
newarg.beta = args -> beta;
|
||||
|
||||
#ifdef USE_ALLOC_HEAP
|
||||
job = (job_t*)malloc(MAX_CPU_NUMBER * sizeof(job_t));
|
||||
if(job==NULL){
|
||||
fprintf(stderr, "OpenBLAS: malloc failed in %s\n", __func__);
|
||||
exit(1);
|
||||
}
|
||||
#endif
|
||||
|
||||
newarg.common = (void *)job;
|
||||
|
||||
if (!range_n) {
|
||||
@@ -668,6 +688,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||
exec_blas(num_cpu, queue);
|
||||
}
|
||||
|
||||
#ifdef USE_ALLOC_HEAP
|
||||
free(job);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -48,6 +48,12 @@
|
||||
#define SWITCH_RATIO 2
|
||||
#endif
|
||||
|
||||
//The array of job_t may overflow the stack.
|
||||
//Instead, use malloc to alloc job_t.
|
||||
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
|
||||
#define USE_ALLOC_HEAP
|
||||
#endif
|
||||
|
||||
#ifndef GEMM_LOCAL
|
||||
#if defined(NN)
|
||||
#define GEMM_LOCAL GEMM_NN
|
||||
@@ -360,8 +366,18 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||
|
||||
for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){
|
||||
min_jj = MIN(n_to, xxx + div_n) - jjs;
|
||||
|
||||
#if ( defined(BULLDOZER) || defined(PILEDRIVER) || defined(HASWELL) ) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX)
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
else
|
||||
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
|
||||
else
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
#else
|
||||
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
|
||||
#endif
|
||||
|
||||
START_RPCC();
|
||||
|
||||
OCOPY_OPERATION(min_l, min_jj, b, ldb, ls, jjs,
|
||||
@@ -519,7 +535,12 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
||||
|
||||
blas_arg_t newarg;
|
||||
|
||||
#ifndef USE_ALLOC_HEAP
|
||||
job_t job[MAX_CPU_NUMBER];
|
||||
#else
|
||||
job_t * job = NULL;
|
||||
#endif
|
||||
|
||||
blas_queue_t queue[MAX_CPU_NUMBER];
|
||||
|
||||
BLASLONG range_M[MAX_CPU_NUMBER + 1];
|
||||
@@ -563,6 +584,15 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
||||
newarg.alpha = args -> alpha;
|
||||
newarg.beta = args -> beta;
|
||||
newarg.nthreads = args -> nthreads;
|
||||
|
||||
#ifdef USE_ALLOC_HEAP
|
||||
job = (job_t*)malloc(MAX_CPU_NUMBER * sizeof(job_t));
|
||||
if(job==NULL){
|
||||
fprintf(stderr, "OpenBLAS: malloc failed in %s\n", __func__);
|
||||
exit(1);
|
||||
}
|
||||
#endif
|
||||
|
||||
newarg.common = (void *)job;
|
||||
|
||||
#ifdef PARAMTEST
|
||||
@@ -634,7 +664,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
||||
|
||||
num_cpu_n ++;
|
||||
}
|
||||
|
||||
|
||||
for (j = 0; j < num_cpu_m; j++) {
|
||||
for (i = 0; i < num_cpu_m; i++) {
|
||||
for (k = 0; k < DIVIDE_RATE; k++) {
|
||||
@@ -648,6 +678,10 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
||||
exec_blas(num_cpu_m, queue);
|
||||
}
|
||||
|
||||
#ifdef USE_ALLOC_HEAP
|
||||
free(job);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
TOPDIR = ../..
|
||||
include ../../Makefile.system
|
||||
|
||||
COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX)
|
||||
COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_config.$(SUFFIX) openblas_get_parallel.$(SUFFIX)
|
||||
|
||||
COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
|
||||
|
||||
@@ -14,7 +14,7 @@ endif
|
||||
|
||||
# COMMONOBJS += info.$(SUFFIX)
|
||||
|
||||
ifdef DYNAMIC_ARCH
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
COMMONOBJS += dynamic.$(SUFFIX)
|
||||
else
|
||||
COMMONOBJS += parameter.$(SUFFIX)
|
||||
@@ -70,7 +70,7 @@ ifndef BLAS_SERVER
|
||||
BLAS_SERVER = blas_server.c
|
||||
endif
|
||||
|
||||
ifdef DYNAMIC_ARCH
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
|
||||
else
|
||||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
|
||||
@@ -100,6 +100,15 @@ memory.$(SUFFIX) : $(MEMORY) ../../common.h ../../param.h
|
||||
blas_server.$(SUFFIX) : $(BLAS_SERVER) ../../common.h ../../common_thread.h ../../param.h
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
openblas_set_num_threads.$(SUFFIX) : openblas_set_num_threads.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
openblas_get_config.$(SUFFIX) : openblas_get_config.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
openblas_get_parallel.$(SUFFIX) : openblas_get_parallel.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
@@ -212,7 +221,7 @@ info.$(SUFFIX) : info.c info.h ../../common.h ../../param.h
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
|
||||
hpl : CFLAGS += -DHPL
|
||||
hpl_p : CFLAGS += -DHPL
|
||||
hpl : override CFLAGS += -DHPL
|
||||
hpl_p : override CFLAGS += -DHPL
|
||||
|
||||
include $(TOPDIR)/Makefile.tail
|
||||
|
||||
@@ -385,6 +385,7 @@ static int blas_thread_server(void *arg){
|
||||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
||||
}
|
||||
}
|
||||
queue->sb=sb;
|
||||
}
|
||||
|
||||
#ifdef MONITOR
|
||||
@@ -435,7 +436,7 @@ static int blas_thread_server(void *arg){
|
||||
|
||||
blas_memory_free(buffer);
|
||||
|
||||
pthread_exit(NULL);
|
||||
//pthread_exit(NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -500,6 +501,7 @@ static int blas_monitor(void *arg){
|
||||
/* Initializing routine */
|
||||
int blas_thread_init(void){
|
||||
BLASLONG i;
|
||||
int ret;
|
||||
#ifdef NEED_STACKATTR
|
||||
pthread_attr_t attr;
|
||||
#endif
|
||||
@@ -545,12 +547,16 @@ int blas_thread_init(void){
|
||||
pthread_cond_init (&thread_status[i].wakeup, NULL);
|
||||
|
||||
#ifdef NEED_STACKATTR
|
||||
pthread_create(&blas_threads[i], &attr,
|
||||
ret=pthread_create(&blas_threads[i], &attr,
|
||||
(void *)&blas_thread_server, (void *)i);
|
||||
#else
|
||||
pthread_create(&blas_threads[i], NULL,
|
||||
ret=pthread_create(&blas_threads[i], NULL,
|
||||
(void *)&blas_thread_server, (void *)i);
|
||||
#endif
|
||||
if(ret!=0){
|
||||
fprintf(STDERR,"OpenBLAS: pthread_creat error in blas_thread_init function. Error code:%d\n",ret);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef MONITOR
|
||||
@@ -765,6 +771,19 @@ void goto_set_num_threads(int num_threads) {
|
||||
|
||||
if (num_threads < 1) num_threads = blas_num_threads;
|
||||
|
||||
#ifndef NO_AFFINITY
|
||||
if (num_threads == 1) {
|
||||
if (blas_cpu_number == 1){
|
||||
//OpenBLAS is already single thread.
|
||||
return;
|
||||
}else{
|
||||
//From multi-threads to single thread
|
||||
//Restore the original affinity mask
|
||||
gotoblas_set_affinity(-1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
|
||||
|
||||
if (num_threads > blas_num_threads) {
|
||||
@@ -795,8 +814,20 @@ void goto_set_num_threads(int num_threads) {
|
||||
UNLOCK_COMMAND(&server_lock);
|
||||
}
|
||||
|
||||
#ifndef NO_AFFINITY
|
||||
if(blas_cpu_number == 1 && num_threads > 1){
|
||||
//Restore the thread 0 affinity.
|
||||
gotoblas_set_affinity(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
blas_cpu_number = num_threads;
|
||||
|
||||
#if defined(ARCH_MIPS64)
|
||||
//set parameters for different number of threads.
|
||||
blas_set_parameter();
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void openblas_set_num_threads(int num_threads) {
|
||||
|
||||
@@ -38,7 +38,7 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/mman.h>
|
||||
//#include <sys/mman.h>
|
||||
#include "common.h"
|
||||
|
||||
#ifndef USE_OPENMP
|
||||
@@ -49,19 +49,76 @@
|
||||
|
||||
int blas_server_avail = 0;
|
||||
|
||||
static void * blas_thread_buffer[MAX_CPU_NUMBER];
|
||||
|
||||
void goto_set_num_threads(int num_threads) {
|
||||
|
||||
int i=0;
|
||||
|
||||
if (num_threads < 1) num_threads = blas_num_threads;
|
||||
|
||||
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
|
||||
|
||||
if (num_threads > blas_num_threads) {
|
||||
blas_num_threads = num_threads;
|
||||
}
|
||||
|
||||
blas_cpu_number = num_threads;
|
||||
|
||||
omp_set_num_threads(blas_cpu_number);
|
||||
|
||||
//adjust buffer for each thread
|
||||
for(i=0; i<blas_cpu_number; i++){
|
||||
if(blas_thread_buffer[i]==NULL){
|
||||
blas_thread_buffer[i]=blas_memory_alloc(2);
|
||||
}
|
||||
}
|
||||
for(; i<MAX_CPU_NUMBER; i++){
|
||||
if(blas_thread_buffer[i]!=NULL){
|
||||
blas_memory_free(blas_thread_buffer[i]);
|
||||
blas_thread_buffer[i]=NULL;
|
||||
}
|
||||
}
|
||||
#if defined(ARCH_MIPS64)
|
||||
//set parameters for different number of threads.
|
||||
blas_set_parameter();
|
||||
#endif
|
||||
|
||||
}
|
||||
void openblas_set_num_threads(int num_threads) {
|
||||
|
||||
goto_set_num_threads(num_threads);
|
||||
}
|
||||
|
||||
int blas_thread_init(void){
|
||||
|
||||
int i=0;
|
||||
|
||||
blas_get_cpu_number();
|
||||
|
||||
blas_server_avail = 1;
|
||||
|
||||
for(i=0; i<blas_num_threads; i++){
|
||||
blas_thread_buffer[i]=blas_memory_alloc(2);
|
||||
}
|
||||
for(; i<MAX_CPU_NUMBER; i++){
|
||||
blas_thread_buffer[i]=NULL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int BLASFUNC(blas_thread_shutdown)(void){
|
||||
|
||||
int i=0;
|
||||
blas_server_avail = 0;
|
||||
|
||||
for(i=0; i<MAX_CPU_NUMBER; i++){
|
||||
if(blas_thread_buffer[i]!=NULL){
|
||||
blas_memory_free(blas_thread_buffer[i]);
|
||||
blas_thread_buffer[i]=NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -152,7 +209,8 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
|
||||
static void exec_threads(blas_queue_t *queue){
|
||||
|
||||
void *buffer, *sa, *sb;
|
||||
|
||||
int pos=0, release_flag=0;
|
||||
|
||||
buffer = NULL;
|
||||
sa = queue -> sa;
|
||||
sb = queue -> sb;
|
||||
@@ -164,9 +222,19 @@ static void exec_threads(blas_queue_t *queue){
|
||||
|
||||
if ((sa == NULL) && (sb == NULL) && ((queue -> mode & BLAS_PTHREAD) == 0)) {
|
||||
|
||||
buffer = blas_memory_alloc(2);
|
||||
pos = omp_get_thread_num();
|
||||
buffer = blas_thread_buffer[pos];
|
||||
|
||||
if (sa == NULL) sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
|
||||
//fallback
|
||||
if(buffer==NULL) {
|
||||
buffer = blas_memory_alloc(2);
|
||||
release_flag=1;
|
||||
}
|
||||
|
||||
if (sa == NULL) {
|
||||
sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
|
||||
queue->sa=sa;
|
||||
}
|
||||
|
||||
if (sb == NULL) {
|
||||
if (!(queue -> mode & BLAS_COMPLEX)){
|
||||
@@ -199,6 +267,7 @@ static void exec_threads(blas_queue_t *queue){
|
||||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
||||
}
|
||||
}
|
||||
queue->sb=sb;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -216,7 +285,7 @@ static void exec_threads(blas_queue_t *queue){
|
||||
|
||||
}
|
||||
|
||||
if (buffer != NULL) blas_memory_free(buffer);
|
||||
if (release_flag) blas_memory_free(buffer);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -63,6 +63,8 @@ static blas_pool_t pool;
|
||||
static HANDLE blas_threads [MAX_CPU_NUMBER];
|
||||
static DWORD blas_threads_id[MAX_CPU_NUMBER];
|
||||
|
||||
|
||||
|
||||
static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
|
||||
|
||||
if (!(mode & BLAS_COMPLEX)){
|
||||
@@ -179,7 +181,7 @@ static DWORD WINAPI blas_thread_server(void *arg){
|
||||
|
||||
do {
|
||||
action = WaitForMultipleObjects(2, handles, FALSE, INFINITE);
|
||||
} while ((action != WAIT_OBJECT_0) && (action == WAIT_OBJECT_0 + 1));
|
||||
} while ((action != WAIT_OBJECT_0) && (action != WAIT_OBJECT_0 + 1));
|
||||
|
||||
if (action == WAIT_OBJECT_0 + 1) break;
|
||||
|
||||
@@ -251,6 +253,7 @@ static DWORD WINAPI blas_thread_server(void *arg){
|
||||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
||||
}
|
||||
}
|
||||
queue->sb=sb;
|
||||
}
|
||||
|
||||
#ifdef MONITOR
|
||||
@@ -263,7 +266,9 @@ static DWORD WINAPI blas_thread_server(void *arg){
|
||||
} else {
|
||||
legacy_exec(routine, queue -> mode, queue -> args, sb);
|
||||
}
|
||||
}
|
||||
}else{
|
||||
continue; //if queue == NULL
|
||||
}
|
||||
|
||||
#ifdef SMP_DEBUG
|
||||
fprintf(STDERR, "Server[%2ld] Finished!\n", cpu);
|
||||
@@ -425,7 +430,7 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
|
||||
/* Shutdown procedure, but user don't have to call this routine. The */
|
||||
/* kernel automatically kill threads. */
|
||||
|
||||
int blas_thread_shutdown_(void){
|
||||
int BLASFUNC(blas_thread_shutdown)(void){
|
||||
|
||||
int i;
|
||||
|
||||
@@ -436,9 +441,10 @@ int blas_thread_shutdown_(void){
|
||||
if (blas_server_avail){
|
||||
|
||||
SetEvent(pool.killed);
|
||||
|
||||
for(i = 0; i < blas_cpu_number - 1; i++){
|
||||
WaitForSingleObject(blas_threads[i], INFINITE);
|
||||
printf("blas_num_threads=%d\n", blas_num_threads);
|
||||
for(i = 0; i < blas_num_threads - 1; i++){
|
||||
WaitForSingleObject(blas_threads[i], 5); //INFINITE);
|
||||
TerminateThread(blas_threads[i],0);
|
||||
}
|
||||
|
||||
blas_server_avail = 0;
|
||||
@@ -448,3 +454,47 @@ int blas_thread_shutdown_(void){
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void goto_set_num_threads(int num_threads)
|
||||
{
|
||||
long i;
|
||||
|
||||
if (num_threads < 1) num_threads = blas_cpu_number;
|
||||
|
||||
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
|
||||
|
||||
if (num_threads > blas_num_threads) {
|
||||
|
||||
LOCK_COMMAND(&server_lock);
|
||||
|
||||
//increased_threads = 1;
|
||||
if (!blas_server_avail){
|
||||
|
||||
InitializeCriticalSection(&pool.lock);
|
||||
pool.filled = CreateEvent(NULL, FALSE, FALSE, NULL);
|
||||
pool.killed = CreateEvent(NULL, TRUE, FALSE, NULL);
|
||||
|
||||
pool.shutdown = 0;
|
||||
pool.queue = NULL;
|
||||
blas_server_avail = 1;
|
||||
}
|
||||
|
||||
for(i = blas_num_threads - 1; i < num_threads - 1; i++){
|
||||
|
||||
blas_threads[i] = CreateThread(NULL, 0,
|
||||
blas_thread_server, (void *)i,
|
||||
0, &blas_threads_id[i]);
|
||||
}
|
||||
|
||||
blas_num_threads = num_threads;
|
||||
|
||||
UNLOCK_COMMAND(&server_lock);
|
||||
}
|
||||
|
||||
blas_cpu_number = num_threads;
|
||||
}
|
||||
|
||||
void openblas_set_num_threads(int num)
|
||||
{
|
||||
goto_set_num_threads(num);
|
||||
}
|
||||
|
||||
@@ -60,6 +60,20 @@ extern gotoblas_t gotoblas_NEHALEM;
|
||||
extern gotoblas_t gotoblas_OPTERON;
|
||||
extern gotoblas_t gotoblas_OPTERON_SSE3;
|
||||
extern gotoblas_t gotoblas_BARCELONA;
|
||||
extern gotoblas_t gotoblas_BOBCAT;
|
||||
#ifndef NO_AVX
|
||||
extern gotoblas_t gotoblas_SANDYBRIDGE;
|
||||
extern gotoblas_t gotoblas_BULLDOZER;
|
||||
extern gotoblas_t gotoblas_PILEDRIVER;
|
||||
extern gotoblas_t gotoblas_HASWELL;
|
||||
#else
|
||||
//Use NEHALEM kernels for sandy bridge
|
||||
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
|
||||
#define gotoblas_HASWELL gotoblas_NEHALEM
|
||||
#define gotoblas_BULLDOZER gotoblas_BARCELONA
|
||||
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
|
||||
#endif
|
||||
|
||||
|
||||
#define VENDOR_INTEL 1
|
||||
#define VENDOR_AMD 2
|
||||
@@ -68,6 +82,32 @@ extern gotoblas_t gotoblas_BARCELONA;
|
||||
|
||||
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
|
||||
|
||||
#ifndef NO_AVX
|
||||
static inline void xgetbv(int op, int * eax, int * edx){
|
||||
//Use binary code for xgetbv
|
||||
__asm__ __volatile__
|
||||
(".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
|
||||
}
|
||||
#endif
|
||||
|
||||
int support_avx(){
|
||||
#ifndef NO_AVX
|
||||
int eax, ebx, ecx, edx;
|
||||
int ret=0;
|
||||
|
||||
cpuid(1, &eax, &ebx, &ecx, &edx);
|
||||
if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){
|
||||
xgetbv(0, &eax, &edx);
|
||||
if((eax & 6) == 6){
|
||||
ret=1; //OS support AVX
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static int get_vendor(void){
|
||||
int eax, ebx, ecx, edx;
|
||||
char vendor[13];
|
||||
@@ -122,15 +162,59 @@ static gotoblas_t *get_coretype(void){
|
||||
if (model == 12) return &gotoblas_ATOM;
|
||||
return NULL;
|
||||
|
||||
case 2:
|
||||
//Intel Core (Clarkdale) / Core (Arrandale)
|
||||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
||||
// Xeon (Clarkdale), 32nm
|
||||
if (model == 5) return &gotoblas_NEHALEM;
|
||||
case 2:
|
||||
//Intel Core (Clarkdale) / Core (Arrandale)
|
||||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
||||
// Xeon (Clarkdale), 32nm
|
||||
if (model == 5) return &gotoblas_NEHALEM;
|
||||
|
||||
//Intel Xeon Processor 5600 (Westmere-EP)
|
||||
if (model == 12) return &gotoblas_NEHALEM;
|
||||
return NULL;
|
||||
//Intel Xeon Processor 5600 (Westmere-EP)
|
||||
//Xeon Processor E7 (Westmere-EX)
|
||||
//Xeon E7540
|
||||
if (model == 12 || model == 14 || model == 15) return &gotoblas_NEHALEM;
|
||||
|
||||
//Intel Core i5-2000 /i7-2000 (Sandy Bridge)
|
||||
//Intel Core i7-3000 / Xeon E5
|
||||
if (model == 10 || model == 13) {
|
||||
if(support_avx())
|
||||
return &gotoblas_SANDYBRIDGE;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
case 3:
|
||||
//Intel Sandy Bridge 22nm (Ivy Bridge?)
|
||||
if (model == 10) {
|
||||
if(support_avx())
|
||||
return &gotoblas_SANDYBRIDGE;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
//Intel Haswell
|
||||
if (model == 12) {
|
||||
if(support_avx())
|
||||
return &gotoblas_HASWELL;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
case 4:
|
||||
//Intel Haswell
|
||||
if (model == 5) {
|
||||
if(support_avx())
|
||||
return &gotoblas_HASWELL;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
case 0xf:
|
||||
if (model <= 0x2) return &gotoblas_NORTHWOOD;
|
||||
@@ -139,12 +223,44 @@ static gotoblas_t *get_coretype(void){
|
||||
}
|
||||
|
||||
if (vendor == VENDOR_AMD){
|
||||
if (family <= 0xe) return &gotoblas_ATHLON;
|
||||
if (family <= 0xe) {
|
||||
// Verify that CPU has 3dnow and 3dnowext before claiming it is Athlon
|
||||
cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
|
||||
if (eax & 0xffff >= 0x01) {
|
||||
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
|
||||
if ((edx & (1 << 30)) == 0 || (edx & (1 << 31)) == 0)
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
return NULL;
|
||||
|
||||
return &gotoblas_ATHLON;
|
||||
}
|
||||
if (family == 0xf){
|
||||
if ((exfamily == 0) || (exfamily == 2)) {
|
||||
if (ecx & (1 << 0)) return &gotoblas_OPTERON_SSE3;
|
||||
else return &gotoblas_OPTERON;
|
||||
} else {
|
||||
} else if (exfamily == 5) {
|
||||
return &gotoblas_BOBCAT;
|
||||
} else if (exfamily == 6) {
|
||||
if(model == 1){
|
||||
//AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
|
||||
if(support_avx())
|
||||
return &gotoblas_BULLDOZER;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n");
|
||||
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}else if(model == 2){
|
||||
//AMD Bulldozer Opteron 6300 / Opteron 4300 / Opteron 3300
|
||||
if(support_avx())
|
||||
return &gotoblas_PILEDRIVER;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n");
|
||||
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return &gotoblas_BARCELONA;
|
||||
}
|
||||
}
|
||||
@@ -178,6 +294,11 @@ static char *corename[] = {
|
||||
"Opteron(SSE3)",
|
||||
"Barcelona",
|
||||
"Nano",
|
||||
"Sandybridge",
|
||||
"Bobcat",
|
||||
"Bulldozer",
|
||||
"Piledriver",
|
||||
"Haswell",
|
||||
};
|
||||
|
||||
char *gotoblas_corename(void) {
|
||||
@@ -197,7 +318,12 @@ char *gotoblas_corename(void) {
|
||||
if (gotoblas == &gotoblas_OPTERON) return corename[13];
|
||||
if (gotoblas == &gotoblas_BARCELONA) return corename[14];
|
||||
if (gotoblas == &gotoblas_NANO) return corename[15];
|
||||
|
||||
if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16];
|
||||
if (gotoblas == &gotoblas_BOBCAT) return corename[17];
|
||||
if (gotoblas == &gotoblas_BULLDOZER) return corename[18];
|
||||
if (gotoblas == &gotoblas_PILEDRIVER) return corename[19];
|
||||
if (gotoblas == &gotoblas_HASWELL) return corename[20];
|
||||
|
||||
return corename[0];
|
||||
}
|
||||
|
||||
@@ -211,12 +337,21 @@ void gotoblas_dynamic_init(void) {
|
||||
if (gotoblas == NULL) gotoblas = &gotoblas_KATMAI;
|
||||
#else
|
||||
if (gotoblas == NULL) gotoblas = &gotoblas_PRESCOTT;
|
||||
/* sanity check, if 64bit pointer we can't have a 32 bit cpu */
|
||||
if (sizeof(void*) == 8) {
|
||||
if (gotoblas == &gotoblas_KATMAI ||
|
||||
gotoblas == &gotoblas_COPPERMINE ||
|
||||
gotoblas == &gotoblas_NORTHWOOD ||
|
||||
gotoblas == &gotoblas_BANIAS ||
|
||||
gotoblas == &gotoblas_ATHLON)
|
||||
gotoblas = &gotoblas_PRESCOTT;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (gotoblas && gotoblas -> init) {
|
||||
gotoblas -> init();
|
||||
} else {
|
||||
fprintf(stderr, "GotoBLAS : Architecture Initialization failed. No initialization function found.\n");
|
||||
fprintf(stderr, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -82,9 +82,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include <sched.h>
|
||||
#include <dirent.h>
|
||||
#include <dlfcn.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
|
||||
#define MAX_NODES 16
|
||||
#define MAX_CPUS 256
|
||||
#define NCPUBITS (8*sizeof(unsigned long))
|
||||
#define MAX_BITMASK_LEN (MAX_CPUS/NCPUBITS)
|
||||
#define CPUELT(cpu) ((cpu) / NCPUBITS)
|
||||
#define CPUMASK(cpu) ((unsigned long) 1UL << ((cpu) % NCPUBITS))
|
||||
|
||||
|
||||
#define SH_MAGIC 0x510510
|
||||
|
||||
@@ -103,10 +110,10 @@ typedef struct {
|
||||
int num_nodes;
|
||||
int num_procs;
|
||||
int final_num_procs;
|
||||
unsigned long avail;
|
||||
|
||||
unsigned long avail [MAX_BITMASK_LEN];
|
||||
int avail_count;
|
||||
unsigned long cpu_info [MAX_CPUS];
|
||||
unsigned long node_info [MAX_NODES];
|
||||
unsigned long node_info [MAX_NODES][MAX_BITMASK_LEN];
|
||||
int cpu_use[MAX_CPUS];
|
||||
|
||||
} shm_t;
|
||||
@@ -126,7 +133,8 @@ static shm_t *common = (void *)-1;
|
||||
static int shmid, pshmid;
|
||||
static void *paddr;
|
||||
|
||||
static unsigned long lprocmask, lnodemask;
|
||||
static unsigned long lprocmask[MAX_BITMASK_LEN], lnodemask;
|
||||
static int lprocmask_count = 0;
|
||||
static int numprocs = 1;
|
||||
static int numnodes = 1;
|
||||
|
||||
@@ -172,62 +180,119 @@ static inline int rcount(unsigned long number) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static inline unsigned long get_cpumap(int node) {
|
||||
/***
|
||||
Known issue: The number of CPUs/cores should less
|
||||
than sizeof(unsigned long). On 64 bits, the limit
|
||||
is 64. On 32 bits, it is 32.
|
||||
***/
|
||||
static inline void get_cpumap(int node, unsigned long * node_info) {
|
||||
|
||||
int infile;
|
||||
unsigned long affinity;
|
||||
unsigned long affinity[32];
|
||||
char name[160];
|
||||
char *p, *dummy;
|
||||
|
||||
char cpumap[160];
|
||||
char *dummy;
|
||||
int i=0;
|
||||
int count=0;
|
||||
int k=0;
|
||||
|
||||
sprintf(name, CPUMAP_NAME, node);
|
||||
|
||||
infile = open(name, O_RDONLY);
|
||||
for(i=0; i<32; i++){
|
||||
affinity[i] = 0;
|
||||
}
|
||||
|
||||
affinity = 0;
|
||||
|
||||
if (infile != -1) {
|
||||
|
||||
read(infile, name, sizeof(name));
|
||||
|
||||
p = name;
|
||||
read(infile, cpumap, sizeof(cpumap));
|
||||
|
||||
while ((*p == '0') || (*p == ',')) p++;
|
||||
for(i=0; i<160; i++){
|
||||
if(cpumap[i] == '\n')
|
||||
break;
|
||||
if(cpumap[i] != ','){
|
||||
name[k++]=cpumap[i];
|
||||
|
||||
//Enough data for Hex
|
||||
if(k >= NCPUBITS/4){
|
||||
affinity[count++] = strtoul(name, &dummy, 16);
|
||||
k=0;
|
||||
}
|
||||
}
|
||||
|
||||
affinity = strtol(p, &dummy, 16);
|
||||
|
||||
}
|
||||
if(k!=0){
|
||||
name[k]='\0';
|
||||
affinity[count++] = strtoul(name, &dummy, 16);
|
||||
k=0;
|
||||
}
|
||||
// 0-63bit -> node_info[0], 64-128bit -> node_info[1] ....
|
||||
// revert the sequence
|
||||
for(i=0; i<count && i<MAX_BITMASK_LEN; i++){
|
||||
node_info[i]=affinity[count-i-1];
|
||||
}
|
||||
close(infile);
|
||||
}
|
||||
|
||||
return affinity;
|
||||
return ;
|
||||
}
|
||||
|
||||
static inline unsigned long get_share(int cpu, int level) {
|
||||
static inline void get_share(int cpu, int level, unsigned long * share) {
|
||||
|
||||
int infile;
|
||||
unsigned long affinity;
|
||||
unsigned long affinity[32];
|
||||
char cpumap[160];
|
||||
char name[160];
|
||||
char *p;
|
||||
|
||||
char *dummy;
|
||||
int count=0;
|
||||
int i=0,k=0;
|
||||
int bitmask_idx = 0;
|
||||
|
||||
sprintf(name, SHARE_NAME, cpu, level);
|
||||
|
||||
infile = open(name, O_RDONLY);
|
||||
|
||||
affinity = (1UL << cpu);
|
||||
|
||||
// Init share
|
||||
for(i=0; i<MAX_BITMASK_LEN; i++){
|
||||
share[i]=0;
|
||||
}
|
||||
bitmask_idx = CPUELT(cpu);
|
||||
share[bitmask_idx] = CPUMASK(cpu);
|
||||
|
||||
if (infile != -1) {
|
||||
|
||||
read(infile, name, sizeof(name));
|
||||
read(infile, cpumap, sizeof(cpumap));
|
||||
|
||||
for(i=0; i<160; i++){
|
||||
if(cpumap[i] == '\n')
|
||||
break;
|
||||
if(cpumap[i] != ','){
|
||||
name[k++]=cpumap[i];
|
||||
|
||||
//Enough data
|
||||
if(k >= NCPUBITS/4){
|
||||
affinity[count++] = strtoul(name, &dummy, 16);
|
||||
k=0;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
if(k!=0){
|
||||
name[k]='\0';
|
||||
affinity[count++] = strtoul(name, &dummy, 16);
|
||||
k=0;
|
||||
}
|
||||
// 0-63bit -> node_info[0], 64-128bit -> node_info[1] ....
|
||||
// revert the sequence
|
||||
for(i=0; i<count && i<MAX_BITMASK_LEN; i++){
|
||||
share[i]=affinity[count-i-1];
|
||||
}
|
||||
|
||||
p = name;
|
||||
|
||||
while ((*p == '0') || (*p == ',')) p++;
|
||||
|
||||
affinity = strtol(p, &p, 16);
|
||||
|
||||
close(infile);
|
||||
}
|
||||
|
||||
return affinity;
|
||||
return ;
|
||||
}
|
||||
|
||||
static int numa_check(void) {
|
||||
@@ -235,6 +300,7 @@ static int numa_check(void) {
|
||||
DIR *dp;
|
||||
struct dirent *dir;
|
||||
int node;
|
||||
int j;
|
||||
|
||||
common -> num_nodes = 0;
|
||||
|
||||
@@ -245,20 +311,22 @@ static int numa_check(void) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (node = 0; node < MAX_NODES; node ++) common -> node_info[node] = 0;
|
||||
for (node = 0; node < MAX_NODES; node ++) {
|
||||
for (j = 0; j<MAX_BITMASK_LEN; j++) common -> node_info[node][j] = 0;
|
||||
}
|
||||
|
||||
while ((dir = readdir(dp)) != NULL) {
|
||||
if (*(unsigned int *) dir -> d_name == 0x065646f6eU) {
|
||||
if (strncmp(dir->d_name, "node", 4)==0) {
|
||||
|
||||
node = atoi(&dir -> d_name[4]);
|
||||
|
||||
if (node > MAX_NODES) {
|
||||
fprintf(stderr, "\nGotoBLAS Warining : MAX_NODES (NUMA) is too small. Terminated.\n");
|
||||
fprintf(stderr, "\nOpenBLAS Warning : MAX_NODES (NUMA) is too small. Terminated.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
common -> num_nodes ++;
|
||||
common -> node_info[node] = get_cpumap(node);
|
||||
get_cpumap(node, common->node_info[node]);
|
||||
|
||||
}
|
||||
}
|
||||
@@ -271,7 +339,7 @@ static int numa_check(void) {
|
||||
fprintf(stderr, "Numa found : number of Nodes = %2d\n", common -> num_nodes);
|
||||
|
||||
for (node = 0; node < common -> num_nodes; node ++)
|
||||
fprintf(stderr, "MASK (%2d) : %08lx\n", node, common -> node_info[node]);
|
||||
fprintf(stderr, "MASK (%2d) : %08lx\n", node, common -> node_info[node][0]);
|
||||
#endif
|
||||
|
||||
return common -> num_nodes;
|
||||
@@ -283,11 +351,13 @@ static void numa_mapping(void) {
|
||||
int i, j, h;
|
||||
unsigned long work, bit;
|
||||
int count = 0;
|
||||
int bitmask_idx = 0;
|
||||
|
||||
for (node = 0; node < common -> num_nodes; node ++) {
|
||||
core = 0;
|
||||
for (cpu = 0; cpu < common -> num_procs; cpu ++) {
|
||||
if (common -> node_info[node] & common -> avail & (1UL << cpu)) {
|
||||
bitmask_idx = CPUELT(cpu);
|
||||
if (common -> node_info[node][bitmask_idx] & common -> avail[bitmask_idx] & CPUMASK(cpu)) {
|
||||
common -> cpu_info[count] = WRITE_CORE(core) | WRITE_NODE(node) | WRITE_CPU(cpu);
|
||||
count ++;
|
||||
core ++;
|
||||
@@ -344,46 +414,92 @@ static void numa_mapping(void) {
|
||||
|
||||
static void disable_hyperthread(void) {
|
||||
|
||||
unsigned long share;
|
||||
unsigned long share[MAX_BITMASK_LEN];
|
||||
int cpu;
|
||||
int bitmask_idx = 0;
|
||||
int i=0, count=0;
|
||||
bitmask_idx = CPUELT(common -> num_procs);
|
||||
|
||||
common -> avail = (1UL << common -> num_procs) - 1;
|
||||
for(i=0; i< bitmask_idx; i++){
|
||||
common -> avail[count++] = 0xFFFFFFFFFFFFFFFFUL;
|
||||
}
|
||||
if(CPUMASK(common -> num_procs) != 1){
|
||||
common -> avail[count++] = CPUMASK(common -> num_procs) - 1;
|
||||
}
|
||||
common -> avail_count = count;
|
||||
|
||||
/* if(common->num_procs > 64){ */
|
||||
/* fprintf(stderr, "\nOpenBLAS Warning : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs); */
|
||||
/* exit(1); */
|
||||
/* }else if(common->num_procs == 64){ */
|
||||
/* common -> avail = 0xFFFFFFFFFFFFFFFFUL; */
|
||||
/* }else */
|
||||
/* common -> avail = (1UL << common -> num_procs) - 1; */
|
||||
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "\nAvail CPUs : %04lx.\n", common -> avail);
|
||||
fprintf(stderr, "\nAvail CPUs : ");
|
||||
for(i=0; i<count; i++)
|
||||
fprintf(stderr, "%04lx ", common -> avail[i]);
|
||||
fprintf(stderr, ".\n");
|
||||
#endif
|
||||
|
||||
for (cpu = 0; cpu < common -> num_procs; cpu ++) {
|
||||
|
||||
share = (get_share(cpu, 1) & common -> avail);
|
||||
|
||||
if (popcount(share) > 1) {
|
||||
|
||||
get_share(cpu, 1, share);
|
||||
|
||||
//When the shared cpu are in different element of share & avail array, this may be a bug.
|
||||
for (i = 0; i < count ; i++){
|
||||
|
||||
share[i] &= common->avail[i];
|
||||
|
||||
if (popcount(share[i]) > 1) {
|
||||
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "Detected Hyper Threading on CPU %4x; disabled CPU %04lx.\n",
|
||||
cpu, share & ~(1UL << cpu));
|
||||
fprintf(stderr, "Detected Hyper Threading on CPU %4x; disabled CPU %04lx.\n",
|
||||
cpu, share[i] & ~(CPUMASK(cpu)));
|
||||
#endif
|
||||
|
||||
common -> avail &= ~((share & ~(1UL << cpu)));
|
||||
common -> avail[i] &= ~((share[i] & ~ CPUMASK(cpu)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void disable_affinity(void) {
|
||||
|
||||
int i=0;
|
||||
int bitmask_idx=0;
|
||||
int count=0;
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "Final all available CPUs : %04lx.\n\n", common -> avail);
|
||||
fprintf(stderr, "Final all available CPUs : %04lx.\n\n", common -> avail[0]);
|
||||
fprintf(stderr, "CPU mask : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]);
|
||||
#endif
|
||||
|
||||
lprocmask = (1UL << common -> final_num_procs) - 1;
|
||||
/* if(common->final_num_procs > 64){ */
|
||||
/* fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->final_num_procs); */
|
||||
/* exit(1); */
|
||||
/* }else if(common->final_num_procs == 64){ */
|
||||
/* lprocmask = 0xFFFFFFFFFFFFFFFFUL; */
|
||||
/* }else */
|
||||
/* lprocmask = (1UL << common -> final_num_procs) - 1; */
|
||||
|
||||
bitmask_idx = CPUELT(common -> final_num_procs);
|
||||
|
||||
for(i=0; i< bitmask_idx; i++){
|
||||
lprocmask[count++] = 0xFFFFFFFFFFFFFFFFUL;
|
||||
}
|
||||
if(CPUMASK(common -> final_num_procs) != 1){
|
||||
lprocmask[count++] = CPUMASK(common -> final_num_procs) - 1;
|
||||
}
|
||||
lprocmask_count = count;
|
||||
|
||||
#ifndef USE_OPENMP
|
||||
lprocmask &= *(unsigned long *)&cpu_orig_mask[0];
|
||||
for(i=0; i< count; i++){
|
||||
lprocmask[i] &= common->avail[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "I choose these CPUs : %04lx.\n\n", lprocmask);
|
||||
fprintf(stderr, "I choose these CPUs : %04lx.\n\n", lprocmask[0]);
|
||||
#endif
|
||||
|
||||
}
|
||||
@@ -473,7 +589,7 @@ static void create_pshmem(void) {
|
||||
static void local_cpu_map(void) {
|
||||
|
||||
int cpu, id, mapping;
|
||||
|
||||
int bitmask_idx = 0;
|
||||
cpu = 0;
|
||||
mapping = 0;
|
||||
|
||||
@@ -483,8 +599,9 @@ static void local_cpu_map(void) {
|
||||
if (id > 0) {
|
||||
if (is_dead(id)) common -> cpu_use[cpu] = 0;
|
||||
}
|
||||
|
||||
if ((common -> cpu_use[cpu] == 0) && (lprocmask & (1UL << cpu))) {
|
||||
|
||||
bitmask_idx = CPUELT(cpu);
|
||||
if ((common -> cpu_use[cpu] == 0) && (lprocmask[bitmask_idx] & CPUMASK(cpu))) {
|
||||
|
||||
common -> cpu_use[cpu] = pshmid;
|
||||
cpu_mapping[mapping] = READ_CPU(common -> cpu_info[cpu]);
|
||||
@@ -570,6 +687,7 @@ void gotoblas_affinity_init(void) {
|
||||
#ifndef USE_OPENMP
|
||||
cpu_set_t cpu_mask;
|
||||
#endif
|
||||
int i;
|
||||
|
||||
if (initialized) return;
|
||||
|
||||
@@ -619,7 +737,13 @@ void gotoblas_affinity_init(void) {
|
||||
fprintf(stderr, "Shared Memory Initialization.\n");
|
||||
#endif
|
||||
|
||||
common -> num_procs = get_nprocs();
|
||||
//returns the number of processors which are currently online
|
||||
common -> num_procs = sysconf(_SC_NPROCESSORS_ONLN);;
|
||||
|
||||
if(common -> num_procs > MAX_CPUS) {
|
||||
fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (cpu = 0; cpu < common -> num_procs; cpu++) common -> cpu_info[cpu] = cpu;
|
||||
|
||||
@@ -629,7 +753,8 @@ void gotoblas_affinity_init(void) {
|
||||
|
||||
if (common -> num_nodes > 1) numa_mapping();
|
||||
|
||||
common -> final_num_procs = popcount(common -> avail);
|
||||
common -> final_num_procs = 0;
|
||||
for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number.
|
||||
|
||||
for (cpu = 0; cpu < common -> final_num_procs; cpu ++) common -> cpu_use[cpu] = 0;
|
||||
|
||||
@@ -639,7 +764,8 @@ void gotoblas_affinity_init(void) {
|
||||
|
||||
disable_affinity();
|
||||
|
||||
num_avail = popcount(lprocmask);
|
||||
num_avail = 0;
|
||||
for(i=0; i<lprocmask_count; i++) num_avail += popcount(lprocmask[i]);
|
||||
|
||||
if ((numprocs <= 0) || (numprocs > num_avail)) numprocs = num_avail;
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -103,8 +103,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include <sys/syscall.h>
|
||||
#endif
|
||||
|
||||
#if defined(OS_FreeBSD) || defined(OS_Darwin)
|
||||
#if defined(OS_FREEBSD) || defined(OS_DARWIN)
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/resource.h>
|
||||
#endif
|
||||
|
||||
#if defined(OS_WINDOWS) && (defined(__MINGW32__) || defined(__MINGW64__))
|
||||
@@ -125,7 +126,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define NO_WARMUP
|
||||
#endif
|
||||
|
||||
#ifdef ALLOC_HUGETLB
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
@@ -185,7 +186,7 @@ int get_num_procs(void) {
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(OS_FreeBSD) || defined(OS_Darwin)
|
||||
#if defined(OS_FREEBSD)
|
||||
|
||||
int get_num_procs(void) {
|
||||
|
||||
@@ -206,7 +207,46 @@ int get_num_procs(void) {
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(OS_DARWIN)
|
||||
int get_num_procs(void) {
|
||||
static int nums = 0;
|
||||
size_t len;
|
||||
if (nums == 0){
|
||||
len = sizeof(int);
|
||||
sysctlbyname("hw.physicalcpu", &nums, &len, NULL, 0);
|
||||
}
|
||||
return nums;
|
||||
}
|
||||
/*
|
||||
void set_stack_limit(int limitMB){
|
||||
int result=0;
|
||||
struct rlimit rl;
|
||||
rlim_t StackSize;
|
||||
|
||||
StackSize=limitMB*1024*1024;
|
||||
result=getrlimit(RLIMIT_STACK, &rl);
|
||||
if(result==0){
|
||||
if(rl.rlim_cur < StackSize){
|
||||
rl.rlim_cur=StackSize;
|
||||
result=setrlimit(RLIMIT_STACK, &rl);
|
||||
if(result !=0){
|
||||
fprintf(stderr, "OpenBLAS: set stack limit error =%d\n", result);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
#endif
|
||||
|
||||
/*
|
||||
OpenBLAS uses the numbers of CPU cores in multithreading.
|
||||
It can be set by openblas_set_num_threads(int num_threads);
|
||||
*/
|
||||
int blas_cpu_number = 0;
|
||||
/*
|
||||
The numbers of threads in the thread pool.
|
||||
This value is equal or large than blas_cpu_number. This means some threads are sleep.
|
||||
*/
|
||||
int blas_num_threads = 0;
|
||||
|
||||
int goto_get_num_procs (void) {
|
||||
@@ -215,7 +255,7 @@ int goto_get_num_procs (void) {
|
||||
|
||||
int blas_get_cpu_number(void){
|
||||
char *p;
|
||||
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FreeBSD) || defined(OS_Darwin)
|
||||
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN)
|
||||
int max_num;
|
||||
#endif
|
||||
int blas_goto_num = 0;
|
||||
@@ -223,7 +263,7 @@ int blas_get_cpu_number(void){
|
||||
|
||||
if (blas_num_threads) return blas_num_threads;
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FreeBSD) || defined(OS_Darwin)
|
||||
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN)
|
||||
max_num = get_num_procs();
|
||||
#endif
|
||||
|
||||
@@ -250,7 +290,7 @@ int blas_get_cpu_number(void){
|
||||
else if (blas_omp_num > 0) blas_num_threads = blas_omp_num;
|
||||
else blas_num_threads = MAX_CPU_NUMBER;
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FreeBSD) || defined(OS_Darwin)
|
||||
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN)
|
||||
if (blas_num_threads > max_num) blas_num_threads = max_num;
|
||||
#endif
|
||||
|
||||
@@ -323,7 +363,7 @@ static void *alloc_mmap(void *address){
|
||||
#define BENCH_ITERATION 4
|
||||
#define SCALING 2
|
||||
|
||||
static inline BLASULONG run_bench(BLASULONG address, long size) {
|
||||
static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) {
|
||||
|
||||
BLASULONG original, *p;
|
||||
BLASULONG start, stop, min;
|
||||
@@ -390,11 +430,11 @@ static void *alloc_mmap(void *address){
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#ifdef DEBUG
|
||||
int ret;
|
||||
int ret=0;
|
||||
ret=my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
|
||||
if(ret==-1){
|
||||
int errsv=errno;
|
||||
perror("alloc_mmap:");
|
||||
perror("OpenBLAS alloc_mmap:");
|
||||
printf("error code=%d,\tmap_address=%lx\n",errsv,map_address);
|
||||
}
|
||||
|
||||
@@ -410,12 +450,12 @@ static void *alloc_mmap(void *address){
|
||||
current = (SCALING - 1) * BUFFER_SIZE;
|
||||
|
||||
while(current > 0) {
|
||||
*(long *)start = (long)start + PAGESIZE;
|
||||
*(BLASLONG *)start = (BLASLONG)start + PAGESIZE;
|
||||
start += PAGESIZE;
|
||||
current -= PAGESIZE;
|
||||
}
|
||||
|
||||
*(long *)(start - PAGESIZE) = (BLASULONG)map_address;
|
||||
*(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address;
|
||||
|
||||
start = (BLASULONG)map_address;
|
||||
|
||||
@@ -884,7 +924,7 @@ void *blas_memory_alloc(int procpos){
|
||||
if (!blas_num_threads) blas_cpu_number = blas_get_cpu_number();
|
||||
#endif
|
||||
|
||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64)
|
||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
|
||||
#ifndef DYNAMIC_ARCH
|
||||
blas_set_parameter();
|
||||
#endif
|
||||
@@ -1128,9 +1168,9 @@ static BLASULONG init_lock = 0UL;
|
||||
static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n,
|
||||
void *sa, void *sb, BLASLONG pos) {
|
||||
|
||||
#ifndef ARCH_POWER
|
||||
#if !defined(ARCH_POWER) && !defined(ARCH_SPARC)
|
||||
|
||||
long size;
|
||||
size_t size;
|
||||
BLASULONG buffer;
|
||||
|
||||
size = BUFFER_SIZE - PAGESIZE;
|
||||
@@ -1228,6 +1268,7 @@ void CONSTRUCTOR gotoblas_init(void) {
|
||||
|
||||
if (gotoblas_initialized) return;
|
||||
|
||||
|
||||
#ifdef PROFILE
|
||||
moncontrol (0);
|
||||
#endif
|
||||
@@ -1289,6 +1330,7 @@ void DESTRUCTOR gotoblas_quit(void) {
|
||||
moncontrol (1);
|
||||
#endif
|
||||
|
||||
blas_shutdown();
|
||||
}
|
||||
|
||||
#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
|
||||
|
||||
59
driver/others/openblas_get_config.c
Normal file
59
driver/others/openblas_get_config.c
Normal file
@@ -0,0 +1,59 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
static char* openblas_config_str=""
|
||||
#ifdef USE64BITINT
|
||||
"USE64BITINT "
|
||||
#endif
|
||||
#ifdef NO_CBLAS
|
||||
"NO_CBLAS "
|
||||
#endif
|
||||
#ifdef NO_LAPACK
|
||||
"NO_LAPACK "
|
||||
#endif
|
||||
#ifdef NO_LAPACKE
|
||||
"NO_LAPACKE "
|
||||
#endif
|
||||
#ifdef DYNAMIC_ARCH
|
||||
"DYNAMIC_ARCH "
|
||||
#endif
|
||||
#ifdef NO_AFFINITY
|
||||
"NO_AFFINITY "
|
||||
#endif
|
||||
;
|
||||
|
||||
char* CNAME() {
|
||||
return openblas_config_str;
|
||||
}
|
||||
|
||||
52
driver/others/openblas_get_parallel.c
Normal file
52
driver/others/openblas_get_parallel.c
Normal file
@@ -0,0 +1,52 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2013 Martin Koehler, grisuthedragon@users.github.com
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(USE_OPENMP)
|
||||
static int parallel = 2 ;
|
||||
#elif defined(SMP_SERVER)
|
||||
static int parallel = 1;
|
||||
#else
|
||||
static int parallel = 0;
|
||||
#endif
|
||||
|
||||
int CNAME() {
|
||||
return parallel;
|
||||
}
|
||||
|
||||
int NAME() {
|
||||
return parallel;
|
||||
}
|
||||
|
||||
|
||||
|
||||
52
driver/others/openblas_set_num_threads.c
Normal file
52
driver/others/openblas_set_num_threads.c
Normal file
@@ -0,0 +1,52 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#ifdef SMP_SERVER
|
||||
|
||||
extern void openblas_set_num_threads(int num_threads) ;
|
||||
|
||||
void openblas_set_num_threads_(int* num_threads){
|
||||
openblas_set_num_threads(*num_threads);
|
||||
}
|
||||
|
||||
#else
|
||||
//Single thread
|
||||
|
||||
void openblas_set_num_threads(int num_threads) {
|
||||
}
|
||||
|
||||
void openblas_set_num_threads_(int* num_threads){
|
||||
|
||||
}
|
||||
#endif
|
||||
@@ -45,8 +45,22 @@ int get_L2_size(void);
|
||||
#define DEFAULT_GEMM_P 128
|
||||
#define DEFAULT_GEMM_Q 128
|
||||
#define DEFAULT_GEMM_R 128
|
||||
#define DEFAULT_GEMM_OFFSET_A 0
|
||||
#define DEFAULT_GEMM_OFFSET_B 0
|
||||
|
||||
/* Global Parameter */
|
||||
#if GEMM_OFFSET_A == gemm_offset_a
|
||||
BLASLONG gemm_offset_a = DEFAULT_GEMM_OFFSET_A;
|
||||
#else
|
||||
BLASLONG gemm_offset_a = GEMM_OFFSET_A;
|
||||
#endif
|
||||
|
||||
#if GEMM_OFFSET_B == gemm_offset_b
|
||||
BLASLONG gemm_offset_b = DEFAULT_GEMM_OFFSET_B;
|
||||
#else
|
||||
BLASLONG gemm_offset_b = GEMM_OFFSET_B;
|
||||
#endif
|
||||
|
||||
#if SGEMM_P == sgemm_p
|
||||
BLASLONG sgemm_p = DEFAULT_GEMM_P;
|
||||
#else
|
||||
@@ -149,9 +163,9 @@ int get_L2_size(void){
|
||||
|
||||
int eax, ebx, ecx, edx;
|
||||
|
||||
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || \
|
||||
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
|
||||
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
|
||||
defined(CORE_NEHALEM) || defined(ATOM) || defined(GENERIC)
|
||||
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC)
|
||||
|
||||
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
@@ -370,6 +384,17 @@ void blas_set_parameter(void){
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(SANDYBRIDGE)
|
||||
sgemm_p = 1024;
|
||||
dgemm_p = 512;
|
||||
cgemm_p = 512;
|
||||
zgemm_p = 256;
|
||||
#ifdef EXPRECISION
|
||||
qgemm_p = 256;
|
||||
xgemm_p = 128;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(CORE_PRESCOTT) || defined(GENERIC)
|
||||
size >>= 6;
|
||||
|
||||
@@ -421,7 +446,7 @@ void blas_set_parameter(void){
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(CORE_BARCELONA)
|
||||
#if defined(CORE_BARCELONA) || defined(CORE_BOBCAT)
|
||||
size >>= 8;
|
||||
|
||||
sgemm_p = 232 * size;
|
||||
@@ -666,3 +691,36 @@ void blas_set_parameter(void){
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(ARCH_MIPS64)
|
||||
void blas_set_parameter(void){
|
||||
#if defined(LOONGSON3A)
|
||||
#ifdef SMP
|
||||
if(blas_num_threads == 1){
|
||||
#endif
|
||||
//single thread
|
||||
dgemm_r = 1024;
|
||||
#ifdef SMP
|
||||
}else{
|
||||
//multi thread
|
||||
dgemm_r = 200;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(LOONGSON3B)
|
||||
#ifdef SMP
|
||||
if(blas_num_threads == 1 || blas_num_threads == 2){
|
||||
#endif
|
||||
//single thread
|
||||
dgemm_r = 640;
|
||||
#ifdef SMP
|
||||
}else{
|
||||
//multi thread
|
||||
dgemm_r = 160;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -74,20 +74,21 @@ void gotoblas_profile_quit(void) {
|
||||
if (cycles > 0) {
|
||||
|
||||
fprintf(stderr, "\n\t====== BLAS Profiling Result =======\n\n");
|
||||
fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle\n");
|
||||
fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle Wall Time(Cycles)\n");
|
||||
|
||||
for (i = 0; i < MAX_PROF_TABLE; i ++) {
|
||||
if (function_profile_table[i].calls) {
|
||||
#ifndef OS_WINDOWS
|
||||
fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f\n",
|
||||
fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f %Ld\n",
|
||||
#else
|
||||
fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f\n",
|
||||
fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f %lld\n",
|
||||
#endif
|
||||
func_table[i],
|
||||
function_profile_table[i].calls,
|
||||
(double)function_profile_table[i].cycles / (double)cycles * 100.,
|
||||
(double)function_profile_table[i].fops / (double)function_profile_table[i].tcycles * 100.,
|
||||
(double)function_profile_table[i].area / (double)function_profile_table[i].cycles
|
||||
(double)function_profile_table[i].area / (double)function_profile_table[i].cycles,
|
||||
function_profile_table[i].cycles
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,10 +10,31 @@ ifndef NO_CBLAS
|
||||
NO_CBLAS = 0
|
||||
endif
|
||||
|
||||
ifndef NO_LAPACK
|
||||
NO_LAPACK = 0
|
||||
endif
|
||||
|
||||
ifndef NO_LAPACKE
|
||||
NO_LAPACKE = 0
|
||||
endif
|
||||
|
||||
ifndef NEED2UNDERSCORES
|
||||
NEED2UNDERSCORES=0
|
||||
endif
|
||||
|
||||
ifndef ONLY_CBLAS
|
||||
ONLY_CBLAS = 0
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
EXTRALIB += -lgfortran
|
||||
endif
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
EXTRALIB += -lgomp
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
@@ -53,38 +74,44 @@ dyn : $(LIBDYNNAME)
|
||||
zip : dll
|
||||
zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME)
|
||||
|
||||
dll : libgoto2.dll
|
||||
dll : ../$(LIBDLLNAME)
|
||||
#libgoto2.dll
|
||||
|
||||
dll2 : libgoto2_shared.dll
|
||||
|
||||
libgoto2.dll : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX)
|
||||
# On Windows, we only generate a DLL without a version suffix. This is because
|
||||
# applications which link against the dynamic library reference a fixed DLL name
|
||||
# in their import table. By instead using a stable name it is possible to
|
||||
# upgrade between library versions, without needing to re-link an application.
|
||||
# For more details see: https://github.com/xianyi/OpenBLAS/issues/127.
|
||||
../$(LIBDLLNAME) : ../$(LIBNAME) libopenblas.def dllinit.$(SUFFIX)
|
||||
$(RANLIB) ../$(LIBNAME)
|
||||
ifeq ($(BINARY32), 1)
|
||||
$(DLLWRAP) -o $(@F) --def libgoto2.def \
|
||||
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
||||
-lib /machine:i386 /def:libgoto2.def
|
||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libopenblas.def \
|
||||
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(EXTRALIB)
|
||||
-lib /machine:i386 /def:libopenblas.def
|
||||
else
|
||||
$(DLLWRAP) -o $(@F) --def libgoto2.def \
|
||||
--entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
||||
-lib /machine:X64 /def:libgoto2.def
|
||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libopenblas.def \
|
||||
--entry $(FU)dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(EXTRALIB)
|
||||
-lib /machine:X64 /def:libopenblas.def
|
||||
endif
|
||||
|
||||
libgoto2_shared.dll : ../$(LIBNAME) libgoto2_shared.def
|
||||
$(CC) $(CFLAGS) libgoto2_shared.def -shared -o $(@F) \
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) libgoto2_shared.def -shared -o $(@F) \
|
||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
|
||||
-Wl,--out-implib,libgoto2_shared.lib $(FEXTRALIB)
|
||||
|
||||
libgoto2.def : gensymbol
|
||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F)
|
||||
libopenblas.def : gensymbol
|
||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
||||
libgoto2_shared.def : gensymbol
|
||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F)
|
||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
||||
libgoto_hpl.def : gensymbol
|
||||
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F)
|
||||
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
||||
$(LIBDYNNAME) : ../$(LIBNAME) osx.def
|
||||
$(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o $(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
|
||||
$(FC) $(FFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
|
||||
|
||||
symbol.$(SUFFIX) : symbol.S
|
||||
$(CC) $(CFLAGS) -c -o $(@F) $^
|
||||
@@ -97,23 +124,32 @@ ifeq ($(OSNAME), Linux)
|
||||
so : ../$(LIBSONAME)
|
||||
|
||||
../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c
|
||||
$(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
ifneq ($(C_COMPILER), LSB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
|
||||
-Wl,--retain-symbols-file=linux.def $(EXTRALIB)
|
||||
$(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||
-Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||
else
|
||||
#for LSB
|
||||
env LSBCC_SHAREDLIBS=gfortran $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
|
||||
-Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB)
|
||||
$(FC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||
endif
|
||||
rm -f linktest
|
||||
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
#http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD NetBSD))
|
||||
|
||||
so : ../$(LIBSONAME)
|
||||
|
||||
../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c
|
||||
$(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
|
||||
-Wl,--retain-symbols-file=linux.def $(EXTRALIB)
|
||||
$(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||
$(FEXTRALIB) $(EXTRALIB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||
rm -f linktest
|
||||
|
||||
endif
|
||||
@@ -123,15 +159,15 @@ ifeq ($(OSNAME), OSF1)
|
||||
so : ../$(LIBSONAME)
|
||||
|
||||
../$(LIBSONAME) :
|
||||
$(CC) -shared -o ../$(LIBSONAME) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) ../$(LIBNAME)
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), SunOS)
|
||||
|
||||
so : ../$(LIBSONAME)
|
||||
$(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(EXTRALIB)
|
||||
$(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||
rm -f linktest
|
||||
|
||||
endif
|
||||
@@ -162,23 +198,23 @@ static : ../$(LIBNAME)
|
||||
rm -f goto.$(SUFFIX)
|
||||
|
||||
linux.def : gensymbol ../Makefile.system ../getarch.c
|
||||
perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F)
|
||||
perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
||||
osx.def : gensymbol ../Makefile.system ../getarch.c
|
||||
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F)
|
||||
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
||||
aix.def : gensymbol ../Makefile.system ../getarch.c
|
||||
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F)
|
||||
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
||||
symbol.S : gensymbol
|
||||
perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > symbol.S
|
||||
perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > symbol.S
|
||||
|
||||
test : linktest.c
|
||||
$(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
||||
rm -f linktest
|
||||
|
||||
linktest.c : gensymbol ../Makefile.system ../getarch.c
|
||||
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > linktest.c
|
||||
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > linktest.c
|
||||
|
||||
clean ::
|
||||
@rm -f *.def *.dylib __.SYMDEF*
|
||||
|
||||
2730
exports/gensymbol
2730
exports/gensymbol
File diff suppressed because it is too large
Load Diff
43
f_check
43
f_check
@@ -24,7 +24,7 @@ $compiler = "" if $compiler eq "f77";
|
||||
|
||||
if ($compiler eq "") {
|
||||
|
||||
@lists = ("f77", "g77", "g95", "gfortran", "frt", "fort", "openf90", "openf95",
|
||||
@lists = ("g77", "g95", "gfortran", "frt", "fort", "openf90", "openf95",
|
||||
"sunf77", "sunf90", "sunf95",
|
||||
"xlf95", "xlf90", "xlf",
|
||||
"ppuf77", "ppuf95", "ppuf90", "ppuxlf",
|
||||
@@ -32,11 +32,12 @@ if ($compiler eq "") {
|
||||
"pgf95", "pgf90", "pgf77",
|
||||
"ifort");
|
||||
|
||||
OUTER:
|
||||
foreach $lists (@lists) {
|
||||
foreach $path (@path) {
|
||||
if (-f $path . "/" . $lists) {
|
||||
if (-x $path . "/" . $lists) {
|
||||
$compiler = $lists;
|
||||
break;
|
||||
last OUTER;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -113,6 +114,12 @@ if ($compiler eq "") {
|
||||
$vendor = IBM;
|
||||
$openmp = "-openmp";
|
||||
}
|
||||
|
||||
# for embeded underscore name, e.g. zho_ge, it may append 2 underscores.
|
||||
$data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s`;
|
||||
if ($data =~ /zho_ge__/) {
|
||||
$need2bu = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if ($vendor eq "") {
|
||||
@@ -210,6 +217,10 @@ if (!$?) {
|
||||
if ($?) {
|
||||
$link = `$compiler $openmp -q32 -v ftest2.f 2>&1 && rm -f a.out a.exe`;
|
||||
}
|
||||
#For gfortran MIPS
|
||||
if ($?) {
|
||||
$link = `$compiler $openmp -mabi=n32 -v ftest2.f 2>&1 && rm -f a.out a.exe`;
|
||||
}
|
||||
$binary = "" if ($?);
|
||||
}
|
||||
|
||||
@@ -218,6 +229,10 @@ if (!$?) {
|
||||
if ($?) {
|
||||
$link = `$compiler $openmp -q64 -v ftest2.f 2>&1 && rm -f a.out a.exe`;
|
||||
}
|
||||
#For gfortran MIPS
|
||||
if ($?) {
|
||||
$link = `$compiler $openmp -mabi=64 -v ftest2.f 2>&1 && rm -f a.out a.exe`;
|
||||
}
|
||||
$binary = "" if ($?);
|
||||
}
|
||||
|
||||
@@ -236,7 +251,11 @@ if ($link ne "") {
|
||||
|
||||
$link =~ s/\-rpath\s+/\-rpath\@/g;
|
||||
|
||||
$link =~ s/\-rpath-link\s+/\-rpath-link\@/g;
|
||||
|
||||
@flags = split(/[\s\,\n]/, $link);
|
||||
# remove leading and trailing quotes from each flag.
|
||||
@flags = map {s/^['"]|['"]$//g; $_} @flags;
|
||||
|
||||
foreach $flags (@flags) {
|
||||
if (
|
||||
@@ -254,7 +273,15 @@ if ($link ne "") {
|
||||
$linker_L .= "-Wl,". $flags . " ";
|
||||
}
|
||||
|
||||
if ($flags =~ /^\-rpath/) {
|
||||
if ($flags =~ /^\-rpath\@/) {
|
||||
$flags =~ s/\@/\,/g;
|
||||
if ($vendor eq "PGI") {
|
||||
$flags =~ s/lib$/libso/;
|
||||
}
|
||||
$linker_L .= "-Wl,". $flags . " " ;
|
||||
}
|
||||
|
||||
if ($flags =~ /^\-rpath-link\@/) {
|
||||
$flags =~ s/\@/\,/g;
|
||||
if ($vendor eq "PGI") {
|
||||
$flags =~ s/lib$/libso/;
|
||||
@@ -274,6 +301,7 @@ if ($link ne "") {
|
||||
&& ($flags !~ /kernel32/)
|
||||
&& ($flags !~ /advapi32/)
|
||||
&& ($flags !~ /shell32/)
|
||||
&& ($flags !~ /^\-l$/)
|
||||
) {
|
||||
$linker_l .= $flags . " ";
|
||||
}
|
||||
@@ -283,6 +311,10 @@ if ($link ne "") {
|
||||
|
||||
}
|
||||
|
||||
if ($vendor eq "INTEL"){
|
||||
$linker_a .= "-lgfortran"
|
||||
}
|
||||
|
||||
open(MAKEFILE, ">> $makefile") || die "Can't append $makefile";
|
||||
open(CONFFILE, ">> $config" ) || die "Can't append $config";
|
||||
|
||||
@@ -293,6 +325,9 @@ print MAKEFILE "NOFORTRAN=1\n" if $nofortran == 1;
|
||||
|
||||
print CONFFILE "#define BUNDERSCORE\t$bu\n" if $bu ne "";
|
||||
print CONFFILE "#define NEEDBUNDERSCORE\t1\n" if $bu ne "";
|
||||
print CONFFILE "#define NEED2UNDERSCORES\t1\n" if $need2bu ne "";
|
||||
|
||||
print MAKEFILE "NEED2UNDERSCORES=1\n" if $need2bu ne "";
|
||||
|
||||
if (($linker_l ne "") || ($linker_a ne "")) {
|
||||
print MAKEFILE "FEXTRALIB=$linker_L $linker_l $linker_a\n";
|
||||
|
||||
6
ftest3.f
Normal file
6
ftest3.f
Normal file
@@ -0,0 +1,6 @@
|
||||
double complex function zho_ge()
|
||||
|
||||
zho_ge = (0.0d0,0.0d0)
|
||||
|
||||
return
|
||||
end
|
||||
278
getarch.c
278
getarch.c
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -83,6 +83,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#endif
|
||||
#ifdef linux
|
||||
#include <sys/sysinfo.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
/* #define FORCE_P2 */
|
||||
@@ -96,12 +97,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
/* #define FORCE_PENRYN */
|
||||
/* #define FORCE_DUNNINGTON */
|
||||
/* #define FORCE_NEHALEM */
|
||||
/* #define FORCE_SANDYBRIDGE */
|
||||
/* #define FORCE_ATOM */
|
||||
/* #define FORCE_ATHLON */
|
||||
/* #define FORCE_OPTERON */
|
||||
/* #define FORCE_OPTERON_SSE3 */
|
||||
/* #define FORCE_BARCELONA */
|
||||
/* #define FORCE_SHANGHAI */
|
||||
/* #define FORCE_ISTANBUL */
|
||||
/* #define FORCE_BOBCAT */
|
||||
/* #define FORCE_BULLDOZER */
|
||||
/* #define FORCE_PILEDRIVER */
|
||||
/* #define FORCE_SSE_GENERIC */
|
||||
/* #define FORCE_VIAC3 */
|
||||
/* #define FORCE_NANO */
|
||||
@@ -116,11 +122,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
/* #define FORCE_PPC440FP2 */
|
||||
/* #define FORCE_CELL */
|
||||
/* #define FORCE_SICORTEX */
|
||||
/* #define FORCE_LOONGSON3A */
|
||||
/* #define FORCE_LOONGSON3A */
|
||||
/* #define FORCE_LOONGSON3B */
|
||||
/* #define FORCE_ITANIUM2 */
|
||||
/* #define FORCE_GENERIC */
|
||||
/* #define FORCE_SPARC */
|
||||
/* #define FORCE_SPARCV7 */
|
||||
/* #define FORCE_GENERIC */
|
||||
|
||||
#ifdef FORCE_P2
|
||||
#define FORCE
|
||||
@@ -130,26 +137,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPENTIUM2 " \
|
||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX"
|
||||
#define LIBNAME "p2"
|
||||
#define CORENAME "P5"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_COPPERMINE
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
#define ARCHITECTURE "X86"
|
||||
#define SUBARCHITECTURE "PENTIUM3"
|
||||
#define ARCHCONFIG "-DPENTIUM3 " \
|
||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE "
|
||||
#define LIBNAME "coppermine"
|
||||
#define CORENAME "COPPERMINE"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_KATMAI
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
@@ -158,12 +151,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPENTIUM3 " \
|
||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE "
|
||||
#define LIBNAME "katmai"
|
||||
#define CORENAME "KATMAI"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_COPPERMINE
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
#define ARCHITECTURE "X86"
|
||||
#define SUBARCHITECTURE "PENTIUM3"
|
||||
#define ARCHCONFIG "-DPENTIUM3 " \
|
||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE "
|
||||
#define LIBNAME "coppermine"
|
||||
#define CORENAME "COPPERMINE"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_NORTHWOOD
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
@@ -172,7 +179,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPENTIUM4 " \
|
||||
"-DL1_DATA_SIZE=8192 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
|
||||
#define LIBNAME "northwood"
|
||||
#define CORENAME "NORTHWOOD"
|
||||
@@ -186,7 +193,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPENTIUM4 " \
|
||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3"
|
||||
#define LIBNAME "prescott"
|
||||
#define CORENAME "PRESCOTT"
|
||||
@@ -200,7 +207,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPENTIUMM " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
|
||||
#define LIBNAME "banias"
|
||||
#define CORENAME "BANIAS"
|
||||
@@ -214,7 +221,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPENTIUMM " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
|
||||
#define LIBNAME "yonah"
|
||||
#define CORENAME "YONAH"
|
||||
@@ -228,7 +235,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DCORE2 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=256 -DDTB_SIZE=4096 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
|
||||
#define LIBNAME "core2"
|
||||
#define CORENAME "CORE2"
|
||||
@@ -242,7 +249,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPENRYN " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=256 -DDTB_SIZE=4096 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1"
|
||||
#define LIBNAME "penryn"
|
||||
#define CORENAME "PENRYN"
|
||||
@@ -257,7 +264,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DL3_SIZE=16777216 -DL3_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=256 -DDTB_SIZE=4096 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1"
|
||||
#define LIBNAME "dunnington"
|
||||
#define CORENAME "DUNNINGTON"
|
||||
@@ -271,12 +278,41 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DNEHALEM " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2"
|
||||
#define LIBNAME "nehalem"
|
||||
#define CORENAME "NEHALEM"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_SANDYBRIDGE
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
#define ARCHITECTURE "X86"
|
||||
#define SUBARCHITECTURE "SANDYBRIDGE"
|
||||
#define ARCHCONFIG "-DSANDYBRIDGE " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX"
|
||||
#define LIBNAME "sandybridge"
|
||||
#define CORENAME "SANDYBRIDGE"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_HASWELL
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
#define ARCHITECTURE "X86"
|
||||
#define SUBARCHITECTURE "HASWELL"
|
||||
#define ARCHCONFIG "-DHASWELL " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
|
||||
"-DFMA3"
|
||||
#define LIBNAME "haswell"
|
||||
#define CORENAME "HASWELL"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_ATOM
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
@@ -285,7 +321,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DATOM " \
|
||||
"-DL1_DATA_SIZE=24576 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
|
||||
#define LIBNAME "atom"
|
||||
#define CORENAME "ATOM"
|
||||
@@ -299,7 +335,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DATHLON " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
|
||||
"-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
|
||||
"-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE "
|
||||
#define LIBNAME "athlon"
|
||||
#define CORENAME "ATHLON"
|
||||
@@ -313,7 +349,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DOPTERON " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
|
||||
"-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
|
||||
"-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
|
||||
#define LIBNAME "opteron"
|
||||
#define CORENAME "OPTERON"
|
||||
@@ -327,7 +363,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DOPTERON " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
|
||||
"-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
|
||||
"-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3"
|
||||
#define LIBNAME "opteron"
|
||||
#define CORENAME "OPTERON"
|
||||
@@ -341,13 +377,60 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DBARCELONA " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL3_SIZE=2097152 " \
|
||||
"-DDTB_ENTRIES=48 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
|
||||
"-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
|
||||
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU"
|
||||
#define LIBNAME "barcelona"
|
||||
#define CORENAME "BARCELONA"
|
||||
#endif
|
||||
|
||||
#if defined(FORCE_BOBCAT)
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
#define ARCHITECTURE "X86"
|
||||
#define SUBARCHITECTURE "BOBCAT"
|
||||
#define ARCHCONFIG "-DBOBCAT " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=40 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 " \
|
||||
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_CFLUSH -DHAVE_CMOV"
|
||||
#define LIBNAME "bobcat"
|
||||
#define CORENAME "BOBCAT"
|
||||
#endif
|
||||
|
||||
#if defined (FORCE_BULLDOZER)
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
#define ARCHITECTURE "X86"
|
||||
#define SUBARCHITECTURE "BULLDOZER"
|
||||
#define ARCHCONFIG "-DBULLDOZER " \
|
||||
"-DL1_DATA_SIZE=49152 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1024000 -DL2_LINESIZE=64 -DL3_SIZE=16777216 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
|
||||
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU " \
|
||||
"-DHAVE_AVX -DHAVE_FMA4"
|
||||
#define LIBNAME "bulldozer"
|
||||
#define CORENAME "BULLDOZER"
|
||||
#endif
|
||||
|
||||
#if defined (FORCE_PILEDRIVER)
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
#define ARCHITECTURE "X86"
|
||||
#define SUBARCHITECTURE "PILEDRIVER"
|
||||
#define ARCHCONFIG "-DPILEDRIVER " \
|
||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL3_SIZE=12582912 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
|
||||
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
|
||||
"-DHAVE_AVX -DHAVE_FMA4 -DHAVE_FMA3"
|
||||
#define LIBNAME "piledriver"
|
||||
#define CORENAME "PILEDRIVER"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_SSE_GENERIC
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
@@ -356,7 +439,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DGENERIC " \
|
||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2"
|
||||
#define LIBNAME "generic"
|
||||
#define CORENAME "GENERIC"
|
||||
@@ -370,7 +453,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DVIAC3 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=65536 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_MMX -DHAVE_SSE "
|
||||
#define LIBNAME "viac3"
|
||||
#define CORENAME "VIAC3"
|
||||
@@ -384,7 +467,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DNANO " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
|
||||
#define LIBNAME "nano"
|
||||
#define CORENAME "NANO"
|
||||
@@ -398,7 +481,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPOWER3 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=2097152 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=256 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
"-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "power3"
|
||||
#define CORENAME "POWER3"
|
||||
#endif
|
||||
@@ -411,7 +494,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPOWER4 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=1509949 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 "
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 "
|
||||
#define LIBNAME "power4"
|
||||
#define CORENAME "POWER4"
|
||||
#endif
|
||||
@@ -424,7 +507,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPOWER5 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=1509949 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 "
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 "
|
||||
#define LIBNAME "power5"
|
||||
#define CORENAME "POWER5"
|
||||
#endif
|
||||
@@ -437,7 +520,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPOWER6 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "power6"
|
||||
#define CORENAME "POWER6"
|
||||
#endif
|
||||
@@ -450,7 +533,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPPCG4 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "ppcg4"
|
||||
#define CORENAME "PPCG4"
|
||||
#endif
|
||||
@@ -463,7 +546,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPPC970 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "ppc970"
|
||||
#define CORENAME "PPC970"
|
||||
#endif
|
||||
@@ -476,7 +559,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPPC970 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=1024976 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "ppc970mp"
|
||||
#define CORENAME "PPC970"
|
||||
#endif
|
||||
@@ -489,7 +572,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPPC440 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=16384 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
|
||||
#define LIBNAME "ppc440"
|
||||
#define CORENAME "PPC440"
|
||||
#endif
|
||||
@@ -502,7 +585,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPPC440FP2 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=16384 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
|
||||
#define LIBNAME "ppc440FP2"
|
||||
#define CORENAME "PPC440FP2"
|
||||
#endif
|
||||
@@ -515,7 +598,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DCELL " \
|
||||
"-DL1_DATA_SIZE=262144 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "cell"
|
||||
#define CORENAME "CELL"
|
||||
#endif
|
||||
@@ -528,7 +611,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DSICORTEX " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_ENTRIES=32 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
"-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "mips"
|
||||
#define CORENAME "sicortex"
|
||||
#endif
|
||||
@@ -542,12 +625,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DLOONGSON3A " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
|
||||
#define LIBNAME "loongson3a"
|
||||
#define CORENAME "LOONGSON3A"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_LOONGSON3B
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "MIPS"
|
||||
#define SUBARCHITECTURE "LOONGSON3B"
|
||||
#define SUBDIRNAME "mips64"
|
||||
#define ARCHCONFIG "-DLOONGSON3B " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
|
||||
#define LIBNAME "loongson3b"
|
||||
#define CORENAME "LOONGSON3B"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_ITANIUM2
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "IA64"
|
||||
@@ -555,7 +652,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define SUBDIRNAME "ia64"
|
||||
#define ARCHCONFIG "-DITANIUM2 " \
|
||||
"-DL1_DATA_SIZE=262144 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=1572864 -DL2_LINESIZE=128 -DDTB_SIZE=16384 -DDTB_ENTRIES=128 "
|
||||
"-DL2_SIZE=1572864 -DL2_LINESIZE=128 -DDTB_SIZE=16384 -DDTB_DEFAULT_ENTRIES=128 "
|
||||
#define LIBNAME "itanium2"
|
||||
#define CORENAME "itanium2"
|
||||
#endif
|
||||
@@ -567,7 +664,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define SUBDIRNAME "sparc"
|
||||
#define ARCHCONFIG "-DSPARC -DV9 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_ENTRIES=64 "
|
||||
"-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_DEFAULT_ENTRIES=64 "
|
||||
#define LIBNAME "sparc"
|
||||
#define CORENAME "sparc"
|
||||
#endif
|
||||
@@ -579,7 +676,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define SUBDIRNAME "sparc"
|
||||
#define ARCHCONFIG "-DSPARC -DV7 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_ENTRIES=64 "
|
||||
"-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_DEFAULT_ENTRIES=64 "
|
||||
#define LIBNAME "sparcv7"
|
||||
#define CORENAME "sparcv7"
|
||||
#endif
|
||||
@@ -592,11 +689,57 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DGENERIC " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "generic"
|
||||
#define CORENAME "generic"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_ARMV7
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM"
|
||||
#define SUBARCHITECTURE "ARMV7"
|
||||
#define SUBDIRNAME "arm"
|
||||
#define ARCHCONFIG "-DARMV7 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||
"-DHAVE_VFPV3 -DHAVE_VFP"
|
||||
#define LIBNAME "armv7"
|
||||
#define CORENAME "ARMV7"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_ARMV6
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM"
|
||||
#define SUBARCHITECTURE "ARMV6"
|
||||
#define SUBDIRNAME "arm"
|
||||
#define ARCHCONFIG "-DARMV6 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||
"-DHAVE_VFP"
|
||||
#define LIBNAME "armv6"
|
||||
#define CORENAME "ARMV6"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_ARMV8
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM64"
|
||||
#define SUBARCHITECTURE "ARMV8"
|
||||
#define SUBDIRNAME "arm64"
|
||||
#define ARCHCONFIG "-DARMV8 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||
"-DHAVE_VFP -DHAVE_VFPV3 -DHAVE_VFPV4"
|
||||
#define LIBNAME "armv8"
|
||||
#define CORENAME "ARMV8"
|
||||
#else
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef FORCE
|
||||
|
||||
#if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \
|
||||
@@ -604,30 +747,47 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#ifndef POWER
|
||||
#define POWER
|
||||
#endif
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
#if defined(__i386__) || (__x86_64__)
|
||||
#include "cpuid_x86.c"
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
#ifdef __ia64__
|
||||
#include "cpuid_ia64.c"
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
#ifdef __alpha
|
||||
#include "cpuid_alpha.c"
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
#ifdef POWER
|
||||
#include "cpuid_power.c"
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
#ifdef sparc
|
||||
#include "cpuid_sparc.c"
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
#ifdef __mips__
|
||||
#include "cpuid_mips.c"
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
#ifdef __arm__
|
||||
#include "cpuid_arm.c"
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef OPENBLAS_SUPPORTED
|
||||
#error "This arch/CPU is not supported by OpenBLAS."
|
||||
#endif
|
||||
|
||||
#else
|
||||
@@ -644,7 +804,8 @@ static int get_num_cores(void) {
|
||||
#endif
|
||||
|
||||
#ifdef linux
|
||||
return get_nprocs();
|
||||
//returns the number of processors which are currently online
|
||||
return sysconf(_SC_NPROCESSORS_ONLN);
|
||||
|
||||
#elif defined(OS_WINDOWS)
|
||||
|
||||
@@ -679,7 +840,7 @@ int main(int argc, char *argv[]){
|
||||
#ifdef FORCE
|
||||
printf("CORE=%s\n", CORENAME);
|
||||
#else
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__)
|
||||
printf("CORE=%s\n", get_corename());
|
||||
#endif
|
||||
#endif
|
||||
@@ -694,6 +855,11 @@ int main(int argc, char *argv[]){
|
||||
|
||||
printf("NUM_CORES=%d\n", get_num_cores());
|
||||
|
||||
#if defined(__arm__) && !defined(FORCE)
|
||||
get_features();
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
#ifndef FORCE
|
||||
get_sse();
|
||||
@@ -729,8 +895,12 @@ int main(int argc, char *argv[]){
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if NO_PARALLEL_MAKE==1
|
||||
printf("MAKE += -j 1\n");
|
||||
#else
|
||||
#ifndef OS_WINDOWS
|
||||
printf("MAKE += -j %d\n", get_num_cores());
|
||||
#endif
|
||||
#endif
|
||||
|
||||
break;
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
if ((argc < 1) || (*argv[1] == '0')) {
|
||||
if ( (argc <= 1) || ((argc >= 2) && (*argv[1] == '0'))) {
|
||||
printf("SGEMM_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M);
|
||||
printf("SGEMM_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N);
|
||||
printf("DGEMM_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M);
|
||||
@@ -22,14 +22,57 @@ int main(int argc, char **argv) {
|
||||
printf("ZGEMM_UNROLL_N=%d\n", ZGEMM_DEFAULT_UNROLL_N);
|
||||
printf("XGEMM_UNROLL_M=%d\n", XGEMM_DEFAULT_UNROLL_M);
|
||||
printf("XGEMM_UNROLL_N=%d\n", XGEMM_DEFAULT_UNROLL_N);
|
||||
|
||||
#ifdef CGEMM3M_DEFAULT_UNROLL_M
|
||||
printf("CGEMM3M_UNROLL_M=%d\n", CGEMM3M_DEFAULT_UNROLL_M);
|
||||
#else
|
||||
printf("CGEMM3M_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M);
|
||||
#endif
|
||||
|
||||
#ifdef CGEMM3M_DEFAULT_UNROLL_N
|
||||
printf("CGEMM3M_UNROLL_N=%d\n", CGEMM3M_DEFAULT_UNROLL_N);
|
||||
#else
|
||||
printf("CGEMM3M_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N);
|
||||
#endif
|
||||
|
||||
#ifdef ZGEMM3M_DEFAULT_UNROLL_M
|
||||
printf("ZGEMM3M_UNROLL_M=%d\n", ZGEMM3M_DEFAULT_UNROLL_M);
|
||||
#else
|
||||
printf("ZGEMM3M_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M);
|
||||
#endif
|
||||
|
||||
#ifdef ZGEMM3M_DEFAULT_UNROLL_N
|
||||
printf("ZGEMM3M_UNROLL_N=%d\n", ZGEMM3M_DEFAULT_UNROLL_N);
|
||||
#else
|
||||
printf("ZGEMM3M_UNROLL_N=%d\n", DGEMM_DEFAULT_UNROLL_N);
|
||||
#endif
|
||||
|
||||
#ifdef XGEMM3M_DEFAULT_UNROLL_M
|
||||
printf("XGEMM3M_UNROLL_M=%d\n", ZGEMM3M_DEFAULT_UNROLL_M);
|
||||
#else
|
||||
printf("XGEMM3M_UNROLL_M=%d\n", QGEMM_DEFAULT_UNROLL_M);
|
||||
#endif
|
||||
|
||||
#ifdef XGEMM3M_DEFAULT_UNROLL_N
|
||||
printf("XGEMM3M_UNROLL_N=%d\n", ZGEMM3M_DEFAULT_UNROLL_N);
|
||||
#else
|
||||
printf("XGEMM3M_UNROLL_N=%d\n", QGEMM_DEFAULT_UNROLL_N);
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
if ((argc >= 1) && (*argv[1] == '1')) {
|
||||
if ((argc >= 2) && (*argv[1] == '1')) {
|
||||
printf("#define SLOCAL_BUFFER_SIZE\t%ld\n", (SGEMM_DEFAULT_Q * SGEMM_DEFAULT_UNROLL_N * 4 * 1 * sizeof(float)));
|
||||
printf("#define DLOCAL_BUFFER_SIZE\t%ld\n", (DGEMM_DEFAULT_Q * DGEMM_DEFAULT_UNROLL_N * 2 * 1 * sizeof(double)));
|
||||
printf("#define CLOCAL_BUFFER_SIZE\t%ld\n", (CGEMM_DEFAULT_Q * CGEMM_DEFAULT_UNROLL_N * 4 * 2 * sizeof(float)));
|
||||
printf("#define ZLOCAL_BUFFER_SIZE\t%ld\n", (ZGEMM_DEFAULT_Q * ZGEMM_DEFAULT_UNROLL_N * 2 * 2 * sizeof(double)));
|
||||
|
||||
#ifdef USE64BITINT
|
||||
printf("#define USE64BITINT\n");
|
||||
#endif
|
||||
printf("#define GEMM_MULTITHREAD_THRESHOLD\t%ld\n", (long int)GEMM_MULTITHREAD_THRESHOLD);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -318,7 +318,7 @@ CZBLAS3OBJS = \
|
||||
|
||||
ifndef NO_CBLAS
|
||||
|
||||
CFLAGS += -I.
|
||||
override CFLAGS += -I.
|
||||
|
||||
SBLAS1OBJS += $(CSBLAS1OBJS)
|
||||
SBLAS2OBJS += $(CSBLAS2OBJS)
|
||||
@@ -400,7 +400,7 @@ all :: libs
|
||||
|
||||
ifdef FUNCTION_PROFILE
|
||||
$(BLASOBJS) $(BLASOBJS_P) : functable.h
|
||||
$(BLASOBJS) $(BLASOBJS_P) : CFLAGS += -DPROFILE_FUNC_NAME=interface_$(*F)
|
||||
$(BLASOBJS) $(BLASOBJS_P) : override CFLAGS += -DPROFILE_FUNC_NAME=interface_$(*F)
|
||||
|
||||
functable.h : Makefile
|
||||
./create $(FUNCALLFILES) > functable.h
|
||||
@@ -420,7 +420,7 @@ level3 : $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $
|
||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||
|
||||
$(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \
|
||||
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : CFLAGS += -DCBLAS
|
||||
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : override CFLAGS += -DCBLAS
|
||||
|
||||
srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
@@ -770,20 +770,36 @@ xgeru.$(SUFFIX) xgeru.$(PSUFFIX) : zger.c
|
||||
xgerc.$(SUFFIX) xgerc.$(PSUFFIX) : zger.c
|
||||
$(CC) -c $(CFLAGS) -DCONJ $< -o $(@F)
|
||||
|
||||
ifndef USE_NETLIB_GEMV
|
||||
sgemv.$(SUFFIX) sgemv.$(PSUFFIX): gemv.c
|
||||
$(CC) -c $(CFLAGS) -o $(@F) $<
|
||||
|
||||
dgemv.$(SUFFIX) dgemv.$(PSUFFIX): gemv.c
|
||||
$(CC) -c $(CFLAGS) -o $(@F) $<
|
||||
else
|
||||
sgemv.$(SUFFIX) sgemv.$(PSUFFIX): netlib/sgemv.f
|
||||
$(FC) -c $(FFLAGS) -o $(@F) $<
|
||||
|
||||
dgemv.$(SUFFIX) dgemv.$(PSUFFIX): netlib/dgemv.f
|
||||
$(FC) -c $(FFLAGS) -o $(@F) $<
|
||||
endif
|
||||
|
||||
qgemv.$(SUFFIX) qgemv.$(PSUFFIX): gemv.c
|
||||
$(CC) -c $(CFLAGS) -o $(@F) $<
|
||||
|
||||
ifndef USE_NETLIB_GEMV
|
||||
cgemv.$(SUFFIX) cgemv.$(PSUFFIX): zgemv.c
|
||||
$(CC) -c $(CFLAGS) -o $(@F) $<
|
||||
|
||||
zgemv.$(SUFFIX) zgemv.$(PSUFFIX): zgemv.c
|
||||
$(CC) -c $(CFLAGS) -o $(@F) $<
|
||||
else
|
||||
cgemv.$(SUFFIX) cgemv.$(PSUFFIX): netlib/cgemv.f
|
||||
$(FC) -c $(FFLAGS) -o $(@F) $<
|
||||
|
||||
zgemv.$(SUFFIX) zgemv.$(PSUFFIX): netlib/zgemv.f
|
||||
$(FC) -c $(FFLAGS) -o $(@F) $<
|
||||
endif
|
||||
|
||||
xgemv.$(SUFFIX) xgemv.$(PSUFFIX): zgemv.c
|
||||
$(CC) -c $(CFLAGS) -o $(@F) $<
|
||||
|
||||
@@ -85,7 +85,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
|
||||
//In that case, the threads would be dependent.
|
||||
if (incx == 0 || incy == 0)
|
||||
nthreads = 1;
|
||||
|
||||
|
||||
//Temporarily walk around the low performance issue with small imput size & multithreads.
|
||||
if (n <= 10000)
|
||||
nthreads = 1;
|
||||
|
||||
if (nthreads == 1) {
|
||||
#endif
|
||||
|
||||
|
||||
0
interface/create
Normal file → Executable file
0
interface/create
Normal file → Executable file
@@ -49,6 +49,7 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){
|
||||
BLASLONG n = *N;
|
||||
BLASLONG incx = *INCX;
|
||||
BLASLONG incy = *INCY;
|
||||
double ret = 0.0;
|
||||
|
||||
PRINT_DEBUG_NAME;
|
||||
|
||||
@@ -61,19 +62,21 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){
|
||||
if (incx < 0) x -= (n - 1) * incx;
|
||||
if (incy < 0) y -= (n - 1) * incy;
|
||||
|
||||
return DSDOT_K(n, x, incx, y, incy);
|
||||
ret=DSDOT_K(n, x, incx, y, incy);
|
||||
|
||||
FUNCTION_PROFILE_END(1, n, n);
|
||||
|
||||
IDEBUG_END;
|
||||
|
||||
return 0;
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){
|
||||
|
||||
double ret = 0.0;
|
||||
|
||||
PRINT_DEBUG_CNAME;
|
||||
|
||||
@@ -86,13 +89,13 @@ double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){
|
||||
if (incx < 0) x -= (n - 1) * incx;
|
||||
if (incy < 0) y -= (n - 1) * incy;
|
||||
|
||||
return DSDOT_K(n, x, incx, y, incy);
|
||||
ret=DSDOT_K(n, x, incx, y, incy);
|
||||
|
||||
FUNCTION_PROFILE_END(1, n, n);
|
||||
|
||||
IDEBUG_END;
|
||||
|
||||
return 0;
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -71,6 +71,10 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef GEMM_MULTITHREAD_THRESHOLD
|
||||
# define GEMM_MULTITHREAD_THRESHOLD 4
|
||||
#endif
|
||||
|
||||
static int (*gemm[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
|
||||
#ifndef GEMM3M
|
||||
GEMM_NN, GEMM_TN, GEMM_RN, GEMM_CN,
|
||||
@@ -397,8 +401,13 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
|
||||
mode |= (transb << BLAS_TRANSB_SHIFT);
|
||||
|
||||
args.common = NULL;
|
||||
args.nthreads = num_cpu_avail(3);
|
||||
|
||||
if(args.m <= GEMM_MULTITHREAD_THRESHOLD || args.n <= GEMM_MULTITHREAD_THRESHOLD
|
||||
|| args.k <=GEMM_MULTITHREAD_THRESHOLD){
|
||||
args.nthreads = 1;
|
||||
}else{
|
||||
args.nthreads = num_cpu_avail(3);
|
||||
}
|
||||
if (args.nthreads == 1) {
|
||||
#endif
|
||||
|
||||
|
||||
285
interface/netlib/cgemv.f
Normal file
285
interface/netlib/cgemv.f
Normal file
@@ -0,0 +1,285 @@
|
||||
SUBROUTINE CGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
* .. Scalar Arguments ..
|
||||
COMPLEX ALPHA,BETA
|
||||
INTEGER INCX,INCY,LDA,M,N
|
||||
CHARACTER TRANS
|
||||
* ..
|
||||
* .. Array Arguments ..
|
||||
COMPLEX A(LDA,*),X(*),Y(*)
|
||||
* ..
|
||||
*
|
||||
* Purpose
|
||||
* =======
|
||||
*
|
||||
* CGEMV performs one of the matrix-vector operations
|
||||
*
|
||||
* y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or
|
||||
*
|
||||
* y := alpha*A**H*x + beta*y,
|
||||
*
|
||||
* where alpha and beta are scalars, x and y are vectors and A is an
|
||||
* m by n matrix.
|
||||
*
|
||||
* Arguments
|
||||
* ==========
|
||||
*
|
||||
* TRANS - CHARACTER*1.
|
||||
* On entry, TRANS specifies the operation to be performed as
|
||||
* follows:
|
||||
*
|
||||
* TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
|
||||
*
|
||||
* TRANS = 'T' or 't' y := alpha*A**T*x + beta*y.
|
||||
*
|
||||
* TRANS = 'C' or 'c' y := alpha*A**H*x + beta*y.
|
||||
*
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* M - INTEGER.
|
||||
* On entry, M specifies the number of rows of the matrix A.
|
||||
* M must be at least zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* N - INTEGER.
|
||||
* On entry, N specifies the number of columns of the matrix A.
|
||||
* N must be at least zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* ALPHA - COMPLEX .
|
||||
* On entry, ALPHA specifies the scalar alpha.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* A - COMPLEX array of DIMENSION ( LDA, n ).
|
||||
* Before entry, the leading m by n part of the array A must
|
||||
* contain the matrix of coefficients.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* LDA - INTEGER.
|
||||
* On entry, LDA specifies the first dimension of A as declared
|
||||
* in the calling (sub) program. LDA must be at least
|
||||
* max( 1, m ).
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* X - COMPLEX array of DIMENSION at least
|
||||
* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
|
||||
* and at least
|
||||
* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
|
||||
* Before entry, the incremented array X must contain the
|
||||
* vector x.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* INCX - INTEGER.
|
||||
* On entry, INCX specifies the increment for the elements of
|
||||
* X. INCX must not be zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* BETA - COMPLEX .
|
||||
* On entry, BETA specifies the scalar beta. When BETA is
|
||||
* supplied as zero then Y need not be set on input.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* Y - COMPLEX array of DIMENSION at least
|
||||
* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
|
||||
* and at least
|
||||
* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
|
||||
* Before entry with BETA non-zero, the incremented array Y
|
||||
* must contain the vector y. On exit, Y is overwritten by the
|
||||
* updated vector y.
|
||||
*
|
||||
* INCY - INTEGER.
|
||||
* On entry, INCY specifies the increment for the elements of
|
||||
* Y. INCY must not be zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* Further Details
|
||||
* ===============
|
||||
*
|
||||
* Level 2 Blas routine.
|
||||
* The vector and matrix arguments are not referenced when N = 0, or M = 0
|
||||
*
|
||||
* -- Written on 22-October-1986.
|
||||
* Jack Dongarra, Argonne National Lab.
|
||||
* Jeremy Du Croz, Nag Central Office.
|
||||
* Sven Hammarling, Nag Central Office.
|
||||
* Richard Hanson, Sandia National Labs.
|
||||
*
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
COMPLEX ONE
|
||||
PARAMETER (ONE= (1.0E+0,0.0E+0))
|
||||
COMPLEX ZERO
|
||||
PARAMETER (ZERO= (0.0E+0,0.0E+0))
|
||||
* ..
|
||||
* .. Local Scalars ..
|
||||
COMPLEX TEMP
|
||||
INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY
|
||||
LOGICAL NOCONJ
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
LOGICAL LSAME
|
||||
EXTERNAL LSAME
|
||||
* ..
|
||||
* .. External Subroutines ..
|
||||
EXTERNAL XERBLA
|
||||
* ..
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC CONJG,MAX
|
||||
* ..
|
||||
*
|
||||
* Test the input parameters.
|
||||
*
|
||||
INFO = 0
|
||||
IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
|
||||
+ .NOT.LSAME(TRANS,'C')) THEN
|
||||
INFO = 1
|
||||
ELSE IF (M.LT.0) THEN
|
||||
INFO = 2
|
||||
ELSE IF (N.LT.0) THEN
|
||||
INFO = 3
|
||||
ELSE IF (LDA.LT.MAX(1,M)) THEN
|
||||
INFO = 6
|
||||
ELSE IF (INCX.EQ.0) THEN
|
||||
INFO = 8
|
||||
ELSE IF (INCY.EQ.0) THEN
|
||||
INFO = 11
|
||||
END IF
|
||||
IF (INFO.NE.0) THEN
|
||||
CALL XERBLA('CGEMV ',INFO)
|
||||
RETURN
|
||||
END IF
|
||||
*
|
||||
* Quick return if possible.
|
||||
*
|
||||
IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
|
||||
+ ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
|
||||
*
|
||||
NOCONJ = LSAME(TRANS,'T')
|
||||
*
|
||||
* Set LENX and LENY, the lengths of the vectors x and y, and set
|
||||
* up the start points in X and Y.
|
||||
*
|
||||
IF (LSAME(TRANS,'N')) THEN
|
||||
LENX = N
|
||||
LENY = M
|
||||
ELSE
|
||||
LENX = M
|
||||
LENY = N
|
||||
END IF
|
||||
IF (INCX.GT.0) THEN
|
||||
KX = 1
|
||||
ELSE
|
||||
KX = 1 - (LENX-1)*INCX
|
||||
END IF
|
||||
IF (INCY.GT.0) THEN
|
||||
KY = 1
|
||||
ELSE
|
||||
KY = 1 - (LENY-1)*INCY
|
||||
END IF
|
||||
*
|
||||
* Start the operations. In this version the elements of A are
|
||||
* accessed sequentially with one pass through A.
|
||||
*
|
||||
* First form y := beta*y.
|
||||
*
|
||||
IF (BETA.NE.ONE) THEN
|
||||
IF (INCY.EQ.1) THEN
|
||||
IF (BETA.EQ.ZERO) THEN
|
||||
DO 10 I = 1,LENY
|
||||
Y(I) = ZERO
|
||||
10 CONTINUE
|
||||
ELSE
|
||||
DO 20 I = 1,LENY
|
||||
Y(I) = BETA*Y(I)
|
||||
20 CONTINUE
|
||||
END IF
|
||||
ELSE
|
||||
IY = KY
|
||||
IF (BETA.EQ.ZERO) THEN
|
||||
DO 30 I = 1,LENY
|
||||
Y(IY) = ZERO
|
||||
IY = IY + INCY
|
||||
30 CONTINUE
|
||||
ELSE
|
||||
DO 40 I = 1,LENY
|
||||
Y(IY) = BETA*Y(IY)
|
||||
IY = IY + INCY
|
||||
40 CONTINUE
|
||||
END IF
|
||||
END IF
|
||||
END IF
|
||||
IF (ALPHA.EQ.ZERO) RETURN
|
||||
IF (LSAME(TRANS,'N')) THEN
|
||||
*
|
||||
* Form y := alpha*A*x + y.
|
||||
*
|
||||
JX = KX
|
||||
IF (INCY.EQ.1) THEN
|
||||
DO 60 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
DO 50 I = 1,M
|
||||
Y(I) = Y(I) + TEMP*A(I,J)
|
||||
50 CONTINUE
|
||||
END IF
|
||||
JX = JX + INCX
|
||||
60 CONTINUE
|
||||
ELSE
|
||||
DO 80 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
DO 70 I = 1,M
|
||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
END IF
|
||||
JX = JX + INCX
|
||||
80 CONTINUE
|
||||
END IF
|
||||
ELSE
|
||||
*
|
||||
* Form y := alpha*A**T*x + y or y := alpha*A**H*x + y.
|
||||
*
|
||||
JY = KY
|
||||
IF (INCX.EQ.1) THEN
|
||||
DO 110 J = 1,N
|
||||
TEMP = ZERO
|
||||
IF (NOCONJ) THEN
|
||||
DO 90 I = 1,M
|
||||
TEMP = TEMP + A(I,J)*X(I)
|
||||
90 CONTINUE
|
||||
ELSE
|
||||
DO 100 I = 1,M
|
||||
TEMP = TEMP + CONJG(A(I,J))*X(I)
|
||||
100 CONTINUE
|
||||
END IF
|
||||
Y(JY) = Y(JY) + ALPHA*TEMP
|
||||
JY = JY + INCY
|
||||
110 CONTINUE
|
||||
ELSE
|
||||
DO 140 J = 1,N
|
||||
TEMP = ZERO
|
||||
IX = KX
|
||||
IF (NOCONJ) THEN
|
||||
DO 120 I = 1,M
|
||||
TEMP = TEMP + A(I,J)*X(IX)
|
||||
IX = IX + INCX
|
||||
120 CONTINUE
|
||||
ELSE
|
||||
DO 130 I = 1,M
|
||||
TEMP = TEMP + CONJG(A(I,J))*X(IX)
|
||||
IX = IX + INCX
|
||||
130 CONTINUE
|
||||
END IF
|
||||
Y(JY) = Y(JY) + ALPHA*TEMP
|
||||
JY = JY + INCY
|
||||
140 CONTINUE
|
||||
END IF
|
||||
END IF
|
||||
*
|
||||
RETURN
|
||||
*
|
||||
* End of CGEMV .
|
||||
*
|
||||
END
|
||||
265
interface/netlib/dgemv.f
Normal file
265
interface/netlib/dgemv.f
Normal file
@@ -0,0 +1,265 @@
|
||||
SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
* .. Scalar Arguments ..
|
||||
DOUBLE PRECISION ALPHA,BETA
|
||||
INTEGER INCX,INCY,LDA,M,N
|
||||
CHARACTER TRANS
|
||||
* ..
|
||||
* .. Array Arguments ..
|
||||
DOUBLE PRECISION A(LDA,*),X(*),Y(*)
|
||||
* ..
|
||||
*
|
||||
* Purpose
|
||||
* =======
|
||||
*
|
||||
* DGEMV performs one of the matrix-vector operations
|
||||
*
|
||||
* y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,
|
||||
*
|
||||
* where alpha and beta are scalars, x and y are vectors and A is an
|
||||
* m by n matrix.
|
||||
*
|
||||
* Arguments
|
||||
* ==========
|
||||
*
|
||||
* TRANS - CHARACTER*1.
|
||||
* On entry, TRANS specifies the operation to be performed as
|
||||
* follows:
|
||||
*
|
||||
* TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
|
||||
*
|
||||
* TRANS = 'T' or 't' y := alpha*A**T*x + beta*y.
|
||||
*
|
||||
* TRANS = 'C' or 'c' y := alpha*A**T*x + beta*y.
|
||||
*
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* M - INTEGER.
|
||||
* On entry, M specifies the number of rows of the matrix A.
|
||||
* M must be at least zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* N - INTEGER.
|
||||
* On entry, N specifies the number of columns of the matrix A.
|
||||
* N must be at least zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* ALPHA - DOUBLE PRECISION.
|
||||
* On entry, ALPHA specifies the scalar alpha.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
|
||||
* Before entry, the leading m by n part of the array A must
|
||||
* contain the matrix of coefficients.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* LDA - INTEGER.
|
||||
* On entry, LDA specifies the first dimension of A as declared
|
||||
* in the calling (sub) program. LDA must be at least
|
||||
* max( 1, m ).
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* X - DOUBLE PRECISION array of DIMENSION at least
|
||||
* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
|
||||
* and at least
|
||||
* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
|
||||
* Before entry, the incremented array X must contain the
|
||||
* vector x.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* INCX - INTEGER.
|
||||
* On entry, INCX specifies the increment for the elements of
|
||||
* X. INCX must not be zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* BETA - DOUBLE PRECISION.
|
||||
* On entry, BETA specifies the scalar beta. When BETA is
|
||||
* supplied as zero then Y need not be set on input.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* Y - DOUBLE PRECISION array of DIMENSION at least
|
||||
* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
|
||||
* and at least
|
||||
* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
|
||||
* Before entry with BETA non-zero, the incremented array Y
|
||||
* must contain the vector y. On exit, Y is overwritten by the
|
||||
* updated vector y.
|
||||
*
|
||||
* INCY - INTEGER.
|
||||
* On entry, INCY specifies the increment for the elements of
|
||||
* Y. INCY must not be zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* Further Details
|
||||
* ===============
|
||||
*
|
||||
* Level 2 Blas routine.
|
||||
* The vector and matrix arguments are not referenced when N = 0, or M = 0
|
||||
*
|
||||
* -- Written on 22-October-1986.
|
||||
* Jack Dongarra, Argonne National Lab.
|
||||
* Jeremy Du Croz, Nag Central Office.
|
||||
* Sven Hammarling, Nag Central Office.
|
||||
* Richard Hanson, Sandia National Labs.
|
||||
*
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
DOUBLE PRECISION ONE,ZERO
|
||||
PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
|
||||
* ..
|
||||
* .. Local Scalars ..
|
||||
DOUBLE PRECISION TEMP
|
||||
INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
LOGICAL LSAME
|
||||
EXTERNAL LSAME
|
||||
* ..
|
||||
* .. External Subroutines ..
|
||||
EXTERNAL XERBLA
|
||||
* ..
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC MAX
|
||||
* ..
|
||||
*
|
||||
* Test the input parameters.
|
||||
*
|
||||
INFO = 0
|
||||
IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
|
||||
+ .NOT.LSAME(TRANS,'C')) THEN
|
||||
INFO = 1
|
||||
ELSE IF (M.LT.0) THEN
|
||||
INFO = 2
|
||||
ELSE IF (N.LT.0) THEN
|
||||
INFO = 3
|
||||
ELSE IF (LDA.LT.MAX(1,M)) THEN
|
||||
INFO = 6
|
||||
ELSE IF (INCX.EQ.0) THEN
|
||||
INFO = 8
|
||||
ELSE IF (INCY.EQ.0) THEN
|
||||
INFO = 11
|
||||
END IF
|
||||
IF (INFO.NE.0) THEN
|
||||
CALL XERBLA('DGEMV ',INFO)
|
||||
RETURN
|
||||
END IF
|
||||
*
|
||||
* Quick return if possible.
|
||||
*
|
||||
IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
|
||||
+ ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
|
||||
*
|
||||
* Set LENX and LENY, the lengths of the vectors x and y, and set
|
||||
* up the start points in X and Y.
|
||||
*
|
||||
IF (LSAME(TRANS,'N')) THEN
|
||||
LENX = N
|
||||
LENY = M
|
||||
ELSE
|
||||
LENX = M
|
||||
LENY = N
|
||||
END IF
|
||||
IF (INCX.GT.0) THEN
|
||||
KX = 1
|
||||
ELSE
|
||||
KX = 1 - (LENX-1)*INCX
|
||||
END IF
|
||||
IF (INCY.GT.0) THEN
|
||||
KY = 1
|
||||
ELSE
|
||||
KY = 1 - (LENY-1)*INCY
|
||||
END IF
|
||||
*
|
||||
* Start the operations. In this version the elements of A are
|
||||
* accessed sequentially with one pass through A.
|
||||
*
|
||||
* First form y := beta*y.
|
||||
*
|
||||
IF (BETA.NE.ONE) THEN
|
||||
IF (INCY.EQ.1) THEN
|
||||
IF (BETA.EQ.ZERO) THEN
|
||||
DO 10 I = 1,LENY
|
||||
Y(I) = ZERO
|
||||
10 CONTINUE
|
||||
ELSE
|
||||
DO 20 I = 1,LENY
|
||||
Y(I) = BETA*Y(I)
|
||||
20 CONTINUE
|
||||
END IF
|
||||
ELSE
|
||||
IY = KY
|
||||
IF (BETA.EQ.ZERO) THEN
|
||||
DO 30 I = 1,LENY
|
||||
Y(IY) = ZERO
|
||||
IY = IY + INCY
|
||||
30 CONTINUE
|
||||
ELSE
|
||||
DO 40 I = 1,LENY
|
||||
Y(IY) = BETA*Y(IY)
|
||||
IY = IY + INCY
|
||||
40 CONTINUE
|
||||
END IF
|
||||
END IF
|
||||
END IF
|
||||
IF (ALPHA.EQ.ZERO) RETURN
|
||||
IF (LSAME(TRANS,'N')) THEN
|
||||
*
|
||||
* Form y := alpha*A*x + y.
|
||||
*
|
||||
JX = KX
|
||||
IF (INCY.EQ.1) THEN
|
||||
DO 60 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
DO 50 I = 1,M
|
||||
Y(I) = Y(I) + TEMP*A(I,J)
|
||||
50 CONTINUE
|
||||
END IF
|
||||
JX = JX + INCX
|
||||
60 CONTINUE
|
||||
ELSE
|
||||
DO 80 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
DO 70 I = 1,M
|
||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
END IF
|
||||
JX = JX + INCX
|
||||
80 CONTINUE
|
||||
END IF
|
||||
ELSE
|
||||
*
|
||||
* Form y := alpha*A**T*x + y.
|
||||
*
|
||||
JY = KY
|
||||
IF (INCX.EQ.1) THEN
|
||||
DO 100 J = 1,N
|
||||
TEMP = ZERO
|
||||
DO 90 I = 1,M
|
||||
TEMP = TEMP + A(I,J)*X(I)
|
||||
90 CONTINUE
|
||||
Y(JY) = Y(JY) + ALPHA*TEMP
|
||||
JY = JY + INCY
|
||||
100 CONTINUE
|
||||
ELSE
|
||||
DO 120 J = 1,N
|
||||
TEMP = ZERO
|
||||
IX = KX
|
||||
DO 110 I = 1,M
|
||||
TEMP = TEMP + A(I,J)*X(IX)
|
||||
IX = IX + INCX
|
||||
110 CONTINUE
|
||||
Y(JY) = Y(JY) + ALPHA*TEMP
|
||||
JY = JY + INCY
|
||||
120 CONTINUE
|
||||
END IF
|
||||
END IF
|
||||
*
|
||||
RETURN
|
||||
*
|
||||
* End of DGEMV .
|
||||
*
|
||||
END
|
||||
265
interface/netlib/sgemv.f
Normal file
265
interface/netlib/sgemv.f
Normal file
@@ -0,0 +1,265 @@
|
||||
SUBROUTINE SGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
* .. Scalar Arguments ..
|
||||
REAL ALPHA,BETA
|
||||
INTEGER INCX,INCY,LDA,M,N
|
||||
CHARACTER TRANS
|
||||
* ..
|
||||
* .. Array Arguments ..
|
||||
REAL A(LDA,*),X(*),Y(*)
|
||||
* ..
|
||||
*
|
||||
* Purpose
|
||||
* =======
|
||||
*
|
||||
* SGEMV performs one of the matrix-vector operations
|
||||
*
|
||||
* y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,
|
||||
*
|
||||
* where alpha and beta are scalars, x and y are vectors and A is an
|
||||
* m by n matrix.
|
||||
*
|
||||
* Arguments
|
||||
* ==========
|
||||
*
|
||||
* TRANS - CHARACTER*1.
|
||||
* On entry, TRANS specifies the operation to be performed as
|
||||
* follows:
|
||||
*
|
||||
* TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
|
||||
*
|
||||
* TRANS = 'T' or 't' y := alpha*A**T*x + beta*y.
|
||||
*
|
||||
* TRANS = 'C' or 'c' y := alpha*A**T*x + beta*y.
|
||||
*
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* M - INTEGER.
|
||||
* On entry, M specifies the number of rows of the matrix A.
|
||||
* M must be at least zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* N - INTEGER.
|
||||
* On entry, N specifies the number of columns of the matrix A.
|
||||
* N must be at least zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* ALPHA - REAL .
|
||||
* On entry, ALPHA specifies the scalar alpha.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* A - REAL array of DIMENSION ( LDA, n ).
|
||||
* Before entry, the leading m by n part of the array A must
|
||||
* contain the matrix of coefficients.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* LDA - INTEGER.
|
||||
* On entry, LDA specifies the first dimension of A as declared
|
||||
* in the calling (sub) program. LDA must be at least
|
||||
* max( 1, m ).
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* X - REAL array of DIMENSION at least
|
||||
* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
|
||||
* and at least
|
||||
* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
|
||||
* Before entry, the incremented array X must contain the
|
||||
* vector x.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* INCX - INTEGER.
|
||||
* On entry, INCX specifies the increment for the elements of
|
||||
* X. INCX must not be zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* BETA - REAL .
|
||||
* On entry, BETA specifies the scalar beta. When BETA is
|
||||
* supplied as zero then Y need not be set on input.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* Y - REAL array of DIMENSION at least
|
||||
* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
|
||||
* and at least
|
||||
* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
|
||||
* Before entry with BETA non-zero, the incremented array Y
|
||||
* must contain the vector y. On exit, Y is overwritten by the
|
||||
* updated vector y.
|
||||
*
|
||||
* INCY - INTEGER.
|
||||
* On entry, INCY specifies the increment for the elements of
|
||||
* Y. INCY must not be zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* Further Details
|
||||
* ===============
|
||||
*
|
||||
* Level 2 Blas routine.
|
||||
* The vector and matrix arguments are not referenced when N = 0, or M = 0
|
||||
*
|
||||
* -- Written on 22-October-1986.
|
||||
* Jack Dongarra, Argonne National Lab.
|
||||
* Jeremy Du Croz, Nag Central Office.
|
||||
* Sven Hammarling, Nag Central Office.
|
||||
* Richard Hanson, Sandia National Labs.
|
||||
*
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
REAL ONE,ZERO
|
||||
PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
|
||||
* ..
|
||||
* .. Local Scalars ..
|
||||
REAL TEMP
|
||||
INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
LOGICAL LSAME
|
||||
EXTERNAL LSAME
|
||||
* ..
|
||||
* .. External Subroutines ..
|
||||
EXTERNAL XERBLA
|
||||
* ..
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC MAX
|
||||
* ..
|
||||
*
|
||||
* Test the input parameters.
|
||||
*
|
||||
INFO = 0
|
||||
IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
|
||||
+ .NOT.LSAME(TRANS,'C')) THEN
|
||||
INFO = 1
|
||||
ELSE IF (M.LT.0) THEN
|
||||
INFO = 2
|
||||
ELSE IF (N.LT.0) THEN
|
||||
INFO = 3
|
||||
ELSE IF (LDA.LT.MAX(1,M)) THEN
|
||||
INFO = 6
|
||||
ELSE IF (INCX.EQ.0) THEN
|
||||
INFO = 8
|
||||
ELSE IF (INCY.EQ.0) THEN
|
||||
INFO = 11
|
||||
END IF
|
||||
IF (INFO.NE.0) THEN
|
||||
CALL XERBLA('SGEMV ',INFO)
|
||||
RETURN
|
||||
END IF
|
||||
*
|
||||
* Quick return if possible.
|
||||
*
|
||||
IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
|
||||
+ ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
|
||||
*
|
||||
* Set LENX and LENY, the lengths of the vectors x and y, and set
|
||||
* up the start points in X and Y.
|
||||
*
|
||||
IF (LSAME(TRANS,'N')) THEN
|
||||
LENX = N
|
||||
LENY = M
|
||||
ELSE
|
||||
LENX = M
|
||||
LENY = N
|
||||
END IF
|
||||
IF (INCX.GT.0) THEN
|
||||
KX = 1
|
||||
ELSE
|
||||
KX = 1 - (LENX-1)*INCX
|
||||
END IF
|
||||
IF (INCY.GT.0) THEN
|
||||
KY = 1
|
||||
ELSE
|
||||
KY = 1 - (LENY-1)*INCY
|
||||
END IF
|
||||
*
|
||||
* Start the operations. In this version the elements of A are
|
||||
* accessed sequentially with one pass through A.
|
||||
*
|
||||
* First form y := beta*y.
|
||||
*
|
||||
IF (BETA.NE.ONE) THEN
|
||||
IF (INCY.EQ.1) THEN
|
||||
IF (BETA.EQ.ZERO) THEN
|
||||
DO 10 I = 1,LENY
|
||||
Y(I) = ZERO
|
||||
10 CONTINUE
|
||||
ELSE
|
||||
DO 20 I = 1,LENY
|
||||
Y(I) = BETA*Y(I)
|
||||
20 CONTINUE
|
||||
END IF
|
||||
ELSE
|
||||
IY = KY
|
||||
IF (BETA.EQ.ZERO) THEN
|
||||
DO 30 I = 1,LENY
|
||||
Y(IY) = ZERO
|
||||
IY = IY + INCY
|
||||
30 CONTINUE
|
||||
ELSE
|
||||
DO 40 I = 1,LENY
|
||||
Y(IY) = BETA*Y(IY)
|
||||
IY = IY + INCY
|
||||
40 CONTINUE
|
||||
END IF
|
||||
END IF
|
||||
END IF
|
||||
IF (ALPHA.EQ.ZERO) RETURN
|
||||
IF (LSAME(TRANS,'N')) THEN
|
||||
*
|
||||
* Form y := alpha*A*x + y.
|
||||
*
|
||||
JX = KX
|
||||
IF (INCY.EQ.1) THEN
|
||||
DO 60 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
DO 50 I = 1,M
|
||||
Y(I) = Y(I) + TEMP*A(I,J)
|
||||
50 CONTINUE
|
||||
END IF
|
||||
JX = JX + INCX
|
||||
60 CONTINUE
|
||||
ELSE
|
||||
DO 80 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
DO 70 I = 1,M
|
||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
END IF
|
||||
JX = JX + INCX
|
||||
80 CONTINUE
|
||||
END IF
|
||||
ELSE
|
||||
*
|
||||
* Form y := alpha*A**T*x + y.
|
||||
*
|
||||
JY = KY
|
||||
IF (INCX.EQ.1) THEN
|
||||
DO 100 J = 1,N
|
||||
TEMP = ZERO
|
||||
DO 90 I = 1,M
|
||||
TEMP = TEMP + A(I,J)*X(I)
|
||||
90 CONTINUE
|
||||
Y(JY) = Y(JY) + ALPHA*TEMP
|
||||
JY = JY + INCY
|
||||
100 CONTINUE
|
||||
ELSE
|
||||
DO 120 J = 1,N
|
||||
TEMP = ZERO
|
||||
IX = KX
|
||||
DO 110 I = 1,M
|
||||
TEMP = TEMP + A(I,J)*X(IX)
|
||||
IX = IX + INCX
|
||||
110 CONTINUE
|
||||
Y(JY) = Y(JY) + ALPHA*TEMP
|
||||
JY = JY + INCY
|
||||
120 CONTINUE
|
||||
END IF
|
||||
END IF
|
||||
*
|
||||
RETURN
|
||||
*
|
||||
* End of SGEMV .
|
||||
*
|
||||
END
|
||||
285
interface/netlib/zgemv.f
Normal file
285
interface/netlib/zgemv.f
Normal file
@@ -0,0 +1,285 @@
|
||||
SUBROUTINE ZGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
* .. Scalar Arguments ..
|
||||
DOUBLE COMPLEX ALPHA,BETA
|
||||
INTEGER INCX,INCY,LDA,M,N
|
||||
CHARACTER TRANS
|
||||
* ..
|
||||
* .. Array Arguments ..
|
||||
DOUBLE COMPLEX A(LDA,*),X(*),Y(*)
|
||||
* ..
|
||||
*
|
||||
* Purpose
|
||||
* =======
|
||||
*
|
||||
* ZGEMV performs one of the matrix-vector operations
|
||||
*
|
||||
* y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or
|
||||
*
|
||||
* y := alpha*A**H*x + beta*y,
|
||||
*
|
||||
* where alpha and beta are scalars, x and y are vectors and A is an
|
||||
* m by n matrix.
|
||||
*
|
||||
* Arguments
|
||||
* ==========
|
||||
*
|
||||
* TRANS - CHARACTER*1.
|
||||
* On entry, TRANS specifies the operation to be performed as
|
||||
* follows:
|
||||
*
|
||||
* TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
|
||||
*
|
||||
* TRANS = 'T' or 't' y := alpha*A**T*x + beta*y.
|
||||
*
|
||||
* TRANS = 'C' or 'c' y := alpha*A**H*x + beta*y.
|
||||
*
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* M - INTEGER.
|
||||
* On entry, M specifies the number of rows of the matrix A.
|
||||
* M must be at least zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* N - INTEGER.
|
||||
* On entry, N specifies the number of columns of the matrix A.
|
||||
* N must be at least zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* ALPHA - COMPLEX*16 .
|
||||
* On entry, ALPHA specifies the scalar alpha.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* A - COMPLEX*16 array of DIMENSION ( LDA, n ).
|
||||
* Before entry, the leading m by n part of the array A must
|
||||
* contain the matrix of coefficients.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* LDA - INTEGER.
|
||||
* On entry, LDA specifies the first dimension of A as declared
|
||||
* in the calling (sub) program. LDA must be at least
|
||||
* max( 1, m ).
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* X - COMPLEX*16 array of DIMENSION at least
|
||||
* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
|
||||
* and at least
|
||||
* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
|
||||
* Before entry, the incremented array X must contain the
|
||||
* vector x.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* INCX - INTEGER.
|
||||
* On entry, INCX specifies the increment for the elements of
|
||||
* X. INCX must not be zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* BETA - COMPLEX*16 .
|
||||
* On entry, BETA specifies the scalar beta. When BETA is
|
||||
* supplied as zero then Y need not be set on input.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* Y - COMPLEX*16 array of DIMENSION at least
|
||||
* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
|
||||
* and at least
|
||||
* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
|
||||
* Before entry with BETA non-zero, the incremented array Y
|
||||
* must contain the vector y. On exit, Y is overwritten by the
|
||||
* updated vector y.
|
||||
*
|
||||
* INCY - INTEGER.
|
||||
* On entry, INCY specifies the increment for the elements of
|
||||
* Y. INCY must not be zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* Further Details
|
||||
* ===============
|
||||
*
|
||||
* Level 2 Blas routine.
|
||||
* The vector and matrix arguments are not referenced when N = 0, or M = 0
|
||||
*
|
||||
* -- Written on 22-October-1986.
|
||||
* Jack Dongarra, Argonne National Lab.
|
||||
* Jeremy Du Croz, Nag Central Office.
|
||||
* Sven Hammarling, Nag Central Office.
|
||||
* Richard Hanson, Sandia National Labs.
|
||||
*
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
DOUBLE COMPLEX ONE
|
||||
PARAMETER (ONE= (1.0D+0,0.0D+0))
|
||||
DOUBLE COMPLEX ZERO
|
||||
PARAMETER (ZERO= (0.0D+0,0.0D+0))
|
||||
* ..
|
||||
* .. Local Scalars ..
|
||||
DOUBLE COMPLEX TEMP
|
||||
INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY
|
||||
LOGICAL NOCONJ
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
LOGICAL LSAME
|
||||
EXTERNAL LSAME
|
||||
* ..
|
||||
* .. External Subroutines ..
|
||||
EXTERNAL XERBLA
|
||||
* ..
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC DCONJG,MAX
|
||||
* ..
|
||||
*
|
||||
* Test the input parameters.
|
||||
*
|
||||
INFO = 0
|
||||
IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
|
||||
+ .NOT.LSAME(TRANS,'C')) THEN
|
||||
INFO = 1
|
||||
ELSE IF (M.LT.0) THEN
|
||||
INFO = 2
|
||||
ELSE IF (N.LT.0) THEN
|
||||
INFO = 3
|
||||
ELSE IF (LDA.LT.MAX(1,M)) THEN
|
||||
INFO = 6
|
||||
ELSE IF (INCX.EQ.0) THEN
|
||||
INFO = 8
|
||||
ELSE IF (INCY.EQ.0) THEN
|
||||
INFO = 11
|
||||
END IF
|
||||
IF (INFO.NE.0) THEN
|
||||
CALL XERBLA('ZGEMV ',INFO)
|
||||
RETURN
|
||||
END IF
|
||||
*
|
||||
* Quick return if possible.
|
||||
*
|
||||
IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
|
||||
+ ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
|
||||
*
|
||||
NOCONJ = LSAME(TRANS,'T')
|
||||
*
|
||||
* Set LENX and LENY, the lengths of the vectors x and y, and set
|
||||
* up the start points in X and Y.
|
||||
*
|
||||
IF (LSAME(TRANS,'N')) THEN
|
||||
LENX = N
|
||||
LENY = M
|
||||
ELSE
|
||||
LENX = M
|
||||
LENY = N
|
||||
END IF
|
||||
IF (INCX.GT.0) THEN
|
||||
KX = 1
|
||||
ELSE
|
||||
KX = 1 - (LENX-1)*INCX
|
||||
END IF
|
||||
IF (INCY.GT.0) THEN
|
||||
KY = 1
|
||||
ELSE
|
||||
KY = 1 - (LENY-1)*INCY
|
||||
END IF
|
||||
*
|
||||
* Start the operations. In this version the elements of A are
|
||||
* accessed sequentially with one pass through A.
|
||||
*
|
||||
* First form y := beta*y.
|
||||
*
|
||||
IF (BETA.NE.ONE) THEN
|
||||
IF (INCY.EQ.1) THEN
|
||||
IF (BETA.EQ.ZERO) THEN
|
||||
DO 10 I = 1,LENY
|
||||
Y(I) = ZERO
|
||||
10 CONTINUE
|
||||
ELSE
|
||||
DO 20 I = 1,LENY
|
||||
Y(I) = BETA*Y(I)
|
||||
20 CONTINUE
|
||||
END IF
|
||||
ELSE
|
||||
IY = KY
|
||||
IF (BETA.EQ.ZERO) THEN
|
||||
DO 30 I = 1,LENY
|
||||
Y(IY) = ZERO
|
||||
IY = IY + INCY
|
||||
30 CONTINUE
|
||||
ELSE
|
||||
DO 40 I = 1,LENY
|
||||
Y(IY) = BETA*Y(IY)
|
||||
IY = IY + INCY
|
||||
40 CONTINUE
|
||||
END IF
|
||||
END IF
|
||||
END IF
|
||||
IF (ALPHA.EQ.ZERO) RETURN
|
||||
IF (LSAME(TRANS,'N')) THEN
|
||||
*
|
||||
* Form y := alpha*A*x + y.
|
||||
*
|
||||
JX = KX
|
||||
IF (INCY.EQ.1) THEN
|
||||
DO 60 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
DO 50 I = 1,M
|
||||
Y(I) = Y(I) + TEMP*A(I,J)
|
||||
50 CONTINUE
|
||||
END IF
|
||||
JX = JX + INCX
|
||||
60 CONTINUE
|
||||
ELSE
|
||||
DO 80 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
DO 70 I = 1,M
|
||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
END IF
|
||||
JX = JX + INCX
|
||||
80 CONTINUE
|
||||
END IF
|
||||
ELSE
|
||||
*
|
||||
* Form y := alpha*A**T*x + y or y := alpha*A**H*x + y.
|
||||
*
|
||||
JY = KY
|
||||
IF (INCX.EQ.1) THEN
|
||||
DO 110 J = 1,N
|
||||
TEMP = ZERO
|
||||
IF (NOCONJ) THEN
|
||||
DO 90 I = 1,M
|
||||
TEMP = TEMP + A(I,J)*X(I)
|
||||
90 CONTINUE
|
||||
ELSE
|
||||
DO 100 I = 1,M
|
||||
TEMP = TEMP + DCONJG(A(I,J))*X(I)
|
||||
100 CONTINUE
|
||||
END IF
|
||||
Y(JY) = Y(JY) + ALPHA*TEMP
|
||||
JY = JY + INCY
|
||||
110 CONTINUE
|
||||
ELSE
|
||||
DO 140 J = 1,N
|
||||
TEMP = ZERO
|
||||
IX = KX
|
||||
IF (NOCONJ) THEN
|
||||
DO 120 I = 1,M
|
||||
TEMP = TEMP + A(I,J)*X(IX)
|
||||
IX = IX + INCX
|
||||
120 CONTINUE
|
||||
ELSE
|
||||
DO 130 I = 1,M
|
||||
TEMP = TEMP + DCONJG(A(I,J))*X(IX)
|
||||
IX = IX + INCX
|
||||
130 CONTINUE
|
||||
END IF
|
||||
Y(JY) = Y(JY) + ALPHA*TEMP
|
||||
JY = JY + INCY
|
||||
140 CONTINUE
|
||||
END IF
|
||||
END IF
|
||||
*
|
||||
RETURN
|
||||
*
|
||||
* End of ZGEMV .
|
||||
*
|
||||
END
|
||||
@@ -7,6 +7,12 @@
|
||||
#define GAMSQ 16777216.e0
|
||||
#define RGAMSQ 5.9604645e-8
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define ABS(x) fabs(x)
|
||||
#else
|
||||
#define ABS(x) fabsf(x)
|
||||
#endif
|
||||
|
||||
#ifndef CBLAS
|
||||
|
||||
void NAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT *DY1, FLOAT *dparam){
|
||||
@@ -47,7 +53,7 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
|
||||
dq2 = dp2 * dy1;
|
||||
dq1 = dp1 * *dx1;
|
||||
|
||||
if (! (abs(dq1) > abs(dq2))) goto L40;
|
||||
if (! (ABS(dq1) > ABS(dq2))) goto L40;
|
||||
|
||||
dh21 = -(dy1) / *dx1;
|
||||
dh12 = dp2 / dp1;
|
||||
@@ -140,7 +146,7 @@ L150:
|
||||
goto L130;
|
||||
|
||||
L160:
|
||||
if (! (abs(*dd2) <= RGAMSQ)) {
|
||||
if (! (ABS(*dd2) <= RGAMSQ)) {
|
||||
goto L190;
|
||||
}
|
||||
if (*dd2 == ZERO) {
|
||||
@@ -157,7 +163,7 @@ L180:
|
||||
goto L160;
|
||||
|
||||
L190:
|
||||
if (! (abs(*dd2) >= GAMSQ)) {
|
||||
if (! (ABS(*dd2) >= GAMSQ)) {
|
||||
goto L220;
|
||||
}
|
||||
igo = 3;
|
||||
|
||||
@@ -136,6 +136,7 @@ void NAME(char *SIDE, char *UPLO,
|
||||
FLOAT *sa, *sb;
|
||||
|
||||
#ifdef SMP
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
int mode = BLAS_XDOUBLE | BLAS_REAL;
|
||||
#elif defined(DOUBLE)
|
||||
@@ -143,6 +144,15 @@ void NAME(char *SIDE, char *UPLO,
|
||||
#else
|
||||
int mode = BLAS_SINGLE | BLAS_REAL;
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
|
||||
#elif defined(DOUBLE)
|
||||
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
|
||||
#else
|
||||
int mode = BLAS_SINGLE | BLAS_COMPLEX;
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(SMP) && !defined(NO_AFFINITY)
|
||||
@@ -237,6 +247,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
|
||||
FLOAT *sa, *sb;
|
||||
|
||||
#ifdef SMP
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
int mode = BLAS_XDOUBLE | BLAS_REAL;
|
||||
#elif defined(DOUBLE)
|
||||
@@ -244,6 +255,15 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
|
||||
#else
|
||||
int mode = BLAS_SINGLE | BLAS_REAL;
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
|
||||
#elif defined(DOUBLE)
|
||||
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
|
||||
#else
|
||||
int mode = BLAS_SINGLE | BLAS_COMPLEX;
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(SMP) && !defined(NO_AFFINITY)
|
||||
|
||||
@@ -60,6 +60,7 @@ static blasint (*trtri_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
|
||||
|
||||
blas_arg_t args;
|
||||
@@ -83,6 +84,7 @@ int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *In
|
||||
TOUPPER(uplo_arg);
|
||||
TOUPPER(diag_arg);
|
||||
|
||||
|
||||
uplo = -1;
|
||||
if (uplo_arg == 'U') uplo = 0;
|
||||
if (uplo_arg == 'L') uplo = 1;
|
||||
@@ -90,6 +92,7 @@ int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *In
|
||||
if (diag_arg == 'U') diag = 0;
|
||||
if (diag_arg == 'N') diag = 1;
|
||||
|
||||
|
||||
info = 0;
|
||||
if (args.lda < MAX(1,args.n)) info = 5;
|
||||
if (args.n < 0) info = 3;
|
||||
|
||||
@@ -6,7 +6,7 @@ TOPDIR = ..
|
||||
include $(TOPDIR)/Makefile.system
|
||||
|
||||
ifdef TARGET_CORE
|
||||
CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
|
||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
|
||||
BUILD_KERNEL = 1
|
||||
KDIR =
|
||||
TSUFFIX = _$(TARGET_CORE)
|
||||
@@ -48,11 +48,16 @@ HPLOBJS = \
|
||||
|
||||
COMMONOBJS += lsame.$(SUFFIX) scabs1.$(SUFFIX) dcabs1.$(SUFFIX)
|
||||
|
||||
ifdef DYNAMIC_ARCH
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
SBLASOBJS += setparam$(TSUFFIX).$(SUFFIX)
|
||||
CCOMMON_OPT += -DTS=$(TSUFFIX)
|
||||
endif
|
||||
|
||||
KERNEL_INTERFACE = ../common_level1.h ../common_level2.h ../common_level3.h
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
KERNEL_INTERFACE += ../common_lapack.h
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86)
|
||||
COMMONOBJS += cpuid.$(SUFFIX)
|
||||
endif
|
||||
@@ -88,9 +93,10 @@ setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h
|
||||
setparam$(TSUFFIX).c : setparam-ref.c
|
||||
sed 's/TS/$(TSUFFIX)/g' $< > $(@F)
|
||||
|
||||
kernel$(TSUFFIX).h : ../common_level1.h ../common_level2.h ../common_level3.h ../common_lapack.h
|
||||
kernel$(TSUFFIX).h : $(KERNEL_INTERFACE)
|
||||
sed 's/\ *(/$(TSUFFIX)(/g' $^ > $(@F)
|
||||
|
||||
|
||||
cpuid.$(SUFFIX): $(KERNELDIR)/cpuid.S
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
@@ -112,10 +118,10 @@ lsame.$(PSUFFIX): $(KERNELDIR)/$(LSAME_KERNEL)
|
||||
cpuid.$(PSUFFIX): $(KERNELDIR)/cpuid.S
|
||||
$(CC) -c $(PFLAGS) $< -o $(@F)
|
||||
|
||||
ifdef DYNAMIC_ARCH
|
||||
#ifdef DYNAMIC_ARCH
|
||||
clean ::
|
||||
@rm -f setparam_*.c kernel_*.h setparam.h kernel.h
|
||||
|
||||
endif
|
||||
#endif
|
||||
|
||||
include $(TOPDIR)/Makefile.tail
|
||||
|
||||
@@ -668,7 +668,7 @@ $(KDIR)qdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@
|
||||
|
||||
$(KDIR)dsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)dsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@
|
||||
|
||||
$(KDIR)sdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@
|
||||
|
||||
@@ -388,7 +388,7 @@ $(KDIR)xgerv_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgerv_k$(TSUFFIX).$(PSUFFIX) : $(KER
|
||||
$(CC) -c $(CFLAGS) -DXDOUBLE -UCONJ -DXCONJ $< -o $@
|
||||
|
||||
$(KDIR)xgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGERCKERNEL) $(XGERPARAM)
|
||||
$(CC) -c $(CFLAGS) -DXDOUBLE -DCONJ-DXCONJ $< -o $@
|
||||
$(CC) -c $(CFLAGS) -DXDOUBLE -DCONJ -DXCONJ $< -o $@
|
||||
|
||||
$(KDIR)chemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CHEMV_U_KERNEL) $(CHEMV_U_PARAM)
|
||||
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHEMV $< -o $@
|
||||
|
||||
@@ -14,6 +14,20 @@ ifeq ($(ARCH), MIPS)
|
||||
USE_GEMM3M = 1
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), arm)
|
||||
USE_TRMM = 1
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), arm64)
|
||||
USE_TRMM = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), LOONGSON3B)
|
||||
USE_TRMM = 1
|
||||
endif
|
||||
|
||||
|
||||
|
||||
SKERNELOBJS += \
|
||||
sgemm_kernel$(TSUFFIX).$(SUFFIX) \
|
||||
$(SGEMMINCOPYOBJ) $(SGEMMITCOPYOBJ) \
|
||||
@@ -498,6 +512,92 @@ $(KDIR)xgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMD
|
||||
$(KDIR)xgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND)
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DCC $< -o $@
|
||||
|
||||
|
||||
ifdef USE_TRMM
|
||||
$(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@
|
||||
|
||||
$(KDIR)strmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@
|
||||
|
||||
$(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@
|
||||
|
||||
$(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@
|
||||
|
||||
$(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@
|
||||
|
||||
$(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@
|
||||
|
||||
$(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@
|
||||
|
||||
$(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@
|
||||
|
||||
$(KDIR)qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@
|
||||
|
||||
$(KDIR)qtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@
|
||||
|
||||
$(KDIR)qtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@
|
||||
|
||||
$(KDIR)qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@
|
||||
|
||||
$(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
|
||||
|
||||
$(KDIR)ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@
|
||||
|
||||
$(KDIR)ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@
|
||||
|
||||
$(KDIR)ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@
|
||||
|
||||
$(KDIR)ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@
|
||||
|
||||
$(KDIR)ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@
|
||||
|
||||
$(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@
|
||||
|
||||
$(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
|
||||
|
||||
$(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
|
||||
|
||||
$(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@
|
||||
|
||||
$(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@
|
||||
|
||||
$(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@
|
||||
|
||||
$(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@
|
||||
|
||||
$(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@
|
||||
|
||||
$(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@
|
||||
|
||||
$(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
|
||||
else
|
||||
$(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@
|
||||
|
||||
@@ -581,6 +681,10 @@ $(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
|
||||
|
||||
$(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
|
||||
endif
|
||||
|
||||
|
||||
|
||||
|
||||
$(KDIR)xtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
|
||||
@@ -1120,328 +1224,328 @@ $(KDIR)xhemm_iutcopy$(TSUFFIX).$(SUFFIX) : generic/zhemm_utcopy_$(XGEMM_UNROLL_M
|
||||
$(KDIR)xhemm_iltcopy$(TSUFFIX).$(SUFFIX) : generic/zhemm_ltcopy_$(XGEMM_UNROLL_M).c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER $< -DLOWER -o $@
|
||||
|
||||
$(KDIR)cgemm3m_oncopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_oncopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_oncopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_oncopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_oncopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_oncopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_otcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_otcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_otcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_otcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_otcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_otcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_incopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_incopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_incopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_incopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_itcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_itcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_oncopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_oncopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_oncopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_oncopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_oncopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_oncopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_otcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_otcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_otcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_otcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_otcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_otcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_incopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_incopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_incopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_incopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_incopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_incopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_itcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_itcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_oncopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_oncopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_oncopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_oncopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_oncopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_oncopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_otcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_otcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_otcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_otcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_otcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_otcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_incopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_incopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_incopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_incopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_incopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_incopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_itcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_itcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)strsm_iunucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_uncopy_$(SGEMM_UNROLL_M).c
|
||||
@@ -2522,328 +2626,328 @@ $(KDIR)xhemm_iutcopy$(TSUFFIX).$(PSUFFIX) : generic/zhemm_utcopy_$(XGEMM_UNROLL_
|
||||
$(KDIR)xhemm_iltcopy$(TSUFFIX).$(PSUFFIX) : generic/zhemm_ltcopy_$(XGEMM_UNROLL_M).c
|
||||
$(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER $< -DLOWER -o $@
|
||||
|
||||
$(KDIR)cgemm3m_oncopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_oncopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_oncopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_oncopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_oncopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_oncopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_otcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_otcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_otcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_otcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_otcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_otcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_incopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_incopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_incopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_incopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_incopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_incopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_itcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_itcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_itcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_itcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_itcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_itcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_oncopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_oncopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_oncopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_oncopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_oncopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_oncopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_otcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_otcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_otcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_otcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_otcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_otcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_incopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_incopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_incopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_incopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_incopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_incopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_itcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_itcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_itcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_itcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_itcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_itcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_oncopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_oncopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_oncopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_oncopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_oncopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_oncopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_otcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_otcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_otcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_otcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_otcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_otcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_incopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_incopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_incopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_incopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_incopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_incopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_itcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_itcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_itcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_itcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_itcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_itcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)strsm_iunucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_uncopy_$(SGEMM_UNROLL_M).c
|
||||
|
||||
46
kernel/arm/KERNEL
Normal file
46
kernel/arm/KERNEL
Normal file
@@ -0,0 +1,46 @@
|
||||
ifndef SNRM2KERNEL
|
||||
SNRM2KERNEL = nrm2.c
|
||||
endif
|
||||
|
||||
ifndef DNRM2KERNEL
|
||||
DNRM2KERNEL = nrm2.c
|
||||
endif
|
||||
|
||||
ifndef CNRM2KERNEL
|
||||
CNRM2KERNEL = znrm2.c
|
||||
endif
|
||||
|
||||
ifndef ZNRM2KERNEL
|
||||
ZNRM2KERNEL = znrm2.c
|
||||
endif
|
||||
|
||||
ifndef SCABS_KERNEL
|
||||
SCABS_KERNEL = ../generic/cabs.c
|
||||
endif
|
||||
|
||||
ifndef DCABS_KERNEL
|
||||
DCABS_KERNEL = ../generic/cabs.c
|
||||
endif
|
||||
|
||||
ifndef QCABS_KERNEL
|
||||
QCABS_KERNEL = ../generic/cabs.c
|
||||
endif
|
||||
|
||||
ifndef LSAME_KERNEL
|
||||
LSAME_KERNEL = ../generic/lsame.c
|
||||
endif
|
||||
|
||||
ifndef SGEMM_BETA
|
||||
SGEMM_BETA = ../generic/gemm_beta.c
|
||||
endif
|
||||
ifndef DGEMM_BETA
|
||||
DGEMM_BETA = ../generic/gemm_beta.c
|
||||
endif
|
||||
ifndef CGEMM_BETA
|
||||
CGEMM_BETA = ../generic/zgemm_beta.c
|
||||
endif
|
||||
ifndef ZGEMM_BETA
|
||||
ZGEMM_BETA = ../generic/zgemm_beta.c
|
||||
endif
|
||||
|
||||
|
||||
142
kernel/arm/KERNEL.ARMV6
Normal file
142
kernel/arm/KERNEL.ARMV6
Normal file
@@ -0,0 +1,142 @@
|
||||
SAMAXKERNEL = iamax_vfp.S
|
||||
DAMAXKERNEL = iamax_vfp.S
|
||||
CAMAXKERNEL = iamax_vfp.S
|
||||
ZAMAXKERNEL = iamax_vfp.S
|
||||
|
||||
SAMINKERNEL = iamax_vfp.S
|
||||
DAMINKERNEL = iamax_vfp.S
|
||||
CAMINKERNEL = iamax_vfp.S
|
||||
ZAMINKERNEL = iamax_vfp.S
|
||||
|
||||
SMAXKERNEL = iamax_vfp.S
|
||||
DMAXKERNEL = iamax_vfp.S
|
||||
|
||||
SMINKERNEL = iamax_vfp.S
|
||||
DMINKERNEL = iamax_vfp.S
|
||||
|
||||
ISAMAXKERNEL = iamax_vfp.S
|
||||
IDAMAXKERNEL = iamax_vfp.S
|
||||
ICAMAXKERNEL = iamax_vfp.S
|
||||
IZAMAXKERNEL = iamax_vfp.S
|
||||
|
||||
ISAMINKERNEL = iamax_vfp.S
|
||||
IDAMINKERNEL = iamax_vfp.S
|
||||
ICAMINKERNEL = iamax_vfp.S
|
||||
IZAMINKERNEL = iamax_vfp.S
|
||||
|
||||
ISMAXKERNEL = iamax_vfp.S
|
||||
IDMAXKERNEL = iamax_vfp.S
|
||||
|
||||
ISMINKERNEL = iamax_vfp.S
|
||||
IDMINKERNEL = iamax_vfp.S
|
||||
|
||||
SASUMKERNEL = asum_vfp.S
|
||||
DASUMKERNEL = asum_vfp.S
|
||||
CASUMKERNEL = asum_vfp.S
|
||||
ZASUMKERNEL = asum_vfp.S
|
||||
|
||||
SAXPYKERNEL = axpy_vfp.S
|
||||
DAXPYKERNEL = axpy_vfp.S
|
||||
CAXPYKERNEL = axpy_vfp.S
|
||||
ZAXPYKERNEL = axpy_vfp.S
|
||||
|
||||
SCOPYKERNEL = scopy_vfp.S
|
||||
DCOPYKERNEL = dcopy_vfp.S
|
||||
CCOPYKERNEL = ccopy_vfp.S
|
||||
ZCOPYKERNEL = zcopy_vfp.S
|
||||
|
||||
SDOTKERNEL = sdot_vfp.S
|
||||
DDOTKERNEL = ddot_vfp.S
|
||||
CDOTKERNEL = cdot_vfp.S
|
||||
ZDOTKERNEL = zdot_vfp.S
|
||||
|
||||
SNRM2KERNEL = nrm2_vfp.S
|
||||
DNRM2KERNEL = nrm2_vfp.S
|
||||
CNRM2KERNEL = nrm2_vfp.S
|
||||
ZNRM2KERNEL = nrm2_vfp.S
|
||||
|
||||
SROTKERNEL = rot_vfp.S
|
||||
DROTKERNEL = rot_vfp.S
|
||||
CROTKERNEL = rot_vfp.S
|
||||
ZROTKERNEL = rot_vfp.S
|
||||
|
||||
SSCALKERNEL = scal_vfp.S
|
||||
DSCALKERNEL = scal_vfp.S
|
||||
CSCALKERNEL = scal_vfp.S
|
||||
ZSCALKERNEL = scal_vfp.S
|
||||
|
||||
SSWAPKERNEL = swap_vfp.S
|
||||
DSWAPKERNEL = swap_vfp.S
|
||||
CSWAPKERNEL = swap_vfp.S
|
||||
ZSWAPKERNEL = swap_vfp.S
|
||||
|
||||
SGEMVNKERNEL = gemv_n_vfp.S
|
||||
DGEMVNKERNEL = gemv_n_vfp.S
|
||||
CGEMVNKERNEL = cgemv_n_vfp.S
|
||||
ZGEMVNKERNEL = zgemv_n_vfp.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t_vfp.S
|
||||
DGEMVTKERNEL = gemv_t_vfp.S
|
||||
CGEMVTKERNEL = cgemv_t_vfp.S
|
||||
ZGEMVTKERNEL = zgemv_t_vfp.S
|
||||
|
||||
STRMMKERNEL = strmm_kernel_4x2_vfp.S
|
||||
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
|
||||
CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
|
||||
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_4x2_vfp.S
|
||||
SGEMMINCOPY = sgemm_ncopy_4_vfp.S
|
||||
SGEMMITCOPY = sgemm_tcopy_4_vfp.S
|
||||
SGEMMINCOPYOBJ = sgemm_incopy.o
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy.o
|
||||
SGEMMONCOPY = sgemm_ncopy_2_vfp.S
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_4x2_vfp.S
|
||||
DGEMMINCOPY = dgemm_ncopy_4_vfp.S
|
||||
DGEMMITCOPY = dgemm_tcopy_4_vfp.S
|
||||
DGEMMINCOPYOBJ = dgemm_incopy.o
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
||||
DGEMMONCOPY = dgemm_ncopy_2_vfp.S
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_2x2_vfp.S
|
||||
CGEMMONCOPY = cgemm_ncopy_2_vfp.S
|
||||
CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_2x2_vfp.S
|
||||
ZGEMMONCOPY = zgemm_ncopy_2_vfp.S
|
||||
ZGEMMOTCOPY = zgemm_tcopy_2_vfp.S
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
|
||||
|
||||
|
||||
141
kernel/arm/KERNEL.ARMV7
Normal file
141
kernel/arm/KERNEL.ARMV7
Normal file
@@ -0,0 +1,141 @@
|
||||
SAMAXKERNEL = iamax_vfp.S
|
||||
DAMAXKERNEL = iamax_vfp.S
|
||||
CAMAXKERNEL = iamax_vfp.S
|
||||
ZAMAXKERNEL = iamax_vfp.S
|
||||
|
||||
SAMINKERNEL = iamax_vfp.S
|
||||
DAMINKERNEL = iamax_vfp.S
|
||||
CAMINKERNEL = iamax_vfp.S
|
||||
ZAMINKERNEL = iamax_vfp.S
|
||||
|
||||
SMAXKERNEL = iamax_vfp.S
|
||||
DMAXKERNEL = iamax_vfp.S
|
||||
|
||||
SMINKERNEL = iamax_vfp.S
|
||||
DMINKERNEL = iamax_vfp.S
|
||||
|
||||
ISAMAXKERNEL = iamax_vfp.S
|
||||
IDAMAXKERNEL = iamax_vfp.S
|
||||
ICAMAXKERNEL = iamax_vfp.S
|
||||
IZAMAXKERNEL = iamax_vfp.S
|
||||
|
||||
ISAMINKERNEL = iamax_vfp.S
|
||||
IDAMINKERNEL = iamax_vfp.S
|
||||
ICAMINKERNEL = iamax_vfp.S
|
||||
IZAMINKERNEL = iamax_vfp.S
|
||||
|
||||
ISMAXKERNEL = iamax_vfp.S
|
||||
IDMAXKERNEL = iamax_vfp.S
|
||||
|
||||
ISMINKERNEL = iamax_vfp.S
|
||||
IDMINKERNEL = iamax_vfp.S
|
||||
|
||||
SSWAPKERNEL = swap_vfp.S
|
||||
DSWAPKERNEL = swap_vfp.S
|
||||
CSWAPKERNEL = swap_vfp.S
|
||||
ZSWAPKERNEL = swap_vfp.S
|
||||
|
||||
SASUMKERNEL = asum_vfp.S
|
||||
DASUMKERNEL = asum_vfp.S
|
||||
CASUMKERNEL = asum_vfp.S
|
||||
ZASUMKERNEL = asum_vfp.S
|
||||
|
||||
SAXPYKERNEL = axpy_vfp.S
|
||||
DAXPYKERNEL = axpy_vfp.S
|
||||
CAXPYKERNEL = axpy_vfp.S
|
||||
ZAXPYKERNEL = axpy_vfp.S
|
||||
|
||||
SCOPYKERNEL = scopy_vfp.S
|
||||
DCOPYKERNEL = dcopy_vfp.S
|
||||
CCOPYKERNEL = ccopy_vfp.S
|
||||
ZCOPYKERNEL = zcopy_vfp.S
|
||||
|
||||
SDOTKERNEL = sdot_vfp.S
|
||||
DDOTKERNEL = ddot_vfp.S
|
||||
CDOTKERNEL = cdot_vfp.S
|
||||
ZDOTKERNEL = zdot_vfp.S
|
||||
|
||||
SNRM2KERNEL = nrm2_vfpv3.S
|
||||
DNRM2KERNEL = nrm2_vfpv3.S
|
||||
CNRM2KERNEL = nrm2_vfpv3.S
|
||||
ZNRM2KERNEL = nrm2_vfpv3.S
|
||||
|
||||
SROTKERNEL = rot_vfp.S
|
||||
DROTKERNEL = rot_vfp.S
|
||||
CROTKERNEL = rot_vfp.S
|
||||
ZROTKERNEL = rot_vfp.S
|
||||
|
||||
SSCALKERNEL = scal_vfp.S
|
||||
DSCALKERNEL = scal_vfp.S
|
||||
CSCALKERNEL = scal_vfp.S
|
||||
ZSCALKERNEL = scal_vfp.S
|
||||
|
||||
SGEMVNKERNEL = gemv_n_vfp.S
|
||||
DGEMVNKERNEL = gemv_n_vfp.S
|
||||
CGEMVNKERNEL = cgemv_n_vfp.S
|
||||
ZGEMVNKERNEL = zgemv_n_vfp.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t_vfp.S
|
||||
DGEMVTKERNEL = gemv_t_vfp.S
|
||||
CGEMVTKERNEL = cgemv_t_vfp.S
|
||||
ZGEMVTKERNEL = zgemv_t_vfp.S
|
||||
|
||||
STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
|
||||
DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
|
||||
CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
|
||||
ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S
|
||||
|
||||
#SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S
|
||||
SGEMMINCOPY =
|
||||
SGEMMITCOPY =
|
||||
SGEMMONCOPY = sgemm_ncopy_4_vfp.S
|
||||
SGEMMOTCOPY = sgemm_tcopy_4_vfp.S
|
||||
SGEMMINCOPYOBJ =
|
||||
SGEMMITCOPYOBJ =
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S
|
||||
DGEMMINCOPY =
|
||||
DGEMMITCOPY =
|
||||
DGEMMONCOPY = dgemm_ncopy_4_vfp.S
|
||||
DGEMMOTCOPY = dgemm_tcopy_4_vfp.S
|
||||
DGEMMINCOPYOBJ =
|
||||
DGEMMITCOPYOBJ =
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S
|
||||
CGEMMONCOPY = cgemm_ncopy_2_vfp.S
|
||||
CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_2x2_vfpv3.S
|
||||
ZGEMMONCOPY = zgemm_ncopy_2_vfp.S
|
||||
ZGEMMOTCOPY = zgemm_tcopy_2_vfp.S
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
|
||||
2
kernel/arm/Makefile
Normal file
2
kernel/arm/Makefile
Normal file
@@ -0,0 +1,2 @@
|
||||
clean ::
|
||||
|
||||
73
kernel/arm/amax.c
Normal file
73
kernel/arm/amax.c
Normal file
@@ -0,0 +1,73 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : NoTest
|
||||
* TEST : NoTest
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT maxf=0.0;
|
||||
|
||||
if (n < 0 || inc_x < 1 ) return(maxf);
|
||||
|
||||
maxf=ABS(x[0]);
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
if( ABS(x[ix]) > ABS(maxf) )
|
||||
{
|
||||
maxf = ABS(x[ix]);
|
||||
}
|
||||
ix += inc_x;
|
||||
i++;
|
||||
}
|
||||
return(maxf);
|
||||
}
|
||||
|
||||
|
||||
73
kernel/arm/amin.c
Normal file
73
kernel/arm/amin.c
Normal file
@@ -0,0 +1,73 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : NoTest
|
||||
* TEST : NoTest
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT minf=0.0;
|
||||
|
||||
if (n < 0 || inc_x < 1 ) return(minf);
|
||||
|
||||
minf=ABS(x[0]);
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
if( ABS(x[ix]) < ABS(minf) )
|
||||
{
|
||||
minf = ABS(x[ix]);
|
||||
}
|
||||
ix += inc_x;
|
||||
i++;
|
||||
}
|
||||
return(minf);
|
||||
}
|
||||
|
||||
|
||||
67
kernel/arm/asum.c
Normal file
67
kernel/arm/asum.c
Normal file
@@ -0,0 +1,67 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
FLOAT sumf = 0.0;
|
||||
if (n < 0 || inc_x < 1 ) return(sumf);
|
||||
|
||||
n *= inc_x;
|
||||
while(i < n)
|
||||
{
|
||||
sumf += ABS(x[i]);
|
||||
i += inc_x;
|
||||
}
|
||||
return(sumf);
|
||||
}
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user