Compare commits
660 Commits
v0.1alpha2
...
piledriver
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d51868190e | ||
|
|
9b5be29886 | ||
|
|
768d2042d4 | ||
|
|
91eaea364b | ||
|
|
a136426624 | ||
|
|
2f0d2ce1ea | ||
|
|
0f6e79f918 | ||
|
|
ea74f331f4 | ||
|
|
53eaf41901 | ||
|
|
9423f980f6 | ||
|
|
c6156b2ef2 | ||
|
|
034a5b2083 | ||
|
|
27d4234d4d | ||
|
|
aae75b2461 | ||
|
|
a74ac84981 | ||
|
|
402d6e91db | ||
|
|
6d2c6b5c74 | ||
|
|
1b5a267cdd | ||
|
|
4a1575e748 | ||
|
|
339fef2649 | ||
|
|
b3254eecaf | ||
|
|
d910404f00 | ||
|
|
1cae4114a8 | ||
|
|
91ca041cc2 | ||
|
|
d2385f0d52 | ||
|
|
ec2dadde9b | ||
|
|
d56292e2e4 | ||
|
|
ffe70b1fdc | ||
|
|
ecbc85b954 | ||
|
|
9e38dbb658 | ||
|
|
d844901062 | ||
|
|
eab79631c3 | ||
|
|
d13aa79d26 | ||
|
|
dfaa489224 | ||
|
|
9fbf437177 | ||
|
|
38ce786754 | ||
|
|
2594728eb7 | ||
|
|
cc7876f660 | ||
|
|
51e5983599 | ||
|
|
65ebab0688 | ||
|
|
7594216412 | ||
|
|
fac07b0687 | ||
|
|
3169f524e4 | ||
|
|
d4833f1801 | ||
|
|
fe8b841c82 | ||
|
|
9b1b01a478 | ||
|
|
2a625447ea | ||
|
|
be18cd47f6 | ||
|
|
8191efc420 | ||
|
|
39dc69db4a | ||
|
|
04d51536a4 | ||
|
|
c947ab85dc | ||
|
|
0b6e13b689 | ||
|
|
e09dc279a2 | ||
|
|
1596ced242 | ||
|
|
4be4db590c | ||
|
|
5c648a8984 | ||
|
|
7581e2e9cb | ||
|
|
c44dc4dd3c | ||
|
|
ac7735e01f | ||
|
|
271ceeba15 | ||
|
|
71eecaaf37 | ||
|
|
9d3fae15a8 | ||
|
|
2d3c884294 | ||
|
|
d54a061713 | ||
|
|
86afb47e83 | ||
|
|
42a4dff056 | ||
|
|
5bc322a66c | ||
|
|
dec7ad0dfd | ||
|
|
274304bd03 | ||
|
|
5007a534c4 | ||
|
|
a537d7d8d7 | ||
|
|
b42145834f | ||
|
|
3d5e792c72 | ||
|
|
a9bd12da2c | ||
|
|
697e198e8a | ||
|
|
36b0f7fe1d | ||
|
|
d2b20c5c51 | ||
|
|
fd1d9fdb22 | ||
|
|
fe5f46c330 | ||
|
|
e25de3d182 | ||
|
|
25c6050593 | ||
|
|
12e02a00e0 | ||
|
|
29a3196f56 | ||
|
|
8776a73773 | ||
|
|
7e84acd3e8 | ||
|
|
33d3ab6e09 | ||
|
|
9a0f978929 | ||
|
|
7f210587f0 | ||
|
|
9f0a3a35b3 | ||
|
|
dbae93110b | ||
|
|
19cd5c64a2 | ||
|
|
9adf87495e | ||
|
|
440db4cdda | ||
|
|
cd93cae5a7 | ||
|
|
8565afb3c2 | ||
|
|
5bf7cf8d67 | ||
|
|
29a005c635 | ||
|
|
f1be3a168a | ||
|
|
410afda9b4 | ||
|
|
bf04544902 | ||
|
|
86283c0be1 | ||
|
|
f27cabfd08 | ||
|
|
23dd474cd0 | ||
|
|
f1b452e160 | ||
|
|
3dabd7e6e6 | ||
|
|
6f4a0ebe38 | ||
|
|
5048a80032 | ||
|
|
6e679266f8 | ||
|
|
f1db386211 | ||
|
|
6da558d2ab | ||
|
|
a2942456ef | ||
|
|
f750103336 | ||
|
|
00f33c0134 | ||
|
|
5b36cc0f47 | ||
|
|
c8f1aeb154 | ||
|
|
8fa93be06e | ||
|
|
1e8128f41c | ||
|
|
6d8095bcb9 | ||
|
|
2f5fdd2000 | ||
|
|
80a2e901b1 | ||
|
|
73770e60b8 | ||
|
|
ac50bccbd2 | ||
|
|
82015beaef | ||
|
|
6216ab8a7e | ||
|
|
370e3834a9 | ||
|
|
95aedfa0ff | ||
|
|
cba97daf3c | ||
|
|
5400a9f4e4 | ||
|
|
e31186efd4 | ||
|
|
2b801a00a5 | ||
|
|
b3eab8fcb7 | ||
|
|
6d9d70c55c | ||
|
|
dfd1064d7b | ||
|
|
02bc36ac79 | ||
|
|
5118a7f4d1 | ||
|
|
e172b70ea2 | ||
|
|
1cf4b974b2 | ||
|
|
7bccff1512 | ||
|
|
afe44b0241 | ||
|
|
a77c71eaf5 | ||
|
|
b2219b3478 | ||
|
|
f5a0038bad | ||
|
|
c937090121 | ||
|
|
fe8c5666f9 | ||
|
|
f6b50057e2 | ||
|
|
2840d56aeb | ||
|
|
2d49db2f5b | ||
|
|
04391e6d9c | ||
|
|
85484a42df | ||
|
|
3983011f0b | ||
|
|
2a1515c9dd | ||
|
|
31f51e78bc | ||
|
|
beffee7d91 | ||
|
|
a35f4343fa | ||
|
|
ce5626a384 | ||
|
|
e0b968c3a7 | ||
|
|
93f1074dd4 | ||
|
|
1c63180bb6 | ||
|
|
22a8fcc4b7 | ||
|
|
9965d48005 | ||
|
|
4a474ea7dc | ||
|
|
69ce737cc5 | ||
|
|
d13788d1b4 | ||
|
|
70411af888 | ||
|
|
16eb780e13 | ||
|
|
a746724e84 | ||
|
|
3f7b0cd994 | ||
|
|
cc6db2ecfe | ||
|
|
3175be4b3d | ||
|
|
a29e6592da | ||
|
|
212463dce9 | ||
|
|
037bd82bef | ||
|
|
eae4cfa3f6 | ||
|
|
6c4a7d0828 | ||
|
|
fe98de2f68 | ||
|
|
db389b5915 | ||
|
|
52f587db7f | ||
|
|
067e8417fd | ||
|
|
a82da3d069 | ||
|
|
1569bf14f8 | ||
|
|
df554aebd2 | ||
|
|
eae6920f2d | ||
|
|
c92ae012a6 | ||
|
|
f51a849d91 | ||
|
|
44ef70420c | ||
|
|
d488b1b1aa | ||
|
|
4070d9a123 | ||
|
|
0b90c0ec64 | ||
|
|
2b8ab8f55b | ||
|
|
1cb9579cd0 | ||
|
|
2638370844 | ||
|
|
89637f87c8 | ||
|
|
c0b1e41bec | ||
|
|
49faee1a51 | ||
|
|
c0159d44a3 | ||
|
|
c17a850c1c | ||
|
|
099853fff6 | ||
|
|
44d23881b5 | ||
|
|
2905042c6a | ||
|
|
32fb6b9bb2 | ||
|
|
673e453b3f | ||
|
|
143cca4dd5 | ||
|
|
aaeb8eaecd | ||
|
|
8aeec32ea0 | ||
|
|
87fc9de572 | ||
|
|
564aa60fec | ||
|
|
f645665dd6 | ||
|
|
e45a347cd2 | ||
|
|
99727ac013 | ||
|
|
6e0a2fbc0c | ||
|
|
0a22f99c58 | ||
|
|
79ba52115d | ||
|
|
835293cc1a | ||
|
|
b736aa8110 | ||
|
|
ae521ecc3e | ||
|
|
36adfe8d64 | ||
|
|
a07cc39571 | ||
|
|
cff70a666d | ||
|
|
b5c2ac4fd6 | ||
|
|
749f45ffc8 | ||
|
|
534c5ec919 | ||
|
|
bd2da90e13 | ||
|
|
84bd0aabaa | ||
|
|
5b504d6c23 | ||
|
|
72b1edaf1b | ||
|
|
a2930664f4 | ||
|
|
6e0db36373 | ||
|
|
1e1250b703 | ||
|
|
23186d9f21 | ||
|
|
e6ebbfd314 | ||
|
|
4471c77905 | ||
|
|
9f0fb6e662 | ||
|
|
f26b7a08aa | ||
|
|
63f14189e3 | ||
|
|
e39384432b | ||
|
|
c5437149c0 | ||
|
|
6f5b395009 | ||
|
|
d4f9571818 | ||
|
|
937d838619 | ||
|
|
a8f9b6a665 | ||
|
|
6209c8fc44 | ||
|
|
238ceb4ac0 | ||
|
|
77b572fa0b | ||
|
|
f69f89b846 | ||
|
|
c77032b0cc | ||
|
|
1b3b9e841d | ||
|
|
b67252c2e4 | ||
|
|
c69e73b868 | ||
|
|
b51e2ba1ee | ||
|
|
9c0a834f98 | ||
|
|
2a7503e563 | ||
|
|
fd0c388681 | ||
|
|
61a9582987 | ||
|
|
b681064c6c | ||
|
|
e80e285928 | ||
|
|
2ed0f6ab60 | ||
|
|
5448643557 | ||
|
|
824c3c4df3 | ||
|
|
c19a488af2 | ||
|
|
32d2ca3035 | ||
|
|
6df39ad9e7 | ||
|
|
3a96e4cbcb | ||
|
|
6f008abcef | ||
|
|
3eb5af1955 | ||
|
|
fbb75e58b1 | ||
|
|
f54f5bac9e | ||
|
|
5d3312142a | ||
|
|
886cbaf4e4 | ||
|
|
0c4074e10b | ||
|
|
cc522aa21d | ||
|
|
9c78fad721 | ||
|
|
6028232ad1 | ||
|
|
feb9a3889a | ||
|
|
32dbeb636d | ||
|
|
57944538b6 | ||
|
|
3ce2c62b0b | ||
|
|
50464997a3 | ||
|
|
8e7cad1650 | ||
|
|
590e6aeafc | ||
|
|
88ef307cef | ||
|
|
6e8501c8a1 | ||
|
|
fa916a0fac | ||
|
|
fb298b34ae | ||
|
|
16012767f4 | ||
|
|
bcbac31b47 | ||
|
|
8dc0c72583 | ||
|
|
89405a1a0b | ||
|
|
4f2b12b8a8 | ||
|
|
646e168d26 | ||
|
|
93dbbe1fb8 | ||
|
|
a135f5d9ed | ||
|
|
d0b6299b13 | ||
|
|
9e58dd509e | ||
|
|
7c8227101b | ||
|
|
f67fa62851 | ||
|
|
cd1d473ba0 | ||
|
|
56f160134d | ||
|
|
0ded1fcc1c | ||
|
|
a789b588cd | ||
|
|
8eaa04acbb | ||
|
|
d854b30ae6 | ||
|
|
d65bbec99b | ||
|
|
e4c39c7c26 | ||
|
|
ba800f0883 | ||
|
|
25491e42f9 | ||
|
|
960b0c88a7 | ||
|
|
65ffead0cf | ||
|
|
f2fb8c7035 | ||
|
|
9f59f384d8 | ||
|
|
23965f164c | ||
|
|
6a72840945 | ||
|
|
947457fb7c | ||
|
|
79120bf9a0 | ||
|
|
acb11905d5 | ||
|
|
109500178c | ||
|
|
e50a664865 | ||
|
|
357078b93e | ||
|
|
731220f870 | ||
|
|
69aa6c8fb1 | ||
|
|
60b263f3d2 | ||
|
|
7ac306e0da | ||
|
|
4cb454cdf2 | ||
|
|
19ad2fb128 | ||
|
|
5d96e4f224 | ||
|
|
6821677489 | ||
|
|
dbbda55e67 | ||
|
|
6c34a7f43c | ||
|
|
3326f3152c | ||
|
|
7641f6e253 | ||
|
|
48bdc1ad3b | ||
|
|
3ad29452d1 | ||
|
|
6e3f6f25a5 | ||
|
|
990efcab6e | ||
|
|
75a5dc3975 | ||
|
|
986d542acb | ||
|
|
6958c1a1aa | ||
|
|
a068d54981 | ||
|
|
d692ee07f7 | ||
|
|
1a57717b1a | ||
|
|
6b01d58712 | ||
|
|
35b943f17f | ||
|
|
e029242870 | ||
|
|
7a9b94b519 | ||
|
|
66b919d99f | ||
|
|
f4846afbad | ||
|
|
53588bc786 | ||
|
|
b47f13ee4c | ||
|
|
309f90e563 | ||
|
|
773c01f496 | ||
|
|
d831b2ff8b | ||
|
|
724ae159ce | ||
|
|
2c9a203bd1 | ||
|
|
f300ce3df5 | ||
|
|
e2c7c75715 | ||
|
|
66e64131ed | ||
|
|
5900b1462e | ||
|
|
9405f26f4b | ||
|
|
54e7b37630 | ||
|
|
529f1b5006 | ||
|
|
e5ac3007e0 | ||
|
|
0d0405b434 | ||
|
|
f1ce74ffdd | ||
|
|
d744c9590a | ||
|
|
3cc6ae793e | ||
|
|
4c2123c334 | ||
|
|
5155e3f509 | ||
|
|
5c8bf6ae0e | ||
|
|
6ae2f868fd | ||
|
|
a1ead62f28 | ||
|
|
0133580148 | ||
|
|
274246651d | ||
|
|
299b5a44dc | ||
|
|
a9500d0079 | ||
|
|
64ad8b9809 | ||
|
|
875d520ccf | ||
|
|
d311236dfd | ||
|
|
36e0982966 | ||
|
|
8cdb795438 | ||
|
|
4db6660de4 | ||
|
|
0b08f7479e | ||
|
|
200e4acf15 | ||
|
|
99d1978df7 | ||
|
|
08bf6674d5 | ||
|
|
8b122ff9dc | ||
|
|
69200884e1 | ||
|
|
0d1518add9 | ||
|
|
91ed4e4450 | ||
|
|
fd3046b32a | ||
|
|
a4ee6f3915 | ||
|
|
a0363e9b48 | ||
|
|
b471d52e61 | ||
|
|
9fb341a9f8 | ||
|
|
fba6b590f2 | ||
|
|
97f68f7f3a | ||
|
|
1138817dd2 | ||
|
|
13f8fc0b1a | ||
|
|
bdf8d9411e | ||
|
|
bb10cb8442 | ||
|
|
d48cff8cf1 | ||
|
|
f19af5ecc0 | ||
|
|
bfaaa975e6 | ||
|
|
b7c0fa6bd2 | ||
|
|
7110d17146 | ||
|
|
e01b3d4b54 | ||
|
|
cea1a885b5 | ||
|
|
f78eb335d6 | ||
|
|
2345bdec68 | ||
|
|
5f0117385e | ||
|
|
6caf1bab73 | ||
|
|
01e3c984ce | ||
|
|
6751f7b9a7 | ||
|
|
d5717a97ea | ||
|
|
b45d43d295 | ||
|
|
dcfb69c2b5 | ||
|
|
e85549ee11 | ||
|
|
789f205177 | ||
|
|
378acfe826 | ||
|
|
538c764d2b | ||
|
|
0f26a21624 | ||
|
|
5c1efa1149 | ||
|
|
ca4136cf41 | ||
|
|
3a26470fb7 | ||
|
|
6c5899dff5 | ||
|
|
2df2878dfc | ||
|
|
0b719945c5 | ||
|
|
b1a54a0107 | ||
|
|
08c177ca36 | ||
|
|
2573311308 | ||
|
|
1d72b8bf1b | ||
|
|
758e34efbb | ||
|
|
735ca38b8f | ||
|
|
f76a384841 | ||
|
|
9419a43a7f | ||
|
|
b695680a33 | ||
|
|
d0e731e8b8 | ||
|
|
48f075cfd5 | ||
|
|
3e87648de3 | ||
|
|
fe4ab95cd5 | ||
|
|
801383effe | ||
|
|
54cd65e47f | ||
|
|
a55821a2ec | ||
|
|
068861a927 | ||
|
|
d007cca61d | ||
|
|
a92895939e | ||
|
|
7bd1834d59 | ||
|
|
1b056c5328 | ||
|
|
e8306f623a | ||
|
|
3108a1853d | ||
|
|
25f1a573fd | ||
|
|
71d29fa3d0 | ||
|
|
50848e34ec | ||
|
|
4a5d08d0cf | ||
|
|
6fcdaa4387 | ||
|
|
699fc7641f | ||
|
|
3692b4d631 | ||
|
|
4b7677a916 | ||
|
|
5719b7a58d | ||
|
|
f22bfe6a55 | ||
|
|
551f478477 | ||
|
|
a430880729 | ||
|
|
a507b56ab1 | ||
|
|
f430e54daf | ||
|
|
47860cf002 | ||
|
|
13f5f18140 | ||
|
|
857a0fa0df | ||
|
|
0a958b6a02 | ||
|
|
b39c51195b | ||
|
|
853d16ed7e | ||
|
|
422359d09a | ||
|
|
544af1efec | ||
|
|
a6214c057e | ||
|
|
fe809c39f9 | ||
|
|
157cc54449 | ||
|
|
cbfacf9a10 | ||
|
|
fda5e0da8a | ||
|
|
98d004c50b | ||
|
|
037d995c4d | ||
|
|
e4a27d194e | ||
|
|
d34fce56e4 | ||
|
|
b8b922d334 | ||
|
|
88c272f6a7 | ||
|
|
6cfcb54a28 | ||
|
|
3ef96aa567 | ||
|
|
996dc6d1c8 | ||
|
|
61b1c2db5b | ||
|
|
34fd3b85a8 | ||
|
|
f76f952547 | ||
|
|
11b4a0e4b6 | ||
|
|
eefd30881c | ||
|
|
d3b67d0bd8 | ||
|
|
d6cab3f37e | ||
|
|
37edae1c90 | ||
|
|
90d6ad569d | ||
|
|
a4daa34db7 | ||
|
|
cfc2940412 | ||
|
|
a6adbb299d | ||
|
|
8cc7f86cf7 | ||
|
|
a53c6e2440 | ||
|
|
5199809bba | ||
|
|
a431042475 | ||
|
|
33941033e2 | ||
|
|
4e29b6ffc0 | ||
|
|
839b18aa26 | ||
|
|
a27339b244 | ||
|
|
10e25690b4 | ||
|
|
660d9e09f8 | ||
|
|
e9be1fdd2b | ||
|
|
44124d3055 | ||
|
|
14c3511e92 | ||
|
|
d647f751ee | ||
|
|
be1692d64f | ||
|
|
f404a17787 | ||
|
|
fc4927fa0f | ||
|
|
06e208c5c3 | ||
|
|
52485e5fd0 | ||
|
|
dee74174ff | ||
|
|
e7846547be | ||
|
|
7f89edee3e | ||
|
|
11cc9dc151 | ||
|
|
4236d0d938 | ||
|
|
8218cbea2a | ||
|
|
5b7f443cf4 | ||
|
|
0e39699c8c | ||
|
|
60a58e03ea | ||
|
|
e154920bc6 | ||
|
|
fcb89ad94d | ||
|
|
d48a1d1928 | ||
|
|
d02171b494 | ||
|
|
c8a5d4b86f | ||
|
|
006200c9a4 | ||
|
|
5d657c6e67 | ||
|
|
14428af879 | ||
|
|
5656cca4f3 | ||
|
|
08570c4248 | ||
|
|
b2bdb6f7c4 | ||
|
|
9037782a9c | ||
|
|
f93318a6c8 | ||
|
|
001c2c322b | ||
|
|
f4eee224d8 | ||
|
|
74306b54d7 | ||
|
|
fd2ee0c9e2 | ||
|
|
78914475ae | ||
|
|
435420d6d5 | ||
|
|
03b0eb19f7 | ||
|
|
910338f071 | ||
|
|
fad089ffff | ||
|
|
19a48b82cf | ||
|
|
09b2417848 | ||
|
|
5cbbc496b0 | ||
|
|
09f74f6d23 | ||
|
|
2b3eae6cc7 | ||
|
|
0b89a7a92d | ||
|
|
91ce66a0a8 | ||
|
|
ccdba3c771 | ||
|
|
3871b6a86d | ||
|
|
83ecfbb9b3 | ||
|
|
31c836ac25 | ||
|
|
3afedbf6f0 | ||
|
|
b4ec36debc | ||
|
|
1f15bee02a | ||
|
|
dff146e306 | ||
|
|
722dd08703 | ||
|
|
d047afe615 | ||
|
|
a7a7751be7 | ||
|
|
0bbf955d4c | ||
|
|
70abe10fc0 | ||
|
|
0a696bd4ce | ||
|
|
fda39c6cb0 | ||
|
|
875da22a43 | ||
|
|
363a563ec2 | ||
|
|
8da6fdc2ce | ||
|
|
0caa5616f2 | ||
|
|
727e6d83c0 | ||
|
|
da3f101a77 | ||
|
|
fe613de8e1 | ||
|
|
142e99d4e9 | ||
|
|
7af0139a09 | ||
|
|
8e53b57bb2 | ||
|
|
0d3647c395 | ||
|
|
0d76196a09 | ||
|
|
b281f3dee4 | ||
|
|
a4292976e9 | ||
|
|
c2dad58ad1 | ||
|
|
d5a6d789e6 | ||
|
|
875dde437d | ||
|
|
5be22ca80d | ||
|
|
66904fc4e8 | ||
|
|
8163ab7e55 | ||
|
|
ef6f7f32ae | ||
|
|
285e69e2d1 | ||
|
|
d1baf14a64 | ||
|
|
0884f6b78d | ||
|
|
2d78fb05c8 | ||
|
|
b95ad4cfaf | ||
|
|
3bbe3ddb31 | ||
|
|
a32e56500a | ||
|
|
c1e618ea2d | ||
|
|
19f5b5c132 | ||
|
|
c852ce3981 | ||
|
|
ba31b19c00 | ||
|
|
66a3c6df4e | ||
|
|
57658a8c14 | ||
|
|
9fe3049de6 | ||
|
|
831858b883 | ||
|
|
8de2ba67dd | ||
|
|
fe7a932ab8 | ||
|
|
1d31c79dc9 | ||
|
|
d40e5621e9 | ||
|
|
bcc7956216 | ||
|
|
821cbb2995 | ||
|
|
74fa790354 | ||
|
|
756477bfe3 | ||
|
|
864c68ffc5 | ||
|
|
68cae521df | ||
|
|
d0152ec8ca | ||
|
|
e08cfaf9ca | ||
|
|
ee4bb8bd25 | ||
|
|
7fa3d23dd9 | ||
|
|
9679dd077e | ||
|
|
048742f38f | ||
|
|
7b410b7f0e | ||
|
|
d238a768ab | ||
|
|
260db9fb9e | ||
|
|
e27b761d7c | ||
|
|
16fc083322 | ||
|
|
3c856c0c1a | ||
|
|
dc9c69db93 | ||
|
|
b1fe26c45a | ||
|
|
0389b631fa | ||
|
|
64fa709d1f | ||
|
|
4727fe8abf | ||
|
|
90481ce742 | ||
|
|
9fc6764fa7 | ||
|
|
74d4cdb81a | ||
|
|
7906146836 | ||
|
|
3274ff47b8 | ||
|
|
a059c553a1 | ||
|
|
23e182ca7c | ||
|
|
a15bc95824 | ||
|
|
74a3f63489 | ||
|
|
09f49fa891 | ||
|
|
b9d89f8aaa | ||
|
|
cb0214787b | ||
|
|
2e8cdd1542 | ||
|
|
b29d327d14 | ||
|
|
c8360e3ae5 | ||
|
|
19d2ab4853 | ||
|
|
12d77deeee | ||
|
|
043927c7db | ||
|
|
30947ea2d5 | ||
|
|
33313b0221 | ||
|
|
a5300420e2 | ||
|
|
68532fa9ec | ||
|
|
708d2b6255 | ||
|
|
e72113f06a | ||
|
|
14f81da375 | ||
|
|
fc21f7ad28 | ||
|
|
ca8bf5abb0 | ||
|
|
1c96d345e2 |
13
.gitignore
vendored
13
.gitignore
vendored
@@ -1,16 +1,29 @@
|
||||
*.obj
|
||||
*.lib
|
||||
*.dll
|
||||
*.dylib
|
||||
*.def
|
||||
*.o
|
||||
*.out
|
||||
lapack-3.1.1
|
||||
lapack-3.1.1.tgz
|
||||
lapack-3.4.1
|
||||
lapack-3.4.1.tgz
|
||||
lapack-3.4.2
|
||||
lapack-3.4.2.tgz
|
||||
lapack-netlib/make.inc
|
||||
lapack-netlib/lapacke/include/lapacke_mangling.h
|
||||
lapack-netlib/TESTING/testing_results.txt
|
||||
*.so
|
||||
*.a
|
||||
.svn
|
||||
*~
|
||||
lib.grd
|
||||
nohup.out
|
||||
config.h
|
||||
Makefile.conf
|
||||
Makefile.conf_last
|
||||
config_last.h
|
||||
getarch
|
||||
getarch_2nd
|
||||
utest/openblas_utest
|
||||
|
||||
24
.travis.yml
Normal file
24
.travis.yml
Normal file
@@ -0,0 +1,24 @@
|
||||
language: c
|
||||
compiler:
|
||||
- gcc
|
||||
|
||||
env:
|
||||
- TARGET_BOX=LINUX64 BTYPE="BINARY=64"
|
||||
- TARGET_BOX=LINUX64 BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
- TARGET_BOX=LINUX64 BTYPE="BINARY=64 INTERFACE64=1"
|
||||
- TARGET_BOX=LINUX32 BTYPE="BINARY=32"
|
||||
- TARGET_BOX=WIN64 BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
|
||||
|
||||
before_install:
|
||||
- sudo apt-get update -qq
|
||||
- sudo apt-get install -qq gfortran
|
||||
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
|
||||
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi
|
||||
|
||||
script: make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
|
||||
|
||||
# whitelist
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- develop
|
||||
39
BACKERS.md
Normal file
39
BACKERS.md
Normal file
@@ -0,0 +1,39 @@
|
||||
Thank you for the support.
|
||||
|
||||
### [2013.8] [Testbed for OpenBLAS project](https://www.bountysource.com/fundraisers/443-testbed-for-openblas-project)
|
||||
|
||||
https://www.bountysource.com/fundraisers/443-testbed-for-openblas-project/pledges
|
||||
|
||||
In chronological order:
|
||||
|
||||
* aeberspaecher
|
||||
* fmolina
|
||||
* saullocastro
|
||||
* xianyi
|
||||
* cuda
|
||||
* carter
|
||||
* StefanKarpinski
|
||||
* staticfloat
|
||||
* sebastien-villemot
|
||||
* JeffBezanson
|
||||
* ihnorton
|
||||
* simonp0420
|
||||
* andrioni
|
||||
* Tim Holy
|
||||
* ivarne
|
||||
* johnmyleswhite
|
||||
* traz
|
||||
* Jean-Francis Roy
|
||||
* bkalpert
|
||||
* Anirban
|
||||
* pgermain
|
||||
* alexandre.lacoste.18
|
||||
* foges
|
||||
* ssam
|
||||
* WestleyArgentum
|
||||
* daniebmariani
|
||||
* pjpuglia
|
||||
* albarrentine
|
||||
* Alexander Vogt
|
||||
|
||||
|
||||
90
CONTRIBUTORS.md
Normal file
90
CONTRIBUTORS.md
Normal file
@@ -0,0 +1,90 @@
|
||||
# Contributions to the OpenBLAS project
|
||||
|
||||
## Creator & Maintainer
|
||||
|
||||
* Zhang Xianyi <traits.zhang@gmail.com>
|
||||
|
||||
## Active Developers
|
||||
|
||||
* Wang Qian <traz0824@gmail.com>
|
||||
* Optimize BLAS3 on ICT Loongson 3A.
|
||||
* Optimize BLAS3 on Intel Sandy Bridge.
|
||||
|
||||
* Zaheer Chothia <zaheer.chothia@gmail.com>
|
||||
* Improve the compatibility about complex number
|
||||
* Build LAPACKE: C interface to LAPACK
|
||||
* Improve the windows build.
|
||||
|
||||
## Previous Developers
|
||||
|
||||
* Chen Shaohu <huhumartinwar@gmail.com>
|
||||
* Optimize GEMV on the Loongson 3A processor.
|
||||
|
||||
* Luo Wen
|
||||
* Intern. Test Level-2 BLAS.
|
||||
|
||||
## Contributors
|
||||
|
||||
In chronological order:
|
||||
|
||||
* pipping <http://page.mi.fu-berlin.de/pipping>
|
||||
* [2011-06-11] Make USE_OPENMP=0 disable openmp.
|
||||
|
||||
* Stefan Karpinski <stefan@karpinski.org>
|
||||
* [2011-12-28] Fix a bug about SystemStubs on Mac OS X.
|
||||
|
||||
* Alexander Eberspächer <https://github.com/aeberspaecher>
|
||||
* [2012-05-02] Add note on patch for segfaults on Linux kernel 2.6.32.
|
||||
|
||||
* Mike Nolta <mike@nolta.net>
|
||||
* [2012-05-19] Fix building bug on FreeBSD and NetBSD.
|
||||
|
||||
* Sylvestre Ledru <https://github.com/sylvestre>
|
||||
* [2012-07-01] Improve the detection of sparc. Fix building bug under
|
||||
Hurd and kfreebsd.
|
||||
|
||||
* Jameson Nash <https://github.com/vtjnash>
|
||||
* [2012-08-20] Provide support for passing CFLAGS, FFLAGS, PFLAGS, FPFLAGS to
|
||||
make on the command line.
|
||||
|
||||
* Alexander Nasonov <alnsn@yandex.ru>
|
||||
* [2012-11-10] Fix NetBSD build.
|
||||
|
||||
* Sébastien Villemot <sebastien@debian.org>
|
||||
* [2012-11-14] Fix compilation with TARGET=GENERIC. Patch applied to Debian package.
|
||||
|
||||
* Werner Saar <wernsaar@googlemail.com>
|
||||
* [2013-03-04] Optimize AVX and FMA4 DGEMM on AMD Bulldozer
|
||||
* [2013-04-27] Optimize AVX and FMA4 TRSM on AMD Bulldozer
|
||||
* [2013-06-09] Optimize AVX and FMA4 SGEMM on AMD Bulldozer
|
||||
* [2013-06-11] Optimize AVX and FMA4 ZGEMM on AMD Bulldozer
|
||||
* [2013-06-12] Optimize AVX and FMA4 CGEMM on AMD Bulldozer
|
||||
* [2013-06-16] Optimize dgemv_n kernel on AMD Bulldozer
|
||||
* [2013-06-20] Optimize ddot, daxpy kernel on AMD Bulldozer
|
||||
* [2013-06-21] Optimize dcopy kernel on AMD Bulldozer
|
||||
|
||||
* Kang-Che Sung <Explorer09@gmail.com>
|
||||
* [2013-05-17] Fix typo in the document. Re-order the architecture list in getarch.c.
|
||||
|
||||
* Kenneth Hoste <kenneth.hoste@gmail.com>
|
||||
* [2013-05-22] Adjust Makefile about downloading LAPACK source files.
|
||||
|
||||
* Lei WANG <https://github.com/wlbksy>
|
||||
* [2013-05-22] Fix a bug about wget.
|
||||
|
||||
* Dan Luu <http://www.linkedin.com/in/danluu>
|
||||
* [2013-06-30] Add Intel Haswell support (using sandybridge optimizations).
|
||||
|
||||
* grisuthedragon <https://github.com/grisuthedragon>
|
||||
* [2013-07-11] create openblas_get_parallel to retrieve information which parallelization
|
||||
model is used by OpenBLAS.
|
||||
|
||||
* Sébastien Fabbro <bicatali@gentoo.org>
|
||||
* [2013-07-24] Modify makefile to respect user's LDFLAGS
|
||||
* [2013-07-24] Add stack markings for GNU as arch-independent for assembler files
|
||||
|
||||
* carlkl <https://github.com/carlkl>
|
||||
* [2013-12-13] Fixed LAPACKE building bug on Windows
|
||||
|
||||
* [Your name or handle] <[email or website]>
|
||||
* [Date] [Brief summary of your changes]
|
||||
221
Changelog.txt
221
Changelog.txt
@@ -1,4 +1,225 @@
|
||||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.2.8
|
||||
01-Aug-2013
|
||||
common:
|
||||
* Support Open64 5.0. (#266)
|
||||
* Add executable stack markings. (#262, Thank Sébastien Fabbro)
|
||||
* Respect user's LDFLAGS (Thank Sébastien Fabbro)
|
||||
|
||||
x86/x86-64:
|
||||
* Rollback bulldozer and piledriver kernels to barcelona kernels (#263)
|
||||
We will fix the compuational error bug in bulldozer and piledriver kernels.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.7
|
||||
20-Jul-2013
|
||||
common:
|
||||
* Support LSB (Linux Standard Base) 4.1.
|
||||
e.g. make CC=lsbcc
|
||||
* Include LAPACK 3.4.2 source codes to the repo.
|
||||
Avoid downloading at compile time.
|
||||
* Add NO_PARALLEL_MAKE flag to disable parallel make.
|
||||
* Create openblas_get_parallel to retrieve information which
|
||||
parallelization model is used by OpenBLAS. (Thank grisuthedragon)
|
||||
* Detect LLVM/Clang compiler. The default compiler is Clang on Mac OS X.
|
||||
* Change LIBSUFFIX from .lib to .a on windows.
|
||||
* A walk round for dtrti_U single thread bug. Replace it with LAPACK codes. (#191)
|
||||
|
||||
x86/x86-64:
|
||||
* Optimize c/zgemm, trsm, dgemv_n, ddot, daxpy, dcopy on
|
||||
AMD Bulldozer. (Thank Werner Saar)
|
||||
* Add Intel Haswell support (using Sandybridge optimizations).
|
||||
(Thank Dan Luu)
|
||||
* Add AMD Piledriver support (using Bulldozer optimizations).
|
||||
* Fix the computational error in zgemm avx kernel on
|
||||
Sandybridge. (#237)
|
||||
* Fix the overflow bug in gemv.
|
||||
* Fix the overflow bug in multi-threaded BLAS3, getrf when NUM_THREADS
|
||||
is very large.(#214, #221, #246).
|
||||
MIPS64:
|
||||
* Support loongcc (Open64 based) compiler for ICT Loongson 3A/B.
|
||||
|
||||
Power:
|
||||
* Support Power7 by old Power6 kernels. (#220)
|
||||
|
||||
====================================================================
|
||||
Version 0.2.6
|
||||
2-Mar-2013
|
||||
common:
|
||||
* Improved OpenMP performance slightly. (d744c9)
|
||||
* Improved cblas.h compatibility with Intel MKL.(#185)
|
||||
* Fixed the overflowing bug in single thread cholesky factorization.
|
||||
* Fixed the overflowing buffer bug of multithreading hbmv and sbmv.(#174)
|
||||
|
||||
x86/x86-64:
|
||||
* Added AMD Bulldozer x86-64 S/DGEMM AVX kernels. (Thank Werner Saar)
|
||||
We will tune the performance in future.
|
||||
* Auto-detect Intel Xeon E7540.
|
||||
* Fixed the overflowing buffer bug of gemv. (#173)
|
||||
* Fixed the bug of s/cdot about invalid reading NAN on x86_64. (#189)
|
||||
|
||||
MIPS64:
|
||||
|
||||
====================================================================
|
||||
Version 0.2.5
|
||||
26-Nov-2012
|
||||
common:
|
||||
* Added NO_SHARED flag to disable generating the shared library.
|
||||
* Compile LAPACKE with ILP64 modle when INTERFACE64=1 (#158)
|
||||
* Export LAPACK 3.4.2 symbols in shared library. (#147)
|
||||
* Only detect the number of physical CPU cores on Mac OSX. (#157)
|
||||
* Fixed NetBSD build. (#155)
|
||||
* Fixed compilation with TARGET=GENERIC. (#160)
|
||||
x86/x86-64:
|
||||
* Restore the original CPU affinity when calling
|
||||
openblas_set_num_threads(1) (#153)
|
||||
* Fixed a SEGFAULT bug in dgemv_t when m is very large.(#154)
|
||||
MIPS64:
|
||||
|
||||
====================================================================
|
||||
Version 0.2.4
|
||||
8-Oct-2012
|
||||
common:
|
||||
* Upgraded LAPACK to 3.4.2 version. (#145)
|
||||
* Provided support for passing CFLAGS, FFLAGS, PFLAGS,
|
||||
FPFLAGS to make. (#137)
|
||||
* f77blas.h:compatibility for compilers without C99 complex
|
||||
number support. (#141)
|
||||
x86/x86-64:
|
||||
* Added NO_AVX flag. Check OS supporting AVX on runtime. (#139)
|
||||
* Fixed zdot incompatibility ABI issue with GCC 4.7 on
|
||||
Windows 32-bit. (#140)
|
||||
MIPS64:
|
||||
* Fixed the generation of shared library bug.
|
||||
* Fixed the detection bug on the Loongson 3A server.
|
||||
====================================================================
|
||||
Version 0.2.3
|
||||
20-Aug-2012
|
||||
common:
|
||||
* Fixed LAPACK unstable bug about ?laswp. (#130)
|
||||
* Fixed the shared library bug about unloading the library on
|
||||
Linux (#132).
|
||||
* Fixed the compilation failure on BlueGene/P (TARGET=PPC440FP2)
|
||||
Please use gcc and IBM xlf. (#134)
|
||||
x86/x86-64:
|
||||
* Supported goto_set_num_threads and openblas_set_num_threads
|
||||
APIs in Windows. They can set the number of threads on runtime.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.2
|
||||
6-July-2012
|
||||
common:
|
||||
* Fixed exporting DLL functions bug on Windows/MingW
|
||||
* Support GNU Hurd (Thank Sylvestre Ledru)
|
||||
* Support kfreebsd kernel (Thank Sylvestre Ledru)
|
||||
x86/x86-64:
|
||||
* Support Intel Sandy Bridge 22nm desktop/mobile CPU
|
||||
SPARC:
|
||||
* Improve the detection of SPARC (Thank Sylvestre Ledru)
|
||||
|
||||
====================================================================
|
||||
Version 0.2.1
|
||||
30-Jun-2012
|
||||
common:
|
||||
x86/x86-64:
|
||||
* Fixed the SEGFAULT bug about hyper-theading
|
||||
* Support AMD Bulldozer by using GotoBLAS2 AMD Barcelona codes
|
||||
|
||||
====================================================================
|
||||
Version 0.2.0
|
||||
26-Jun-2012
|
||||
common:
|
||||
* Removed the limitation (64) of numbers of CPU cores.
|
||||
Now, it supports 256 cores at max.
|
||||
* Supported clang compiler.
|
||||
* Fixed some build bugs on FreeBSD
|
||||
x86/x86-64:
|
||||
* Optimized Level-3 BLAS on Intel Sandy Bridge x86-64 by AVX instructions.
|
||||
Please use gcc >= 4.6 or clang >=3.1.
|
||||
* Support AMD Bobcat by using GotoBLAS2 AMD Barcelona codes.
|
||||
|
||||
====================================================================
|
||||
Version 0.1.1
|
||||
29-Apr-2012
|
||||
common:
|
||||
* Upgraded LAPACK to 3.4.1 version. (Thank Zaheer Chothia)
|
||||
* Supported LAPACKE, a C interface to LAPACKE. (Thank Zaheer Chothia)
|
||||
* Fixed the build bug (MD5 and download) on Mac OSX.
|
||||
* Auto download CUnit 2.1.2-2 from SF.net with UTEST_CHECK=1.
|
||||
* Fxied the compatibility issue for compilers without C99 complex number
|
||||
(e.g. Visual Studio)
|
||||
x86/x86_64:
|
||||
* Auto-detect Intel Sandy Bridge Core i7-3xxx & Xeon E7 Westmere-EX.
|
||||
* Test alpha=Nan in dscale.
|
||||
* Fixed a SEGFAULT bug in samax on x86 windows.
|
||||
|
||||
====================================================================
|
||||
Version 0.1.0
|
||||
23-Mar-2012
|
||||
common:
|
||||
* Set soname of shared library on Linux.
|
||||
* Added LIBNAMESUFFIX flag in Makefile.rule. The user can use
|
||||
this flag to control the library name, e.g. libopenblas.a,
|
||||
libopenblas_ifort.a or libopenblas_omp.a.
|
||||
* Added GEMM_MULTITHREAD_THRESHOLD flag in Makefile.rule.
|
||||
The lib use single thread in GEMM function with small matrices.
|
||||
x86/x86_64:
|
||||
* Used GEMV SSE/SSE2 kernels on x86 32-bit.
|
||||
* Exported CBLAS functions in Windows DLL.
|
||||
MIPS64:
|
||||
* Completed Level-3 BLAS optimization on Loongson 3A CPU.
|
||||
* Improved GEMV performance on Loongson 3A CPU.
|
||||
* Improved Level-3 BLAS performance on Loongson 3B CPU. (EXPERIMENT)
|
||||
|
||||
====================================================================
|
||||
Version 0.1 alpha2.5
|
||||
19-Feb-2012
|
||||
common:
|
||||
* Fixed missing "#include <sched.h>" bug on Mac OS X.
|
||||
Thank Mike Nolta for the patch.
|
||||
* Upgraded LAPACK to 3.4.0 version
|
||||
* Fixed a bug on Mac OS X. Don't require SystemStubs on OS X.
|
||||
SystemStubs does not exist on Lion. Thank Stefan Karpinski.
|
||||
* Improved README with using OpenMP. Check the internal threads
|
||||
count less than or equal to omp_get_max_threads()
|
||||
x86/x86_64:
|
||||
* Auto-detect Intel Core i6/i7 (Sandy Bridge) CPU with Nehalem assembly kernels
|
||||
* Fixed some bugs on MingW 64-bit including zgemv, cdot, zdot.
|
||||
|
||||
====================================================================
|
||||
Version 0.1 alpha2.4
|
||||
18-Sep-2011
|
||||
common:
|
||||
* Fixed a bug about installation. The header file "fblas77.h"
|
||||
works fine now.
|
||||
* Fixed #61 a building bug about setting TARGET and DYNAMIC_ARCH.
|
||||
* Try to handle absolute path of shared library in OSX. (#57)
|
||||
Thank Dr Kane O'Donnell.
|
||||
* Changed the installation folder layout to $(PREFIX)/include and
|
||||
$(PREFIX)/lib
|
||||
|
||||
x86/x86_64:
|
||||
* Fixed #58 zdot/xdot SEGFAULT bug with GCC-4.6 on x86. According
|
||||
to i386 calling convention, The callee should remove the first
|
||||
hidden parameter.Thank Mr. John for this patch.
|
||||
|
||||
====================================================================
|
||||
Version 0.1 alpha2.3
|
||||
5-Sep-2011
|
||||
|
||||
x86/x86_64:
|
||||
* Added DTB_ENTRIES into dynamic arch setting parameters. Now,
|
||||
it can read DTB_ENTRIES on runtime. (Refs issue #55 on github)
|
||||
|
||||
====================================================================
|
||||
Version 0.1 alpha2.2
|
||||
14-Jul-2011
|
||||
|
||||
common:
|
||||
* Fixed a building bug when DYNAMIC_ARCH=1 & INTERFACE64=1.
|
||||
(Refs issue #44 on github)
|
||||
|
||||
====================================================================
|
||||
Version 0.1 alpha2.1
|
||||
28-Jun-2011
|
||||
|
||||
@@ -90,6 +90,15 @@
|
||||
number of threads will consume extra resource. I recommend you to
|
||||
specify minimum number of threads.
|
||||
|
||||
1.9 Q I have segfaults when I compile with USE_OPENMP=1. What's wrong?
|
||||
|
||||
A This may be related to a bug in the Linux kernel 2.6.32. Try applying
|
||||
the patch segaults.patch using
|
||||
|
||||
patch < segfaults.patch
|
||||
|
||||
and see if the crashes persist. Note that this patch will lead to many
|
||||
compiler warnings.
|
||||
|
||||
2. Architecture Specific issue or Implementation
|
||||
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -1,4 +1,4 @@
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
||||
218
Makefile
218
Makefile
@@ -3,7 +3,7 @@ include ./Makefile.system
|
||||
|
||||
BLASDIRS = interface driver/level2 driver/level3 driver/others
|
||||
|
||||
ifndef DYNAMIC_ARCH
|
||||
ifneq ($(DYNAMIC_ARCH), 1)
|
||||
BLASDIRS += kernel
|
||||
endif
|
||||
|
||||
@@ -15,10 +15,6 @@ ifdef SANITY_CHECK
|
||||
BLASDIRS += reference
|
||||
endif
|
||||
|
||||
ifndef PREFIX
|
||||
PREFIX = /opt/OpenBLAS
|
||||
endif
|
||||
|
||||
SUBDIRS = $(BLASDIRS)
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
SUBDIRS += lapack
|
||||
@@ -26,12 +22,12 @@ endif
|
||||
|
||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
|
||||
|
||||
.PHONY : all libs netlib test ctest shared install
|
||||
.PHONY : all libs netlib test ctest shared install
|
||||
.NOTPARALLEL : all libs prof lapack-test install
|
||||
|
||||
all :: libs netlib tests shared
|
||||
@echo
|
||||
@echo " GotoBLAS build complete."
|
||||
@echo " OpenBLAS build complete. ($(LIB_COMPONENTS))"
|
||||
@echo
|
||||
@echo " OS ... $(OSNAME) "
|
||||
@echo " Architecture ... $(ARCH) "
|
||||
@@ -39,9 +35,14 @@ ifndef BINARY64
|
||||
@echo " BINARY ... 32bit "
|
||||
else
|
||||
@echo " BINARY ... 64bit "
|
||||
endif
|
||||
ifdef INTERFACE64
|
||||
@echo " Use 64 bits int (equivalent to \"-i8\" in Fortran) "
|
||||
endif
|
||||
@echo " C compiler ... $(C_COMPILER) (command line : $(CC))"
|
||||
ifndef NOFORTRAN
|
||||
@echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))"
|
||||
endif
|
||||
ifneq ($(OSNAME), AIX)
|
||||
@echo -n " Library Name ... $(LIBNAME)"
|
||||
else
|
||||
@@ -53,32 +54,54 @@ ifndef SMP
|
||||
else
|
||||
@echo " (Multi threaded; Max num-threads is $(NUM_THREADS))"
|
||||
endif
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
@echo
|
||||
@echo " Use OpenMP in the multithreading. Becasue of ignoring OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS flags, "
|
||||
@echo " you should use OMP_NUM_THREADS environment variable to control the number of threads."
|
||||
@echo
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@echo "WARNING: If you plan to use the dynamic library $(LIBDYNNAME), you must run:"
|
||||
@echo
|
||||
@echo "\"make PREFIX=/your_installation_path/ install\"."
|
||||
@echo
|
||||
@echo "(or set PREFIX in Makefile.rule and run make install."
|
||||
@echo "If you want to move the .dylib to a new location later, make sure you change"
|
||||
@echo "the internal name of the dylib with:"
|
||||
@echo
|
||||
@echo "install_name_tool -id /new/absolute/path/to/$(LIBDYNNAME) $(LIBDYNNAME)"
|
||||
endif
|
||||
@echo
|
||||
@echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"."
|
||||
@echo
|
||||
|
||||
shared :
|
||||
ifndef NO_SHARED
|
||||
ifeq ($(OSNAME), Linux)
|
||||
$(MAKE) -C exports so
|
||||
-ln -fs $(LIBSONAME) libopenblas.so
|
||||
@$(MAKE) -C exports so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
$(MAKE) -C exports so
|
||||
-ln -fs $(LIBSONAME) libopenblas.so
|
||||
@$(MAKE) -C exports so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
$(MAKE) -C exports so
|
||||
-ln -fs $(LIBSONAME) libopenblas.so
|
||||
@$(MAKE) -C exports so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
$(MAKE) -C exports dyn
|
||||
-ln -fs $(LIBDYNNAME) libopenblas.dylib
|
||||
@$(MAKE) -C exports dyn
|
||||
@-ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
$(MAKE) -C exports dll
|
||||
-ln -fs $(LIBDLLNAME) libopenblas.dll
|
||||
@$(MAKE) -C exports dll
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
$(MAKE) -C exports dll
|
||||
-ln -fs $(LIBDLLNAME) libopenblas.dll
|
||||
@$(MAKE) -C exports dll
|
||||
endif
|
||||
endif
|
||||
|
||||
tests :
|
||||
@@ -106,39 +129,49 @@ endif
|
||||
ifeq ($(NOFORTRAN), 1)
|
||||
$(error OpenBLAS: Detecting fortran compiler failed. Please install fortran compiler, e.g. gfortran, ifort, openf90.)
|
||||
endif
|
||||
-ln -fs $(LIBNAME) libopenblas.$(LIBSUFFIX)
|
||||
for d in $(SUBDIRS) ; \
|
||||
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
@for d in $(SUBDIRS) ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
fi; \
|
||||
done
|
||||
#Save the config files for installation
|
||||
cp Makefile.conf Makefile.conf_last
|
||||
cp config.h config_last.h
|
||||
ifdef DYNAMIC_ARCH
|
||||
$(MAKE) -C kernel commonlibs || exit 1
|
||||
for d in $(DYNAMIC_CORE) ; \
|
||||
@cp Makefile.conf Makefile.conf_last
|
||||
@cp config.h config_last.h
|
||||
ifdef QUAD_PRECISION
|
||||
@echo "#define QUAD_PRECISION">> config_last.h
|
||||
endif
|
||||
ifeq ($(EXPRECISION), 1)
|
||||
@echo "#define EXPRECISION">> config_last.h
|
||||
endif
|
||||
##
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
@$(MAKE) -C kernel commonlibs || exit 1
|
||||
@for d in $(DYNAMIC_CORE) ; \
|
||||
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
|
||||
done
|
||||
echo DYNAMIC_ARCH=1 >> Makefile.conf_last
|
||||
@echo DYNAMIC_ARCH=1 >> Makefile.conf_last
|
||||
endif
|
||||
touch lib.grd
|
||||
ifdef USE_THREAD
|
||||
@echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
|
||||
endif
|
||||
@touch lib.grd
|
||||
|
||||
prof : prof_blas prof_lapack
|
||||
|
||||
prof_blas :
|
||||
ln -fs $(LIBNAME_P) libopenblas_p.$(LIBSUFFIX)
|
||||
ln -fs $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
|
||||
for d in $(SUBDIRS) ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d prof || exit 1 ; \
|
||||
fi; \
|
||||
done
|
||||
ifdef DYNAMIC_ARCH
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
$(MAKE) -C kernel commonprof || exit 1
|
||||
endif
|
||||
|
||||
blas :
|
||||
ln -fs $(LIBNAME) libopenblas.$(LIBSUFFIX)
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
for d in $(BLASDIRS) ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d libs || exit 1 ; \
|
||||
@@ -146,13 +179,13 @@ blas :
|
||||
done
|
||||
|
||||
hpl :
|
||||
ln -fs $(LIBNAME) libopenblas.$(LIBSUFFIX)
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
for d in $(BLASDIRS) ../laswp exports ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
fi; \
|
||||
done
|
||||
ifdef DYNAMIC_ARCH
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
$(MAKE) -C kernel commonlibs || exit 1
|
||||
for d in $(DYNAMIC_CORE) ; \
|
||||
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
|
||||
@@ -160,7 +193,7 @@ ifdef DYNAMIC_ARCH
|
||||
endif
|
||||
|
||||
hpl_p :
|
||||
ln -fs $(LIBNAME_P) libopenblas_p.$(LIBSUFFIX)
|
||||
ln -fs $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
|
||||
for d in $(SUBDIRS) ../laswp exports ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
@@ -171,74 +204,100 @@ ifeq ($(NO_LAPACK), 1)
|
||||
netlib :
|
||||
|
||||
else
|
||||
netlib : lapack-3.1.1 patch.for_lapack-3.1.1 lapack-3.1.1/make.inc
|
||||
netlib : lapack_prebuild
|
||||
ifndef NOFORTRAN
|
||||
-@$(MAKE) -C lapack-3.1.1 lapacklib
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
|
||||
endif
|
||||
ifndef NO_LAPACKE
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib
|
||||
endif
|
||||
endif
|
||||
|
||||
prof_lapack : lapack-3.1.1 lapack-3.1.1/make.inc
|
||||
-@$(MAKE) -C lapack-3.1.1 lapack_prof
|
||||
prof_lapack : lapack_prebuild
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapack_prof
|
||||
|
||||
lapack-3.1.1/make.inc :
|
||||
lapack_prebuild :
|
||||
ifndef NOFORTRAN
|
||||
-@echo "FORTRAN = $(FC)" > lapack-3.1.1/make.inc
|
||||
-@echo "OPTS = $(FFLAGS)" >> lapack-3.1.1/make.inc
|
||||
-@echo "POPTS = $(FPFLAGS)" >> lapack-3.1.1/make.inc
|
||||
-@echo "NOOPT = $(FFLAGS) -O0" >> lapack-3.1.1/make.inc
|
||||
-@echo "PNOOPT = $(FPFLAGS) -O0" >> lapack-3.1.1/make.inc
|
||||
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> lapack-3.1.1/make.inc
|
||||
-@echo "ARCH = $(AR)" >> lapack-3.1.1/make.inc
|
||||
-@echo "RANLIB = $(RANLIB)" >> lapack-3.1.1/make.inc
|
||||
-@echo "LAPACKLIB = ../$(LIBNAME)" >> lapack-3.1.1/make.inc
|
||||
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> lapack-3.1.1/make.inc
|
||||
-@echo "SUFFIX = $(SUFFIX)" >> lapack-3.1.1/make.inc
|
||||
-@echo "PSUFFIX = $(PSUFFIX)" >> lapack-3.1.1/make.inc
|
||||
# -@echo "CEXTRALIB = $(CEXTRALIB)" >> lapack-3.1.1/make.inc
|
||||
-@cat make.inc >> lapack-3.1.1/make.inc
|
||||
-@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "NOOPT = $(LAPACK_FFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKELIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
|
||||
lapack-3.1.1 : lapack-3.1.1.tgz
|
||||
lapack-3.4.2 : lapack-3.4.2.tgz
|
||||
ifndef NOFORTRAN
|
||||
@if test `$(MD5SUM) lapack-3.1.1.tgz | $(AWK) '{print $$1}'` = 00b21551a899bcfbaa7b8443e1faeef9; then \
|
||||
ifndef NO_LAPACK
|
||||
@if test `$(MD5SUM) $< | $(AWK) '{print $$1}'` = 61bf1a8a4469d4bdb7604f5897179478; then \
|
||||
echo $(TAR) zxf $< ;\
|
||||
$(TAR) zxf $< && (cd lapack-3.1.1; $(PATCH) -p1 < ../patch.for_lapack-3.1.1) ;\
|
||||
$(TAR) zxf $< && (cd $(NETLIB_LAPACK_DIR); $(PATCH) -p1 < ../patch.for_lapack-3.4.2) ;\
|
||||
rm -f $(NETLIB_LAPACK_DIR)/lapacke/make.inc ;\
|
||||
else \
|
||||
echo " lapack-3.1.1.tgz check sum is wrong (Please use orignal)." ;\
|
||||
rm -rf lapack-3.1.1 ;\
|
||||
rm -rf $(NETLIB_LAPACK_DIR) ;\
|
||||
echo " Cannot download lapack-3.4.2.tgz or the MD5 check sum is wrong (Please use orignal)."; \
|
||||
exit 1; \
|
||||
fi
|
||||
endif
|
||||
endif
|
||||
|
||||
lapack-3.1.1.tgz :
|
||||
LAPACK_URL=http://www.netlib.org/lapack/lapack-3.4.2.tgz
|
||||
|
||||
lapack-3.4.2.tgz :
|
||||
ifndef NOFORTRAN
|
||||
-wget http://www.netlib.org/lapack/lapack-3.1.1.tgz
|
||||
#http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Darwin NetBSD))
|
||||
curl -O $(LAPACK_URL);
|
||||
else
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
fetch $(LAPACK_URL);
|
||||
else
|
||||
wget -O $@ $(LAPACK_URL);
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
large.tgz :
|
||||
ifndef NOFORTRAN
|
||||
-wget http://www.netlib.org/lapack/timing/large.tgz
|
||||
if [ ! -a $< ]; then
|
||||
-wget http://www.netlib.org/lapack/timing/large.tgz;
|
||||
fi
|
||||
endif
|
||||
|
||||
timing.tgz :
|
||||
ifndef NOFORTRAN
|
||||
-wget http://www.netlib.org/lapack/timing/timing.tgz
|
||||
if [ ! -a $< ]; then
|
||||
-wget http://www.netlib.org/lapack/timing/timing.tgz;
|
||||
fi
|
||||
endif
|
||||
|
||||
lapack-timing : lapack-3.1.1 large.tgz timing.tgz
|
||||
lapack-timing : large.tgz timing.tgz
|
||||
ifndef NOFORTRAN
|
||||
(cd lapack-3.1.1; $(TAR) zxf ../timing.tgz TIMING)
|
||||
(cd lapack-3.1.1/TIMING; $(TAR) zxf ../../large.tgz )
|
||||
make -C lapack-3.1.1 tmglib
|
||||
make -C lapack-3.1.1/TIMING
|
||||
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
|
||||
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )
|
||||
make -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
make -C $(NETLIB_LAPACK_DIR)/TIMING
|
||||
endif
|
||||
|
||||
|
||||
lapack-test :
|
||||
$(MAKE) -C lapack-3.1.1 tmglib
|
||||
$(MAKE) -C lapack-3.1.1/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintsts xlintstz xlintstzc
|
||||
@rm -f lapack-3.1.1/TESTING/*.out
|
||||
$(MAKE) -j 1 -C lapack-3.1.1/TESTING
|
||||
$(GREP) failed lapack-3.1.1/TESTING/*.out
|
||||
$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintsts xlintstz xlintstzc
|
||||
@rm -f $(NETLIB_LAPACK_DIR)/TESTING/*.out
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING
|
||||
$(GREP) failed $(NETLIB_LAPACK_DIR)/TESTING/*.out
|
||||
|
||||
dummy :
|
||||
|
||||
@@ -254,11 +313,14 @@ clean ::
|
||||
#ifdef DYNAMIC_ARCH
|
||||
@$(MAKE) -C kernel clean
|
||||
#endif
|
||||
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf libopenblas.$(LIBSUFFIX) libopenblas_p.$(LIBSUFFIX) *.lnk myconfig.h
|
||||
@$(MAKE) -C reference clean
|
||||
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@rm -rf getarch.dSYM getarch_2nd.dSYM
|
||||
endif
|
||||
@rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib
|
||||
@if test -d lapack-3.1.1; then \
|
||||
echo deleting lapack-3.1.1; \
|
||||
rm -rf lapack-3.1.1 ;\
|
||||
fi
|
||||
@touch $(NETLIB_LAPACK_DIR)/make.inc
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) clean
|
||||
@rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h
|
||||
@rm -f *.grd Makefile.conf_last config_last.h
|
||||
@echo Done.
|
||||
@echo Done.
|
||||
|
||||
12
Makefile.arm
Normal file
12
Makefile.arm
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
ifeq ($(CORE), ARMV7)
|
||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV6)
|
||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
endif
|
||||
|
||||
|
||||
7
Makefile.arm64
Normal file
7
Makefile.arm64
Normal file
@@ -0,0 +1,7 @@
|
||||
|
||||
ifeq ($(CORE), ARMV8)
|
||||
CCOMMON_OPT += -march=armv8-a
|
||||
FCOMMON_OPT += -march=armv8-a
|
||||
endif
|
||||
|
||||
|
||||
@@ -1,6 +1 @@
|
||||
COPT = -Wall -O2 # -DGEMMTEST
|
||||
ifdef BINARY64
|
||||
else
|
||||
# LDFLAGS = -m elf32ppc
|
||||
LDFLAGS = -m elf_i386
|
||||
endif
|
||||
|
||||
@@ -3,6 +3,12 @@ export GOTOBLAS_MAKEFILE = 1
|
||||
-include $(TOPDIR)/Makefile.conf_last
|
||||
include ./Makefile.system
|
||||
|
||||
PREFIX ?= /opt/OpenBLAS
|
||||
|
||||
OPENBLAS_INCLUDE_DIR := $(PREFIX)/include
|
||||
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib
|
||||
OPENBLAS_BUILD_DIR := $(CURDIR)
|
||||
|
||||
.PHONY : install
|
||||
.NOTPARALLEL : install
|
||||
|
||||
@@ -10,55 +16,73 @@ lib.grd :
|
||||
$(error OpenBLAS: Please run "make" firstly)
|
||||
|
||||
install : lib.grd
|
||||
@-mkdir -p $(PREFIX)
|
||||
@echo Generating openblas_config.h in $(PREFIX)
|
||||
@-mkdir -p $(DESTDIR)$(PREFIX)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
#for inc
|
||||
@echo \#ifndef OPENBLAS_CONFIG_H > $(PREFIX)/openblas_config.h
|
||||
@echo \#define OPENBLAS_CONFIG_H >> $(PREFIX)/openblas_config.h
|
||||
@cat config_last.h >> $(PREFIX)/openblas_config.h
|
||||
@echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(PREFIX)/openblas_config.h
|
||||
@cat openblas_config_template.h >> $(PREFIX)/openblas_config.h
|
||||
@echo \#endif >> $(PREFIX)/openblas_config.h
|
||||
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@awk '{print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
|
||||
@echo Generating f77blas.h in $(PREFIX)
|
||||
@echo \#ifndef OPENBLAS_F77BLAS_H > $(PREFIX)/f77blas.h
|
||||
@echo \#define OPENBLAS_F77BLAS_H >> $(PREFIX)/f77blas.h
|
||||
@echo \#include \"openblas_config.h\" >> $(PREFIX)/f77blas.h
|
||||
@cat common_interface.h >> $(PREFIX)/f77blas.h
|
||||
@echo \#endif >> $(PREFIX)/f77blas.h
|
||||
@echo Generating f77blas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@echo \#ifndef OPENBLAS_F77BLAS_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#define OPENBLAS_F77BLAS_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#include \"openblas_config.h\" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@cat common_interface.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#endif >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
|
||||
@echo Generating cblas.h in $(PREFIX)
|
||||
@sed 's/common/openblas_config/g' cblas.h > $(PREFIX)/cblas.h
|
||||
ifndef NO_CBLAS
|
||||
@echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@sed 's/common/openblas_config/g' cblas.h > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h
|
||||
endif
|
||||
|
||||
ifndef NO_LAPACKE
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
|
||||
endif
|
||||
|
||||
#for install static library
|
||||
@echo Copy the static library to $(PREFIX)
|
||||
@cp $(LIBNAME) $(PREFIX)
|
||||
@-ln -fs $(PREFIX)/$(LIBNAME) $(PREFIX)/libopenblas.$(LIBSUFFIX)
|
||||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@install -pm644 $(LIBNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
#for install shared library
|
||||
@echo Copy the shared library to $(PREFIX)
|
||||
ifndef NO_SHARED
|
||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
ifeq ($(OSNAME), Linux)
|
||||
-cp $(LIBSONAME) $(PREFIX)
|
||||
-ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so
|
||||
@install -pm755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
-cp $(LIBSONAME) $(PREFIX)
|
||||
-ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so
|
||||
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
-cp $(LIBSONAME) $(PREFIX)
|
||||
-ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so
|
||||
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
-cp $(LIBDYNNAME) $(PREFIX)
|
||||
-ln -fs $(PREFIX)/$(LIBDYNNAME) $(PREFIX)/libopenblas.dylib
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@-cp $(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-install_name_tool -id $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)
|
||||
@-ln -fs $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
-cp $(LIBDLLNAME) $(PREFIX)
|
||||
-ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
-cp $(LIBDLLNAME) $(PREFIX)
|
||||
-ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)
|
||||
endif
|
||||
endif
|
||||
|
||||
@echo Install OK!
|
||||
|
||||
@@ -17,13 +17,7 @@ endif
|
||||
endif
|
||||
|
||||
ifdef BINARY64
|
||||
ifeq ($(OSNAME), Linux)
|
||||
LDFLAGS = -m elf64ppc
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
LDFLAGS = -arch ppc64
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), AIX)
|
||||
CCOMMON_OPT += -mpowerpc64 -maix64
|
||||
@@ -34,17 +28,12 @@ ifeq ($(COMPILER_F77), xlf)
|
||||
FCOMMON_OPT += -q64
|
||||
endif
|
||||
ARFLAGS = -X 64
|
||||
LDFLAGS = -b64
|
||||
ASFLAGS = -a64
|
||||
endif
|
||||
else
|
||||
ifeq ($(OSNAME), Linux)
|
||||
LDFLAGS = -m elf32ppc
|
||||
endif
|
||||
ifeq ($(OSNAME), AIX)
|
||||
CCOMMON_OPT += -Wa,-a32
|
||||
ARFLAGS = -X 32
|
||||
LDFLAGS = -b32
|
||||
ASFLAGS = -a32
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
# This is triggered by Makefile.system and runs before any of the code is built.
|
||||
|
||||
export BINARY
|
||||
export USE_OPENMP
|
||||
|
||||
@@ -21,7 +23,17 @@ all: getarch_2nd
|
||||
|
||||
config.h : c_check f_check getarch
|
||||
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC)
|
||||
ifneq ($(ONLY_CBLAS), 1)
|
||||
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC)
|
||||
else
|
||||
#When we only build CBLAS, we set NOFORTRAN=2
|
||||
echo "NOFORTRAN=2" >> $(TARGET_MAKE)
|
||||
echo "NO_FBLAS=1" >> $(TARGET_MAKE)
|
||||
echo "F_COMPILER=GFORTRAN" >> $(TARGET_MAKE)
|
||||
echo "BU=_" >> $(TARGET_MAKE)
|
||||
echo "#define BUNDERSCORE _" >> $(TARGET_CONF)
|
||||
echo "#define NEEDBUNDERSCORE 1" >> $(TARGET_CONF)
|
||||
endif
|
||||
./getarch 0 >> $(TARGET_MAKE)
|
||||
./getarch 1 >> $(TARGET_CONF)
|
||||
|
||||
@@ -3,7 +3,12 @@
|
||||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.1alpha2.1
|
||||
VERSION = 0.2.8
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
# is libopenblas_$(LIBNAMESUFFIX).so.0.
|
||||
# LIBNAMESUFFIX = omp
|
||||
|
||||
# You can specify the target architecture, otherwise it's
|
||||
# automatically detected.
|
||||
@@ -19,10 +24,13 @@ VERSION = 0.1alpha2.1
|
||||
# Fortran compiler. Default is g77.
|
||||
# FC = gfortran
|
||||
|
||||
# Even you can specify cross compiler
|
||||
# Even you can specify cross compiler. Meanwhile, please set HOSTCC.
|
||||
# CC = x86_64-w64-mingw32-gcc
|
||||
# FC = x86_64-w64-mingw32-gfortran
|
||||
|
||||
# If you use the cross compiler, please set this host compiler.
|
||||
# HOSTCC = gcc
|
||||
|
||||
# If you need 32bit binary, define BINARY=32, otherwise define BINARY=64
|
||||
# BINARY=64
|
||||
|
||||
@@ -40,12 +48,23 @@ VERSION = 0.1alpha2.1
|
||||
# automatically detected by the the script.
|
||||
# NUM_THREADS = 24
|
||||
|
||||
# if you don't need generate the shared library, please comment it in.
|
||||
# NO_SHARED = 1
|
||||
|
||||
# If you don't need CBLAS interface, please comment it in.
|
||||
# NO_CBLAS = 1
|
||||
|
||||
# If you don't need LAPACK, please comment it in.
|
||||
# If you only want CBLAS interface without installing Fortran compiler,
|
||||
# please comment it in.
|
||||
# ONLY_CBLAS = 1
|
||||
|
||||
# If you don't need LAPACK, please comment it in.
|
||||
# If you set NO_LAPACK=1, the library automatically sets NO_LAPACKE=1.
|
||||
# NO_LAPACK = 1
|
||||
|
||||
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
|
||||
# NO_LAPACKE = 1
|
||||
|
||||
# If you want to use legacy threaded Level 3 implementation.
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
|
||||
@@ -62,6 +81,13 @@ VERSION = 0.1alpha2.1
|
||||
# If you want to disable CPU/Memory affinity on Linux.
|
||||
# NO_AFFINITY = 1
|
||||
|
||||
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
|
||||
# and OS. However, the performance is low.
|
||||
# NO_AVX = 1
|
||||
|
||||
# Don't use parallel make.
|
||||
# NO_PARALLEL_MAKE = 1
|
||||
|
||||
# If you would like to know minute performance report of GotoBLAS.
|
||||
# FUNCTION_PROFILE = 1
|
||||
|
||||
@@ -83,6 +109,11 @@ VERSION = 0.1alpha2.1
|
||||
# If you need to synchronize FP CSR between threads (for x86/x86_64 only).
|
||||
# CONSISTENT_FPCSR = 1
|
||||
|
||||
# If any gemm arguement m, n or k is less or equal this threshold, gemm will be execute
|
||||
# with single thread. You can use this flag to avoid the overhead of multi-threading
|
||||
# in small matrix sizes. The default value is 4.
|
||||
# GEMM_MULTITHREAD_THRESHOLD = 4
|
||||
|
||||
# If you need santy check by comparing reference BLAS. It'll be very
|
||||
# slow (Not implemented yet).
|
||||
# SANITY_CHECK = 1
|
||||
@@ -94,19 +125,16 @@ VERSION = 0.1alpha2.1
|
||||
# The installation directory.
|
||||
# PREFIX = /opt/OpenBLAS
|
||||
|
||||
# Common Optimization Flag; -O2 is enough.
|
||||
# DEBUG = 1
|
||||
|
||||
ifeq ($(DEBUG), 1)
|
||||
COMMON_OPT += -g
|
||||
# -DDEBUG
|
||||
else
|
||||
COMMON_OPT += -O2
|
||||
endif
|
||||
# Common Optimization Flag;
|
||||
# The default -O2 is enough.
|
||||
# COMMON_OPT = -O2
|
||||
|
||||
# Profiling flags
|
||||
COMMON_PROF = -pg
|
||||
|
||||
# Build Debug version
|
||||
# DEBUG = 1
|
||||
|
||||
#
|
||||
# End of user configuration
|
||||
#
|
||||
|
||||
@@ -10,7 +10,6 @@ endif
|
||||
ifeq ($(COMPILER_F77), f90)
|
||||
FCOMMON_OPT += -xarch=v9
|
||||
endif
|
||||
LDFLAGS = -64
|
||||
else
|
||||
|
||||
CCOMMON_OPT += -mcpu=v9
|
||||
|
||||
318
Makefile.system
318
Makefile.system
@@ -9,8 +9,24 @@ ifndef TOPDIR
|
||||
TOPDIR = .
|
||||
endif
|
||||
|
||||
NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib
|
||||
|
||||
# Default C compiler
|
||||
# - Only set if not specified on the command line or inherited from the environment.
|
||||
# - CC is an implicit variable so neither '?=' or 'ifndef' can be used.
|
||||
# http://stackoverflow.com/questions/4029274/mingw-and-make-variables
|
||||
# - Default value is 'cc' which is not always a valid command (e.g. MinGW).
|
||||
ifeq ($(origin CC),default)
|
||||
CC = gcc
|
||||
# Change the default compile to clang on Mac OSX.
|
||||
# http://stackoverflow.com/questions/714100/os-detecting-makefile
|
||||
UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
CC = clang
|
||||
endif
|
||||
endif
|
||||
|
||||
# Default Fortran compiler (FC) is selected by f_check.
|
||||
|
||||
ifndef MAKEFILE_RULE
|
||||
include $(TOPDIR)/Makefile.rule
|
||||
@@ -27,19 +43,58 @@ HOSTCC = $(CC)
|
||||
endif
|
||||
|
||||
ifdef TARGET
|
||||
GETARCH_FLAGS += -DFORCE_$(TARGET)
|
||||
GETARCH_FLAGS := -DFORCE_$(TARGET)
|
||||
endif
|
||||
|
||||
#TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1.
|
||||
#
|
||||
ifdef TARGET_CORE
|
||||
GETARCH_FLAGS := -DFORCE_$(TARGET_CORE)
|
||||
endif
|
||||
|
||||
ifdef INTERFACE64
|
||||
GETARCH_FLAGS += -DUSE64BITINT
|
||||
endif
|
||||
|
||||
ifndef GEMM_MULTITHREAD_THRESHOLD
|
||||
GEMM_MULTITHREAD_THRESHOLD=4
|
||||
endif
|
||||
GETARCH_FLAGS += -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD)
|
||||
|
||||
ifeq ($(NO_AVX), 1)
|
||||
GETARCH_FLAGS += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifeq ($(DEBUG), 1)
|
||||
GETARCH_FLAGS += -g
|
||||
endif
|
||||
|
||||
ifeq ($(QUIET_MAKE), 1)
|
||||
MAKE += -s
|
||||
endif
|
||||
|
||||
ifndef NO_PARALLEL_MAKE
|
||||
NO_PARALLEL_MAKE=0
|
||||
endif
|
||||
GETARCH_FLAGS += -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE)
|
||||
|
||||
ifeq ($(HOSTCC), loongcc)
|
||||
GETARCH_FLAGS += -static
|
||||
endif
|
||||
|
||||
#if don't use Fortran, it will only compile CBLAS.
|
||||
ifeq ($(ONLY_CBLAS), 1)
|
||||
NO_LAPACK = 1
|
||||
else
|
||||
ONLY_CBLAS = 0
|
||||
endif
|
||||
|
||||
# This operation is expensive, so execution should be once.
|
||||
ifndef GOTOBLAS_MAKEFILE
|
||||
export GOTOBLAS_MAKEFILE = 1
|
||||
|
||||
# Generating Makefile.conf and config.h
|
||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.getarch CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS=$(GETARCH_FLAGS) BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) all)
|
||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) all)
|
||||
|
||||
ifndef TARGET_CORE
|
||||
include $(TOPDIR)/Makefile.conf
|
||||
@@ -89,8 +144,16 @@ DLLWRAP = $(CROSS_SUFFIX)dllwrap
|
||||
#
|
||||
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
EXTRALIB += -lSystemStubs
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.2
|
||||
MD5SUM = md5 -r
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
MD5SUM = md5 -r
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
MD5SUM = md5 -n
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Linux)
|
||||
@@ -109,7 +172,39 @@ EXTRALIB += -defaultlib:advapi32
|
||||
|
||||
SUFFIX = obj
|
||||
PSUFFIX = pobj
|
||||
LIBSUFFIX = lib
|
||||
LIBSUFFIX = a
|
||||
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
CCOMMON_OPT += -DMS_ABI
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
#Test for supporting MS_ABI
|
||||
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
|
||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||
ifeq ($(GCCVERSIONGT4), 1)
|
||||
# GCC Majar version > 4
|
||||
# It is compatible with MSVC ABI.
|
||||
CCOMMON_OPT += -DMS_ABI
|
||||
endif
|
||||
|
||||
ifeq ($(GCCVERSIONGTEQ4), 1)
|
||||
ifeq ($(GCCMINORVERSIONGTEQ7), 1)
|
||||
# GCC Version >=4.7
|
||||
# It is compatible with MSVC ABI.
|
||||
CCOMMON_OPT += -DMS_ABI
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Ensure the correct stack alignment on Win32
|
||||
# http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
|
||||
ifeq ($(ARCH), x86)
|
||||
CCOMMON_OPT += -mincoming-stack-boundary=2
|
||||
FCOMMON_OPT += -mincoming-stack-boundary=2
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Interix)
|
||||
@@ -134,6 +229,11 @@ endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# ifeq logical or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix))
|
||||
OS_WINDOWS=1
|
||||
endif
|
||||
|
||||
ifdef QUAD_PRECISION
|
||||
CCOMMON_OPT += -DQUAD_PRECISION
|
||||
NO_EXPRECISION = 1
|
||||
@@ -164,11 +264,17 @@ NO_BINARY_MODE = 1
|
||||
endif
|
||||
ifndef NO_EXPRECISION
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# ifeq logical or. GCC or LSB
|
||||
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
|
||||
EXPRECISION = 1
|
||||
CCOMMON_OPT += -DEXPRECISION -m128bit-long-double
|
||||
FCOMMON_OPT += -m128bit-long-double
|
||||
endif
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
EXPRECISION = 1
|
||||
CCOMMON_OPT += -DEXPRECISION
|
||||
FCOMMON_OPT += -m128bit-long-double
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
@@ -176,11 +282,17 @@ endif
|
||||
ifeq ($(ARCH), x86_64)
|
||||
ifndef NO_EXPRECISION
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# ifeq logical or. GCC or LSB
|
||||
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
|
||||
EXPRECISION = 1
|
||||
CCOMMON_OPT += -DEXPRECISION -m128bit-long-double
|
||||
FCOMMON_OPT += -m128bit-long-double
|
||||
endif
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
EXPRECISION = 1
|
||||
CCOMMON_OPT += -DEXPRECISION
|
||||
FCOMMON_OPT += -m128bit-long-double
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
@@ -190,7 +302,13 @@ CCOMMON_OPT += -wd981
|
||||
endif
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# ifeq logical or. GCC or LSB
|
||||
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
|
||||
CCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
$(error OpenBLAS: Clang didn't support OpenMP yet.)
|
||||
CCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
|
||||
@@ -213,14 +331,20 @@ endif
|
||||
endif
|
||||
|
||||
|
||||
ifdef DYNAMIC_ARCH
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
ifeq ($(ARCH), x86)
|
||||
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
|
||||
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA ATOM NANO
|
||||
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
ifneq ($(NO_AVX), 1)
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86_64)
|
||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA ATOM NANO
|
||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
ifneq ($(NO_AVX), 1)
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef DYNAMIC_CORE
|
||||
@@ -249,15 +373,35 @@ NO_BINARY_MODE = 1
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), arm)
|
||||
NO_BINARY_MODE = 1
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), arm64)
|
||||
NO_BINARY_MODE = 1
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
|
||||
|
||||
|
||||
|
||||
#
|
||||
# C Compiler dependent settings
|
||||
#
|
||||
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
|
||||
# ifeq logical or. GCC or CLANG or LSB
|
||||
# http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or
|
||||
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC CLANG LSB))
|
||||
CCOMMON_OPT += -Wall
|
||||
COMMON_PROF += -fno-inline
|
||||
NO_UNINITIALIZED_WARN = -Wno-uninitialized
|
||||
|
||||
ifeq ($(QUIET_MAKE), 1)
|
||||
CCOMMON_OPT += $(NO_UNINITIALIZED_WARN) -Wno-unused
|
||||
endif
|
||||
|
||||
ifdef NO_BINARY_MODE
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
@@ -269,7 +413,12 @@ endif
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
CCOMMON_OPT += -march=mips64
|
||||
FCOMMON_OPT += -march=mips64
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), LOONGSON3B)
|
||||
CCOMMON_OPT += -march=mips64
|
||||
FCOMMON_OPT += -march=mips64
|
||||
endif
|
||||
@@ -336,7 +485,11 @@ endif
|
||||
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
CCOMMON_OPT += -DF_INTERFACE_GFORT
|
||||
FCOMMON_OPT += -Wall
|
||||
FCOMMON_OPT += -Wall
|
||||
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
EXTRALIB += -lgfortran
|
||||
endif
|
||||
ifdef NO_BINARY_MODE
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifdef BINARY64
|
||||
@@ -443,11 +596,28 @@ ifdef INTERFACE64
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifndef BINARY64
|
||||
FCOMMON_OPT += -n32
|
||||
else
|
||||
FCOMMON_OPT += -n64
|
||||
endif
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
FCOMMON_OPT += -loongson3 -static
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), LOONGSON3B)
|
||||
FCOMMON_OPT += -loongson3 -static
|
||||
endif
|
||||
|
||||
else
|
||||
ifndef BINARY64
|
||||
FCOMMON_OPT += -m32
|
||||
else
|
||||
FCOMMON_OPT += -m64
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef USE_OPENMP
|
||||
FEXTRALIB += -lstdc++
|
||||
@@ -456,12 +626,30 @@ endif
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), OPEN64)
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifndef BINARY64
|
||||
CCOMMON_OPT += -n32
|
||||
else
|
||||
CCOMMON_OPT += -n64
|
||||
endif
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
CCOMMON_OPT += -loongson3 -static
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), LOONGSON3B)
|
||||
CCOMMON_OPT += -loongson3 -static
|
||||
endif
|
||||
|
||||
else
|
||||
|
||||
ifndef BINARY64
|
||||
CCOMMON_OPT += -m32
|
||||
else
|
||||
CCOMMON_OPT += -m64
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), SUN)
|
||||
CCOMMON_OPT += -w
|
||||
@@ -517,14 +705,26 @@ endif
|
||||
|
||||
ifeq ($(NO_LAPACK), 1)
|
||||
CCOMMON_OPT += -DNO_LAPACK
|
||||
#Disable LAPACK C interface
|
||||
NO_LAPACKE = 1
|
||||
endif
|
||||
|
||||
ifeq ($(NO_LAPACKE), 1)
|
||||
CCOMMON_OPT += -DNO_LAPACKE
|
||||
endif
|
||||
|
||||
ifeq ($(NO_AVX), 1)
|
||||
CCOMMON_OPT += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifdef SMP
|
||||
CCOMMON_OPT += -DSMP_SERVER
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifneq ($(CORE), LOONGSON3B)
|
||||
USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
@@ -563,7 +763,11 @@ ifdef USE_SIMPLE_THREADED_LEVEL3
|
||||
CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3
|
||||
endif
|
||||
|
||||
ifndef LIBNAMESUFFIX
|
||||
LIBPREFIX = libopenblas
|
||||
else
|
||||
LIBPREFIX = libopenblas_$(LIBNAMESUFFIX)
|
||||
endif
|
||||
|
||||
KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
|
||||
|
||||
@@ -585,9 +789,11 @@ endif
|
||||
|
||||
ifneq ($(ARCH), x86_64)
|
||||
ifneq ($(ARCH), x86)
|
||||
ifneq ($(CORE), LOONGSON3B)
|
||||
NO_AFFINITY = 1
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef NO_AFFINITY
|
||||
CCOMMON_OPT += -DNO_AFFINITY
|
||||
@@ -627,16 +833,59 @@ PATCH = patch
|
||||
GREP = grep
|
||||
endif
|
||||
|
||||
ifndef MD5SUM
|
||||
MD5SUM = md5sum
|
||||
endif
|
||||
|
||||
AWK = awk
|
||||
|
||||
REVISION = -r$(VERSION)
|
||||
MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION)))
|
||||
|
||||
CFLAGS = $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
|
||||
PFLAGS = $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
|
||||
ifeq ($(DEBUG), 1)
|
||||
COMMON_OPT += -g
|
||||
endif
|
||||
|
||||
FFLAGS = $(COMMON_OPT) $(FCOMMON_OPT)
|
||||
FPFLAGS = $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
|
||||
ifndef COMMON_OPT
|
||||
ifeq ($(ARCH), arm)
|
||||
COMMON_OPT = -O3
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef COMMON_OPT
|
||||
ifeq ($(ARCH), arm64)
|
||||
COMMON_OPT = -O3
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
ifndef COMMON_OPT
|
||||
COMMON_OPT = -O2
|
||||
endif
|
||||
|
||||
|
||||
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
|
||||
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
|
||||
|
||||
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
|
||||
override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
|
||||
#MAKEOVERRIDES =
|
||||
|
||||
#For LAPACK Fortran codes.
|
||||
LAPACK_FFLAGS := $(filter-out -fopenmp -mp -openmp -xopenmp=parallel,$(FFLAGS))
|
||||
LAPACK_FPFLAGS := $(filter-out -fopenmp -mp -openmp -xopenmp=parallel,$(FPFLAGS))
|
||||
|
||||
LAPACK_CFLAGS = $(CFLAGS)
|
||||
LAPACK_CFLAGS += -DHAVE_LAPACK_CONFIG_H
|
||||
ifdef INTERFACE64
|
||||
LAPACK_CFLAGS += -DLAPACK_ILP64
|
||||
endif
|
||||
ifdef OS_WINDOWS
|
||||
LAPACK_CFLAGS += -DOPENBLAS_OS_WINDOWS
|
||||
endif
|
||||
ifeq ($(C_COMPILER), LSB)
|
||||
LAPACK_CFLAGS += -DLAPACK_COMPLEX_STRUCTURE
|
||||
endif
|
||||
|
||||
ifndef SUFFIX
|
||||
SUFFIX = o
|
||||
@@ -650,7 +899,7 @@ ifndef LIBSUFFIX
|
||||
LIBSUFFIX = a
|
||||
endif
|
||||
|
||||
ifndef DYNAMIC_ARCH
|
||||
ifneq ($(DYNAMIC_ARCH), 1)
|
||||
ifndef SMP
|
||||
LIBNAME = $(LIBPREFIX)_$(LIBCORE)$(REVISION).$(LIBSUFFIX)
|
||||
LIBNAME_P = $(LIBPREFIX)_$(LIBCORE)$(REVISION)_p.$(LIBSUFFIX)
|
||||
@@ -669,8 +918,8 @@ endif
|
||||
endif
|
||||
|
||||
|
||||
LIBDLLNAME = $(LIBPREFIX).dll
|
||||
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.so)
|
||||
LIBDLLNAME = $(LIBNAME:.$(LIBSUFFIX)=.dll)
|
||||
LIBDYNNAME = $(LIBNAME:.$(LIBSUFFIX)=.dylib)
|
||||
LIBDEFNAME = $(LIBNAME:.$(LIBSUFFIX)=.def)
|
||||
LIBEXPNAME = $(LIBNAME:.$(LIBSUFFIX)=.exp)
|
||||
@@ -679,6 +928,23 @@ LIBZIPNAME = $(LIBNAME:.$(LIBSUFFIX)=.zip)
|
||||
LIBS = $(TOPDIR)/$(LIBNAME)
|
||||
LIBS_P = $(TOPDIR)/$(LIBNAME_P)
|
||||
|
||||
|
||||
LIB_COMPONENTS = BLAS
|
||||
ifneq ($(NO_CBLAS), 1)
|
||||
LIB_COMPONENTS += CBLAS
|
||||
endif
|
||||
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
LIB_COMPONENTS += LAPACK
|
||||
ifneq ($(NO_LAPACKE), 1)
|
||||
LIB_COMPONENTS += LAPACKE
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ONLY_CBLAS), 1)
|
||||
LIB_COMPONENTS = CBLAS
|
||||
endif
|
||||
|
||||
export OSNAME
|
||||
export ARCH
|
||||
export CORE
|
||||
@@ -689,6 +955,7 @@ export CC
|
||||
export FC
|
||||
export BU
|
||||
export FU
|
||||
export NEED2UNDERSCORES
|
||||
export USE_THREAD
|
||||
export NUM_THREADS
|
||||
export NUM_CORES
|
||||
@@ -704,6 +971,7 @@ export USE_OPENMP
|
||||
export CROSS
|
||||
export CROSS_SUFFIX
|
||||
export NOFORTRAN
|
||||
export NO_FBLAS
|
||||
export EXTRALIB
|
||||
export CEXTRALIB
|
||||
export FEXTRALIB
|
||||
@@ -715,6 +983,11 @@ export HAVE_SSE4_1
|
||||
export HAVE_SSE4_2
|
||||
export HAVE_SSE4A
|
||||
export HAVE_SSE5
|
||||
export HAVE_AVX
|
||||
export HAVE_VFP
|
||||
export HAVE_VFPV3
|
||||
export HAVE_VFPV4
|
||||
export HAVE_NEON
|
||||
export KERNELDIR
|
||||
export FUNCTION_PROFILE
|
||||
export TARGET_CORE
|
||||
@@ -731,6 +1004,13 @@ export ZGEMM_UNROLL_M
|
||||
export ZGEMM_UNROLL_N
|
||||
export XGEMM_UNROLL_M
|
||||
export XGEMM_UNROLL_N
|
||||
export CGEMM3M_UNROLL_M
|
||||
export CGEMM3M_UNROLL_N
|
||||
export ZGEMM3M_UNROLL_M
|
||||
export ZGEMM3M_UNROLL_N
|
||||
export XGEMM3M_UNROLL_M
|
||||
export XGEMM3M_UNROLL_N
|
||||
|
||||
|
||||
ifdef USE_CUDA
|
||||
export CUDADIR
|
||||
|
||||
@@ -22,19 +22,19 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
|
||||
endif
|
||||
|
||||
$(SBLASOBJS) $(SBLASOBJS_P) : CFLAGS += -UDOUBLE -UCOMPLEX
|
||||
$(DBLASOBJS) $(DBLASOBJS_P) : CFLAGS += -DDOUBLE -UCOMPLEX
|
||||
$(QBLASOBJS) $(QBLASOBJS_P) : CFLAGS += -DXDOUBLE -UCOMPLEX
|
||||
$(CBLASOBJS) $(CBLASOBJS_P) : CFLAGS += -UDOUBLE -DCOMPLEX
|
||||
$(ZBLASOBJS) $(ZBLASOBJS_P) : CFLAGS += -DDOUBLE -DCOMPLEX
|
||||
$(XBLASOBJS) $(XBLASOBJS_P) : CFLAGS += -DXDOUBLE -DCOMPLEX
|
||||
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
|
||||
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
|
||||
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
|
||||
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
|
||||
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
|
||||
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
|
||||
|
||||
$(SBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(DBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(QBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(CBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(ZBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(XBLASOBJS_P) : CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
|
||||
libs :: $(BLASOBJS) $(COMMONOBJS)
|
||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||
@@ -606,7 +606,8 @@ clean ::
|
||||
@if test -d $(ARCH); then \
|
||||
(cd $(ARCH) && $(MAKE) clean) \
|
||||
fi
|
||||
@rm -rf *.a *.s *.o *.po *.obj *.i *.so core core.* gmon.out *.cso \
|
||||
@find . -name '*.o' | xargs rm -rf
|
||||
@rm -rf *.a *.s *.po *.obj *.i *.so core core.* gmon.out *.cso \
|
||||
*.csx *.is *~ *.exe *.flame *.pdb *.dwf \
|
||||
gen_insn_flash.c gen_insn_flash *.stackdump *.dll *.exp *.lib \
|
||||
*.pc *.pcl *.def *.i *.prof linktest.c \
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
# COMPILER_PREFIX = mingw32-
|
||||
|
||||
ifeq ($(OSNAME), Linux)
|
||||
LDFLAGS = -melf_i386
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Interix)
|
||||
ARFLAGS = -m x86
|
||||
|
||||
@@ -2,25 +2,12 @@
|
||||
|
||||
ifeq ($(OSNAME), SunOS)
|
||||
ifdef BINARY64
|
||||
LDFLAGS = -64
|
||||
ifeq ($(F_COMPILER), SUN)
|
||||
FCOMMON_OPT += -m64
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
LDFLAGS = -m elf_x86_64_fbsd
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Linux)
|
||||
LDFLAGS = -m elf_x86_64
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
LDFLAGS =
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Interix)
|
||||
ARFLAGS = -m x64
|
||||
endif
|
||||
|
||||
83
README
83
README
@@ -1,83 +0,0 @@
|
||||
OpenBLAS Readme
|
||||
|
||||
1.Introduction
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. OpenBLAS is an open source project supported by Lab of Parallel Software and Computational Science, ISCAS.(http://www.rdcps.ac.cn)
|
||||
|
||||
2.Intallation
|
||||
Download from project homepage. http://xianyi.github.com/OpenBLAS/
|
||||
Or,
|
||||
check out codes from git://github.com/xianyi/OpenBLAS.git
|
||||
1)Normal compile
|
||||
(a) type "make" to detect the CPU automatically.
|
||||
or
|
||||
(b) type "make TARGET=xxx" to set target CPU, e.g. "make TARGET=NEHALEM". The full target list is in file TargetList.txt.
|
||||
|
||||
2)Cross compile
|
||||
Please set CC and FC with the cross toolchains. Then, set HOSTCC with your host C compiler. At last, set TARGET explicitly.
|
||||
|
||||
examples:
|
||||
On X86 box, compile this library for loongson3a CPU.
|
||||
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
|
||||
|
||||
3)Debug version
|
||||
make DEBUG=1
|
||||
|
||||
4)Intall to the directory (Optional)
|
||||
e.g.
|
||||
make install PREFIX=your_installation_directory
|
||||
The default directory is /opt/OpenBLAS
|
||||
|
||||
3.Support CPU & OS
|
||||
Please read GotoBLAS_01Readme.txt
|
||||
|
||||
Additional support CPU:
|
||||
x86_64:
|
||||
Intel Xeon 56xx (Westmere) //Used GotoBLAS2 Nehalem codes.
|
||||
MIPS64:
|
||||
ICT Loongson 3A //The initial version used GotoBLAS2 MIPS64 kernels. Thus, the performance is not good.
|
||||
|
||||
4.Usages
|
||||
Link with libopenblas.a or -lopenblas for shared library.
|
||||
|
||||
4.1 Set the number of threads with environment variables. for example,
|
||||
export OPENBLAS_NUM_THREADS=4
|
||||
or
|
||||
export GOTO_NUM_THREADS=4
|
||||
or
|
||||
export OMP_NUM_THREADS=4
|
||||
|
||||
The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS.
|
||||
|
||||
If you compile this lib with USE_OPENMP=1, you should only set OMP_NUM_THREADS environment variable.
|
||||
|
||||
4.2 Set the number of threads with calling functions. for example,
|
||||
void goto_set_num_threads(int num_threads);
|
||||
or
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
|
||||
If you compile this lib with USE_OPENMP=1, you should use the above functions, too.
|
||||
|
||||
5.Report Bugs
|
||||
Please add a issue in https://github.com/xianyi/OpenBLAS/issues
|
||||
|
||||
6.To-Do List:
|
||||
Optimization on ICT Loongson 3A CPU
|
||||
|
||||
7.Contact
|
||||
OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas
|
||||
|
||||
8.ChangeLog
|
||||
Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version.
|
||||
|
||||
9.Known Issues
|
||||
* The number of CPUs/Cores should less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit
|
||||
is 64. On 32 bits, it is 32.
|
||||
* This library is not compatible with EKOPath Compiler Suite 4.0.10 (http://www.pathscale.com/ekopath-compiler-suite). However, Path64 (https://github.com/path64/compiler) could compile the codes successfully.
|
||||
|
||||
10. Specification of Git Branches
|
||||
We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/).
|
||||
Now, there are 4 branches in github.com.
|
||||
* The master branch. This a main branch to reflect a production-ready state.
|
||||
* The develop branch. This a main branch to reflect a state with the latest delivered development changes for the next release.
|
||||
* The loongson3a branch. This is a feature branch. We develop Loongson3A codes on this branch. We will merge this feature to develop branch in future.
|
||||
* The gh-pages branch. This is for web pages
|
||||
130
README.md
Normal file
130
README.md
Normal file
@@ -0,0 +1,130 @@
|
||||
# OpenBLAS
|
||||
|
||||
[](https://travis-ci.org/xianyi/OpenBLAS)
|
||||
|
||||
## Introduction
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
|
||||
Please read the documents on OpenBLAS wiki pages <http://github.com/xianyi/OpenBLAS/wiki>.
|
||||
|
||||
## Binary Packages
|
||||
We provide binary packages for the following platform.
|
||||
|
||||
* Windows x86/x86_64
|
||||
|
||||
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/).
|
||||
|
||||
## Installation from Source
|
||||
Download from project homepage. http://xianyi.github.com/OpenBLAS/
|
||||
|
||||
Or, check out codes from git://github.com/xianyi/OpenBLAS.git
|
||||
### Normal compile
|
||||
* type "make" to detect the CPU automatically.
|
||||
or
|
||||
* type "make TARGET=xxx" to set target CPU, e.g. "make TARGET=NEHALEM". The full target list is in file TargetList.txt.
|
||||
|
||||
### Cross compile
|
||||
Please set CC and FC with the cross toolchains. Then, set HOSTCC with your host C compiler. At last, set TARGET explicitly.
|
||||
|
||||
Examples:
|
||||
|
||||
On X86 box, compile this library for loongson3a CPU.
|
||||
|
||||
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
|
||||
|
||||
On X86 box, compile this library for loongson3a CPU with loongcc (based on Open64) compiler.
|
||||
|
||||
make CC=loongcc FC=loongf95 HOSTCC=gcc TARGET=LOONGSON3A CROSS=1 CROSS_SUFFIX=mips64el-st-linux-gnu- NO_LAPACKE=1 NO_SHARED=1 BINARY=32
|
||||
|
||||
### Debug version
|
||||
|
||||
make DEBUG=1
|
||||
|
||||
### Install to the directory (optional)
|
||||
|
||||
Example:
|
||||
|
||||
make install PREFIX=your_installation_directory
|
||||
|
||||
The default directory is /opt/OpenBLAS
|
||||
|
||||
## Support CPU & OS
|
||||
Please read GotoBLAS_01Readme.txt
|
||||
|
||||
### Additional support CPU:
|
||||
|
||||
#### x86/x86-64:
|
||||
- **Intel Xeon 56xx (Westmere)**: Used GotoBLAS2 Nehalem codes.
|
||||
- **Intel Sandy Bridge**: Optimized Level-3 BLAS with AVX on x86-64.
|
||||
- **Intel Haswell**: Optimized Level-3 BLAS with AVX on x86-64 (identical to Sandy Bridge).
|
||||
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
|
||||
- **AMD Bulldozer**: x86-64 S/DGEMM AVX kernels. (Thank Werner Saar)
|
||||
- **AMD PILEDRIVER**: Used Bulldozer codes.
|
||||
|
||||
#### MIPS64:
|
||||
- **ICT Loongson 3A**: Optimized Level-3 BLAS and the part of Level-1,2.
|
||||
- **ICT Loongson 3B**: Experimental
|
||||
|
||||
### Support OS:
|
||||
- **GNU/Linux**
|
||||
- **MingWin/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
|
||||
- **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are the beginner on Mac OS X.
|
||||
- **FreeBSD**: Supported by community. We didn't test the library on this OS.
|
||||
|
||||
## Usages
|
||||
Link with libopenblas.a or -lopenblas for shared library.
|
||||
|
||||
### Set the number of threads with environment variables.
|
||||
|
||||
Examples:
|
||||
|
||||
export OPENBLAS_NUM_THREADS=4
|
||||
|
||||
or
|
||||
|
||||
export GOTO_NUM_THREADS=4
|
||||
|
||||
or
|
||||
|
||||
export OMP_NUM_THREADS=4
|
||||
|
||||
The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS.
|
||||
|
||||
If you compile this lib with USE_OPENMP=1, you should set OMP_NUM_THREADS environment variable. OpenBLAS ignores OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS with USE_OPENMP=1.
|
||||
|
||||
### Set the number of threads on runtime.
|
||||
|
||||
We provided the below functions to control the number of threads on runtime.
|
||||
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
|
||||
If you compile this lib with USE_OPENMP=1, you should use the above functions, too.
|
||||
|
||||
## Report Bugs
|
||||
Please add a issue in https://github.com/xianyi/OpenBLAS/issues
|
||||
|
||||
## Contact
|
||||
* OpenBLAS users mailing list: https://groups.google.com/forum/#!forum/openblas-users
|
||||
* OpenBLAS developers mailing list: https://groups.google.com/forum/#!forum/openblas-dev
|
||||
|
||||
## ChangeLog
|
||||
Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version.
|
||||
|
||||
## Troubleshooting
|
||||
* Please read [Faq](https://github.com/xianyi/OpenBLAS/wiki/Faq) at first.
|
||||
* Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD.
|
||||
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture. The Clang 3.0 will generate the wrong AVX binary code.
|
||||
* The number of CPUs/Cores should less than or equal to 256.
|
||||
* On Linux, OpenBLAS sets the processor affinity by default. This may cause [the conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html). You can build the library with NO_AFFINITY=1.
|
||||
* On Loongson 3A. make test would be failed because of pthread_create error. The error code is EAGAIN. However, it will be OK when you run the same testcase on shell.
|
||||
|
||||
## Contributing
|
||||
1. [Check for open issues](https://github.com/xianyi/OpenBLAS/issues) or open a fresh issue to start a discussion around a feature idea or a bug.
|
||||
1. Fork the [OpenBLAS](https://github.com/xianyi/OpenBLAS) repository to start making your changes.
|
||||
1. Write a test which shows that the bug was fixed or that the feature works as expected.
|
||||
1. Send a pull request. Make sure to add yourself to `CONTRIBUTORS.md`.
|
||||
|
||||
## Donation
|
||||
Please read [this wiki page](https://github.com/xianyi/OpenBLAS/wiki/Donation).
|
||||
@@ -8,8 +8,8 @@ Supported List:
|
||||
1.X86/X86_64
|
||||
a)Intel CPU:
|
||||
P2
|
||||
COPPERMINE
|
||||
KATMAI
|
||||
COPPERMINE
|
||||
NORTHWOOD
|
||||
PRESCOTT
|
||||
BANIAS
|
||||
@@ -18,6 +18,7 @@ CORE2
|
||||
PENRYN
|
||||
DUNNINGTON
|
||||
NEHALEM
|
||||
SANDYBRIDGE
|
||||
ATOM
|
||||
|
||||
b)AMD CPU:
|
||||
@@ -27,6 +28,8 @@ OPTERON_SSE3
|
||||
BARCELONA
|
||||
SHANGHAI
|
||||
ISTANBUL
|
||||
BOBCAT
|
||||
BULLDOZER
|
||||
|
||||
c)VIA CPU:
|
||||
SSE_GENERIC
|
||||
@@ -47,6 +50,7 @@ CELL
|
||||
3.MIPS64 CPU:
|
||||
SICORTEX
|
||||
LOONGSON3A
|
||||
LOONGSON3B
|
||||
|
||||
4.IA64 CPU:
|
||||
ITANIUM2
|
||||
|
||||
34
c_check
34
c_check
@@ -33,6 +33,8 @@ if ($ARGV[0] =~ /(.*)(-[.\d]+)/) {
|
||||
}
|
||||
|
||||
$compiler = "";
|
||||
$compiler = LSB if ($data =~ /COMPILER_LSB/);
|
||||
$compiler = CLANG if ($data =~ /COMPILER_CLANG/);
|
||||
$compiler = PGI if ($data =~ /COMPILER_PGI/);
|
||||
$compiler = PATHSCALE if ($data =~ /COMPILER_PATHSCALE/);
|
||||
$compiler = INTEL if ($data =~ /COMPILER_INTEL/);
|
||||
@@ -43,14 +45,14 @@ $compiler = DEC if ($data =~ /COMPILER_DEC/);
|
||||
$compiler = GCC if ($compiler eq "");
|
||||
|
||||
$os = Linux if ($data =~ /OS_LINUX/);
|
||||
$os = FreeBSD if ($data =~ /OS_FreeBSD/);
|
||||
$os = NetBSD if ($data =~ /OS_NetBSD/);
|
||||
$os = Darwin if ($data =~ /OS_Darwin/);
|
||||
$os = SunOS if ($data =~ /OS_SunOS/);
|
||||
$os = FreeBSD if ($data =~ /OS_FREEBSD/);
|
||||
$os = NetBSD if ($data =~ /OS_NETBSD/);
|
||||
$os = Darwin if ($data =~ /OS_DARWIN/);
|
||||
$os = SunOS if ($data =~ /OS_SUNOS/);
|
||||
$os = AIX if ($data =~ /OS_AIX/);
|
||||
$os = osf if ($data =~ /OS_OSF/);
|
||||
$os = WINNT if ($data =~ /OS_WINNT/);
|
||||
$os = CYGWIN_NT if ($data =~ /OS_CYGWIN/);
|
||||
$os = CYGWIN_NT if ($data =~ /OS_CYGWIN_NT/);
|
||||
$os = Interix if ($data =~ /OS_INTERIX/);
|
||||
|
||||
$architecture = x86 if ($data =~ /ARCH_X86/);
|
||||
@@ -61,6 +63,8 @@ $architecture = mips64 if ($data =~ /ARCH_MIPS64/);
|
||||
$architecture = alpha if ($data =~ /ARCH_ALPHA/);
|
||||
$architecture = sparc if ($data =~ /ARCH_SPARC/);
|
||||
$architecture = ia64 if ($data =~ /ARCH_IA64/);
|
||||
$architecture = arm if ($data =~ /ARCH_ARM/);
|
||||
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
||||
|
||||
$defined = 0;
|
||||
|
||||
@@ -117,7 +121,11 @@ if ($compiler eq "OPEN64") {
|
||||
$openmp = "-mp";
|
||||
}
|
||||
|
||||
if ($compiler eq "GCC") {
|
||||
if ($compiler eq "CLANG") {
|
||||
$openmp = "-fopenmp";
|
||||
}
|
||||
|
||||
if ($compiler eq "GCC" || $compiler eq "LSB") {
|
||||
$openmp = "-fopenmp";
|
||||
}
|
||||
|
||||
@@ -143,6 +151,8 @@ $architecture = mips64 if ($data =~ /ARCH_MIPS64/);
|
||||
$architecture = alpha if ($data =~ /ARCH_ALPHA/);
|
||||
$architecture = sparc if ($data =~ /ARCH_SPARC/);
|
||||
$architecture = ia64 if ($data =~ /ARCH_IA64/);
|
||||
$architecture = arm if ($data =~ /ARCH_ARM/);
|
||||
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
||||
|
||||
$binformat = bin32;
|
||||
$binformat = bin64 if ($data =~ /BINARY_64/);
|
||||
@@ -174,6 +184,8 @@ $linker_a = "";
|
||||
$link =~ s/\-Y\sP\,/\-Y/g;
|
||||
|
||||
@flags = split(/[\s\,\n]/, $link);
|
||||
# remove leading and trailing quotes from each flag.
|
||||
@flags = map {s/^['"]|['"]$//g; $_} @flags;
|
||||
|
||||
foreach $flags (@flags) {
|
||||
if (
|
||||
@@ -239,13 +251,13 @@ print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
|
||||
|
||||
if ($os eq "LINUX") {
|
||||
|
||||
@pthread = split(/\s+/, `nm /lib/libpthread.so* | grep _pthread_create`);
|
||||
# @pthread = split(/\s+/, `nm /lib/libpthread.so* | grep _pthread_create`);
|
||||
|
||||
if ($pthread[2] ne "") {
|
||||
print CONFFILE "#define PTHREAD_CREATE_FUNC $pthread[2]\n";
|
||||
} else {
|
||||
# if ($pthread[2] ne "") {
|
||||
# print CONFFILE "#define PTHREAD_CREATE_FUNC $pthread[2]\n";
|
||||
# } else {
|
||||
print CONFFILE "#define PTHREAD_CREATE_FUNC pthread_create\n";
|
||||
}
|
||||
# }
|
||||
} else {
|
||||
print CONFFILE "#define PTHREAD_CREATE_FUNC pthread_create\n";
|
||||
}
|
||||
|
||||
469
cblas.h
469
cblas.h
@@ -1,287 +1,312 @@
|
||||
#ifndef CBLAS_H
|
||||
#define CBLAS_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include "common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
/* Assume C declarations for C++ */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#include <stddef.h>
|
||||
#include "common.h"
|
||||
/*Set the number of threads on runtime.*/
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
/*Get the build configure on runtime.*/
|
||||
char* openblas_get_config(void);
|
||||
|
||||
/* Get the parallelization type which is used by OpenBLAS */
|
||||
int openblas_get_parallel(void);
|
||||
/* OpenBLAS is compiled for sequential use */
|
||||
#define OPENBLAS_SEQUENTIAL 0
|
||||
/* OpenBLAS is compiled using normal threading model */
|
||||
#define OPENBLAS_THREAD 1
|
||||
/* OpenBLAS is compiled using OpenMP threading model */
|
||||
#define OPENBLAS_OPENMP 2
|
||||
|
||||
|
||||
/*
|
||||
* Since all of GotoBlas was written without const,
|
||||
* we disable it at build time.
|
||||
*/
|
||||
#ifndef OPENBLAS_CONST
|
||||
# define OPENBLAS_CONST const
|
||||
#endif
|
||||
|
||||
|
||||
#define CBLAS_INDEX size_t
|
||||
|
||||
enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102};
|
||||
enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114};
|
||||
enum CBLAS_UPLO {CblasUpper=121, CblasLower=122};
|
||||
enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132};
|
||||
enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
|
||||
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
|
||||
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;
|
||||
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
|
||||
typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
|
||||
typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
|
||||
|
||||
float cblas_sdsdot(blasint n, float, float *x, blasint incx, float *y, blasint incy);
|
||||
double cblas_dsdot (blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
float cblas_sdot(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
double cblas_ddot(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
float cblas_sdsdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
double cblas_dsdot (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
float cblas_sdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
double cblas_ddot(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
float _Complex cblas_cdotu(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
float _Complex cblas_cdotc(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
double _Complex cblas_zdotu(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
double _Complex cblas_zdotc(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
openblas_complex_float cblas_cdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_float cblas_cdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_double cblas_zdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_double cblas_zdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_cdotu_sub(blasint n, float *x, blasint incx, float *y, blasint incy, float _Complex *ret);
|
||||
void cblas_cdotc_sub(blasint n, float *x, blasint incx, float *y, blasint incy, float _Complex *ret);
|
||||
void cblas_zdotu_sub(blasint n, double *x, blasint incx, double *y, blasint incy, double _Complex *ret);
|
||||
void cblas_zdotc_sub(blasint n, double *x, blasint incx, double *y, blasint incy, double _Complex *ret);
|
||||
void cblas_cdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy, openblas_complex_float *ret);
|
||||
void cblas_cdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy, openblas_complex_float *ret);
|
||||
void cblas_zdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy, openblas_complex_double *ret);
|
||||
void cblas_zdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy, openblas_complex_double *ret);
|
||||
|
||||
float cblas_sasum (blasint n, float *x, blasint incx);
|
||||
double cblas_dasum (blasint n, double *x, blasint incx);
|
||||
float cblas_scasum(blasint n, float *x, blasint incx);
|
||||
double cblas_dzasum(blasint n, double *x, blasint incx);
|
||||
float cblas_sasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
float cblas_scasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dzasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
float cblas_snrm2 (blasint N, float *X, blasint incX);
|
||||
double cblas_dnrm2 (blasint N, double *X, blasint incX);
|
||||
float cblas_scnrm2(blasint N, float *X, blasint incX);
|
||||
double cblas_dznrm2(blasint N, double *X, blasint incX);
|
||||
float cblas_snrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX);
|
||||
double cblas_dnrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX);
|
||||
float cblas_scnrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX);
|
||||
double cblas_dznrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
CBLAS_INDEX cblas_isamax(blasint n, float *x, blasint incx);
|
||||
CBLAS_INDEX cblas_idamax(blasint n, double *x, blasint incx);
|
||||
CBLAS_INDEX cblas_icamax(blasint n, float *x, blasint incx);
|
||||
CBLAS_INDEX cblas_izamax(blasint n, double *x, blasint incx);
|
||||
CBLAS_INDEX cblas_isamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_idamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_icamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_izamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
void cblas_saxpy(blasint n, float, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_daxpy(blasint n, double, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_caxpy(blasint n, float *, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zaxpy(blasint n, double *, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_saxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_daxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zaxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_scopy(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_dcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_ccopy(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_scopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_dcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_ccopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_sswap(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_dswap(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_cswap(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zswap(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_sswap(OPENBLAS_CONST blasint n, float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_dswap(OPENBLAS_CONST blasint n, double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_cswap(OPENBLAS_CONST blasint n, float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zswap(OPENBLAS_CONST blasint n, double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_srot(blasint N, float *X, blasint incX, float *Y, blasint incY, float c, float s);
|
||||
void cblas_drot(blasint N, double *X, blasint incX, double *Y, blasint incY, double c, double s);
|
||||
void cblas_srot(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float c, OPENBLAS_CONST float s);
|
||||
void cblas_drot(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double c, OPENBLAS_CONST double s);
|
||||
|
||||
void cblas_srotg(float *a, float *b, float *c, float *s);
|
||||
void cblas_drotg(double *a, double *b, double *c, double *s);
|
||||
|
||||
void cblas_srotm(blasint N, float *X, blasint incX, float *Y, blasint incY, float *P);
|
||||
void cblas_drotm(blasint N, double *X, blasint incX, double *Y, blasint incY, double *P);
|
||||
void cblas_srotm(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float *P);
|
||||
void cblas_drotm(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double *P);
|
||||
|
||||
void cblas_srotmg(float *d1, float *d2, float *b1, float b2, float *P);
|
||||
void cblas_drotmg(double *d1, double *d2, double *b1, double b2, double *P);
|
||||
void cblas_srotmg(float *d1, float *d2, float *b1, OPENBLAS_CONST float b2, float *P);
|
||||
void cblas_drotmg(double *d1, double *d2, double *b1, OPENBLAS_CONST double b2, double *P);
|
||||
|
||||
void cblas_sscal(blasint N, float alpha, float *X, blasint incX);
|
||||
void cblas_dscal(blasint N, double alpha, double *X, blasint incX);
|
||||
void cblas_cscal(blasint N, float *alpha, float *X, blasint incX);
|
||||
void cblas_zscal(blasint N, double *alpha, double *X, blasint incX);
|
||||
void cblas_csscal(blasint N, float alpha, float *X, blasint incX);
|
||||
void cblas_zdscal(blasint N, double alpha, double *X, blasint incX);
|
||||
void cblas_sscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_cscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_zscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_csscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_zdscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_sgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
float alpha, float *a, blasint lda, float *x, blasint incx, float beta, float *y, blasint incy);
|
||||
void cblas_dgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
double alpha, double *a, blasint lda, double *x, blasint incx, double beta, double *y, blasint incy);
|
||||
void cblas_cgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
float *alpha, float *a, blasint lda, float *x, blasint incx, float *beta, float *y, blasint incy);
|
||||
void cblas_zgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
double *alpha, double *a, blasint lda, double *x, blasint incx, double *beta, double *y, blasint incy);
|
||||
void cblas_sgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_dgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_cgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_sger (enum CBLAS_ORDER order, blasint M, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_dger (enum CBLAS_ORDER order, blasint M, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_cgeru(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_cgerc(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_zgeru(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_zgerc(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_sger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_dger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
|
||||
void cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_strsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_strmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_strmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
|
||||
void cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
|
||||
void cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
|
||||
void cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
|
||||
void cblas_ssyr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_dsyr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *A, OPENBLAS_CONST blasint lda);
|
||||
|
||||
void cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,blasint N, float alpha, float *X,
|
||||
blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,
|
||||
blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX,
|
||||
float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX,
|
||||
double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_ssyr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo,OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X,
|
||||
OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_dsyr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X,
|
||||
OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX,
|
||||
OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX,
|
||||
OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
|
||||
void cblas_sgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, float alpha, float *A, blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, double alpha, double *A, blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_cgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
void cblas_sgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_cgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, float alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dsbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, double alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_ssbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dsbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
|
||||
void cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_stbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_stbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_stbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_stpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_stpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_ssymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dsymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
void cblas_ssymv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dsymv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_chemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
|
||||
void cblas_sspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *Ap,
|
||||
float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *Ap,
|
||||
double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_sspmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *Ap,
|
||||
OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dspmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *Ap,
|
||||
OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_sspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Ap);
|
||||
void cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Ap);
|
||||
void cblas_sspr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *Ap);
|
||||
void cblas_dspr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *Ap);
|
||||
|
||||
void cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A);
|
||||
void cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,blasint incX, double *A);
|
||||
void cblas_chpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A);
|
||||
void cblas_zhpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X,OPENBLAS_CONST blasint incX, double *A);
|
||||
|
||||
void cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A);
|
||||
void cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A);
|
||||
void cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *Ap);
|
||||
void cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *Ap);
|
||||
void cblas_sspr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A);
|
||||
void cblas_dspr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A);
|
||||
void cblas_chpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *Ap);
|
||||
void cblas_zhpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *Ap);
|
||||
|
||||
void cblas_chbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
void cblas_chbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_chpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
|
||||
float *alpha, float *Ap, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
|
||||
double *alpha, double *Ap, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
void cblas_chpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *Ap, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *Ap, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
void cblas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_cgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_csymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
|
||||
void cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
|
||||
void cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *beta, double *C, blasint ldc);
|
||||
void cblas_ssyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dsyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_csyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zsyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
void cblas_ssyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dsyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_csyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zsyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_strmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_dtrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ctrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ztrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
|
||||
void cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_strsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_dtrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ctrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ztrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
|
||||
void cblas_chemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zhemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
void cblas_chemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zhemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
|
||||
void cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
|
||||
void cblas_cherk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zherk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_cher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
||||
|
||||
303
cblas_noconst.h
Normal file
303
cblas_noconst.h
Normal file
@@ -0,0 +1,303 @@
|
||||
#ifndef CBLAS_H
|
||||
#define CBLAS_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include "common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
/* Assume C declarations for C++ */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/*Set the number of threads on runtime.*/
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
/*Get the build configure on runtime.*/
|
||||
char* openblas_get_config(void);
|
||||
|
||||
/* Get the parallelization type which is used by OpenBLAS */
|
||||
int openblas_get_parallel(void);
|
||||
/* OpenBLAS is compiled for sequential use */
|
||||
#define OPENBLAS_SEQUENTIAL 0
|
||||
/* OpenBLAS is compiled using normal threading model */
|
||||
#define OPENBLAS_THREAD 1
|
||||
/* OpenBLAS is compiled using OpenMP threading model */
|
||||
#define OPENBLAS_OPENMP 2
|
||||
|
||||
|
||||
#define CBLAS_INDEX size_t
|
||||
|
||||
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
|
||||
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;
|
||||
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
|
||||
typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
|
||||
typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
|
||||
|
||||
float cblas_sdsdot(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy);
|
||||
double cblas_dsdot (blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
float cblas_sdot(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
double cblas_ddot(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
openblas_complex_float cblas_cdotu(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
openblas_complex_float cblas_cdotc(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
openblas_complex_double cblas_zdotu(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
openblas_complex_double cblas_zdotc(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
void cblas_cdotu_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret);
|
||||
void cblas_cdotc_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret);
|
||||
void cblas_zdotu_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
|
||||
void cblas_zdotc_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
|
||||
|
||||
float cblas_sasum (blasint n, float *x, blasint incx);
|
||||
double cblas_dasum (blasint n, double *x, blasint incx);
|
||||
float cblas_scasum(blasint n, float *x, blasint incx);
|
||||
double cblas_dzasum(blasint n, double *x, blasint incx);
|
||||
|
||||
float cblas_snrm2 (blasint N, float *X, blasint incX);
|
||||
double cblas_dnrm2 (blasint N, double *X, blasint incX);
|
||||
float cblas_scnrm2(blasint N, float *X, blasint incX);
|
||||
double cblas_dznrm2(blasint N, double *X, blasint incX);
|
||||
|
||||
CBLAS_INDEX cblas_isamax(blasint n, float *x, blasint incx);
|
||||
CBLAS_INDEX cblas_idamax(blasint n, double *x, blasint incx);
|
||||
CBLAS_INDEX cblas_icamax(blasint n, float *x, blasint incx);
|
||||
CBLAS_INDEX cblas_izamax(blasint n, double *x, blasint incx);
|
||||
|
||||
void cblas_saxpy(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_daxpy(blasint n, double alpha, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_caxpy(blasint n, float *alpha, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zaxpy(blasint n, double *alpha, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
void cblas_scopy(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_dcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_ccopy(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
void cblas_sswap(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_dswap(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_cswap(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zswap(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
void cblas_srot(blasint N, float *X, blasint incX, float *Y, blasint incY, float c, float s);
|
||||
void cblas_drot(blasint N, double *X, blasint incX, double *Y, blasint incY, double c, double s);
|
||||
|
||||
void cblas_srotg(float *a, float *b, float *c, float *s);
|
||||
void cblas_drotg(double *a, double *b, double *c, double *s);
|
||||
|
||||
void cblas_srotm(blasint N, float *X, blasint incX, float *Y, blasint incY, float *P);
|
||||
void cblas_drotm(blasint N, double *X, blasint incX, double *Y, blasint incY, double *P);
|
||||
|
||||
void cblas_srotmg(float *d1, float *d2, float *b1, float b2, float *P);
|
||||
void cblas_drotmg(double *d1, double *d2, double *b1, double b2, double *P);
|
||||
|
||||
void cblas_sscal(blasint N, float alpha, float *X, blasint incX);
|
||||
void cblas_dscal(blasint N, double alpha, double *X, blasint incX);
|
||||
void cblas_cscal(blasint N, float *alpha, float *X, blasint incX);
|
||||
void cblas_zscal(blasint N, double *alpha, double *X, blasint incX);
|
||||
void cblas_csscal(blasint N, float alpha, float *X, blasint incX);
|
||||
void cblas_zdscal(blasint N, double alpha, double *X, blasint incX);
|
||||
|
||||
void cblas_sgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
float alpha, float *a, blasint lda, float *x, blasint incx, float beta, float *y, blasint incy);
|
||||
void cblas_dgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
double alpha, double *a, blasint lda, double *x, blasint incx, double beta, double *y, blasint incy);
|
||||
void cblas_cgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
float *alpha, float *a, blasint lda, float *x, blasint incx, float *beta, float *y, blasint incy);
|
||||
void cblas_zgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
double *alpha, double *a, blasint lda, double *x, blasint incx, double *beta, double *y, blasint incy);
|
||||
|
||||
void cblas_sger (enum CBLAS_ORDER order, blasint M, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_dger (enum CBLAS_ORDER order, blasint M, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_cgeru(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_cgerc(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_zgeru(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_zgerc(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
|
||||
void cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
void cblas_strmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
void cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
|
||||
void cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
|
||||
void cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
|
||||
void cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
|
||||
|
||||
void cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,blasint N, float alpha, float *X,
|
||||
blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,
|
||||
blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX,
|
||||
float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX,
|
||||
double *Y, blasint incY, double *A, blasint lda);
|
||||
|
||||
void cblas_sgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, float alpha, float *A, blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, double alpha, double *A, blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_cgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, float alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dsbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, double alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
|
||||
|
||||
void cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
void cblas_stbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
void cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
|
||||
void cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
|
||||
void cblas_ssymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dsymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
|
||||
void cblas_sspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *Ap,
|
||||
float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *Ap,
|
||||
double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_sspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Ap);
|
||||
void cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Ap);
|
||||
|
||||
void cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A);
|
||||
void cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,blasint incX, double *A);
|
||||
|
||||
void cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A);
|
||||
void cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A);
|
||||
void cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *Ap);
|
||||
void cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *Ap);
|
||||
|
||||
void cblas_chbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_chpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
|
||||
float *alpha, float *Ap, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
|
||||
double *alpha, double *Ap, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
|
||||
void cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
|
||||
void cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
|
||||
void cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
|
||||
void cblas_chemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zhemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
|
||||
void cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
||||
75
common.h
75
common.h
@@ -68,7 +68,7 @@ extern "C" {
|
||||
#define SMP
|
||||
#endif
|
||||
|
||||
#if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_Interix)
|
||||
#if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INTERIX)
|
||||
#define WINDOWS_ABI
|
||||
#define OS_WINDOWS
|
||||
|
||||
@@ -89,6 +89,10 @@ extern "C" {
|
||||
#include <sched.h>
|
||||
#endif
|
||||
|
||||
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD)
|
||||
#include <sched.h>
|
||||
#endif
|
||||
|
||||
#ifdef OS_WINDOWS
|
||||
#ifdef ATOM
|
||||
#define GOTO_ATOM ATOM
|
||||
@@ -306,10 +310,36 @@ typedef int blasint;
|
||||
#define YIELDING SwitchToThread()
|
||||
#endif
|
||||
|
||||
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8)
|
||||
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
|
||||
#endif
|
||||
|
||||
#ifdef PILEDRIVER
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef YIELDING
|
||||
#define YIELDING sched_yield()
|
||||
#endif
|
||||
|
||||
/***
|
||||
To alloc job_t on heap or statck.
|
||||
please https://github.com/xianyi/OpenBLAS/issues/246
|
||||
***/
|
||||
#if defined(OS_WINDOWS)
|
||||
#define GETRF_MEM_ALLOC_THRESHOLD 32
|
||||
#define BLAS3_MEM_ALLOC_THRESHOLD 32
|
||||
#endif
|
||||
|
||||
#ifndef GETRF_MEM_ALLOC_THRESHOLD
|
||||
#define GETRF_MEM_ALLOC_THRESHOLD 80
|
||||
#endif
|
||||
|
||||
#ifndef BLAS3_MEM_ALLOC_THRESHOLD
|
||||
#define BLAS3_MEM_ALLOC_THRESHOLD 160
|
||||
#endif
|
||||
|
||||
#ifdef QUAD_PRECISION
|
||||
#include "common_quad.h"
|
||||
#endif
|
||||
@@ -342,12 +372,26 @@ typedef int blasint;
|
||||
#include "common_mips64.h"
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_ARM
|
||||
#include "common_arm.h"
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_ARM64
|
||||
#include "common_arm64.h"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#include "common_linux.h"
|
||||
#endif
|
||||
|
||||
#define MMAP_ACCESS (PROT_READ | PROT_WRITE)
|
||||
|
||||
#ifdef __NetBSD__
|
||||
#define MMAP_POLICY (MAP_PRIVATE | MAP_ANON)
|
||||
#else
|
||||
#define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
|
||||
#endif
|
||||
|
||||
#include "param.h"
|
||||
#include "common_param.h"
|
||||
@@ -370,6 +414,31 @@ typedef int blasint;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
#ifndef NOINCLUDE
|
||||
/* Inclusion of a standard header file is needed for definition of __STDC_*
|
||||
predefined macros with some compilers (e.g. GCC 4.7 on Linux). This occurs
|
||||
as a side effect of including either <features.h> or <stdc-predef.h>. */
|
||||
#include <stdio.h>
|
||||
#endif // NOINCLUDE
|
||||
|
||||
/* C99 supports complex floating numbers natively, which GCC also offers as an
|
||||
extension since version 3.0. If neither are available, use a compatible
|
||||
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
||||
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
|
||||
(__GNUC__ >= 3 && !defined(__cplusplus)))
|
||||
#define OPENBLAS_COMPLEX_C99
|
||||
typedef float _Complex openblas_complex_float;
|
||||
typedef double _Complex openblas_complex_double;
|
||||
typedef xdouble _Complex openblas_complex_xdouble;
|
||||
#else
|
||||
#define OPENBLAS_COMPLEX_STRUCT
|
||||
typedef struct { float real, imag; } openblas_complex_float;
|
||||
typedef struct { double real, imag; } openblas_complex_double;
|
||||
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
|
||||
#endif
|
||||
#endif // ASSEMBLER
|
||||
|
||||
#ifndef IFLUSH
|
||||
#define IFLUSH
|
||||
#endif
|
||||
@@ -523,8 +592,10 @@ typedef struct {
|
||||
#include "common_level2.h"
|
||||
#include "common_level3.h"
|
||||
#include "common_lapack.h"
|
||||
|
||||
#ifdef CBLAS
|
||||
#include "cblas.h"
|
||||
# define OPENBLAS_CONST /* see comment in cblas.h */
|
||||
# include "cblas.h"
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
@@ -150,9 +150,17 @@ REALNAME:
|
||||
#define PROFCODE .prologue 0
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
#define GNUSTACK .section .note.GNU-stack,"",@progbits
|
||||
#else
|
||||
#define GNUSTACK
|
||||
#endif
|
||||
|
||||
#define EPILOGUE \
|
||||
.end REALNAME; \
|
||||
.ident VERSION
|
||||
.ident VERSION; \
|
||||
GNUSTACK
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef DOUBLE
|
||||
|
||||
169
common_arm.h
Normal file
169
common_arm.h
Normal file
@@ -0,0 +1,169 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#ifndef COMMON_ARM
|
||||
#define COMMON_ARM
|
||||
|
||||
#define MB
|
||||
#define WMB
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
#define RETURN_BY_COMPLEX
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
int register ret;
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
__asm__ __volatile__(
|
||||
"ldrex r2, [%1] \n\t"
|
||||
"mov r2, #0 \n\t"
|
||||
"strex r3, r2, [%1] \n\t"
|
||||
"mov %0 , r3 \n\t"
|
||||
: "=r"(ret), "=r"(address)
|
||||
: "1"(address)
|
||||
: "memory", "r2" , "r3"
|
||||
|
||||
|
||||
);
|
||||
|
||||
} while (ret);
|
||||
|
||||
}
|
||||
|
||||
|
||||
static inline unsigned long long rpcc(void){
|
||||
unsigned long long ret=0;
|
||||
double v;
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv,NULL);
|
||||
v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
|
||||
ret = (unsigned long long) ( v * 1000.0d );
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int blas_quickdivide(blasint x, blasint y){
|
||||
return x / y;
|
||||
}
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
|
||||
#else
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
|
||||
#endif
|
||||
|
||||
#define GET_IMAGE_CANCEL
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef F_INTERFACE
|
||||
#define REALNAME ASMNAME
|
||||
#else
|
||||
#define REALNAME ASMFNAME
|
||||
#endif
|
||||
|
||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||
|
||||
#define PROLOGUE \
|
||||
.arm ;\
|
||||
.global REALNAME ;\
|
||||
.func REALNAME ;\
|
||||
REALNAME:
|
||||
|
||||
#define EPILOGUE
|
||||
|
||||
#define PROFCODE
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define SEEK_ADDRESS
|
||||
|
||||
#ifndef PAGESIZE
|
||||
#define PAGESIZE ( 4 << 10)
|
||||
#endif
|
||||
#define HUGE_PAGESIZE ( 4 << 20)
|
||||
|
||||
#define BUFFER_SIZE (16 << 20)
|
||||
|
||||
|
||||
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
|
||||
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
#endif
|
||||
169
common_arm64.h
Normal file
169
common_arm64.h
Normal file
@@ -0,0 +1,169 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#ifndef COMMON_ARM64
|
||||
#define COMMON_ARM64
|
||||
|
||||
#define MB
|
||||
#define WMB
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
#define RETURN_BY_COMPLEX
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
/*
|
||||
int register ret;
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
__asm__ __volatile__(
|
||||
"ldrex r2, [%1] \n\t"
|
||||
"mov r2, #0 \n\t"
|
||||
"strex r3, r2, [%1] \n\t"
|
||||
"mov %0 , r3 \n\t"
|
||||
: "=r"(ret), "=r"(address)
|
||||
: "1"(address)
|
||||
: "memory", "r2" , "r3"
|
||||
|
||||
|
||||
);
|
||||
|
||||
} while (ret);
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
static inline unsigned long long rpcc(void){
|
||||
unsigned long long ret=0;
|
||||
double v;
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv,NULL);
|
||||
v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
|
||||
ret = (unsigned long long) ( v * 1000.0d );
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int blas_quickdivide(blasint x, blasint y){
|
||||
return x / y;
|
||||
}
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
|
||||
#else
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
|
||||
#endif
|
||||
|
||||
#define GET_IMAGE_CANCEL
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef F_INTERFACE
|
||||
#define REALNAME ASMNAME
|
||||
#else
|
||||
#define REALNAME ASMFNAME
|
||||
#endif
|
||||
|
||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||
|
||||
#define PROLOGUE \
|
||||
.arm ;\
|
||||
.global REALNAME ;\
|
||||
.func REALNAME ;\
|
||||
REALNAME:
|
||||
|
||||
#define EPILOGUE
|
||||
|
||||
#define PROFCODE
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define SEEK_ADDRESS
|
||||
|
||||
#ifndef PAGESIZE
|
||||
#define PAGESIZE ( 4 << 10)
|
||||
#endif
|
||||
#define HUGE_PAGESIZE ( 4 << 20)
|
||||
|
||||
#define BUFFER_SIZE (16 << 20)
|
||||
|
||||
|
||||
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
|
||||
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -379,8 +379,15 @@ REALNAME:
|
||||
#define PROFCODE
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
#define GNUSTACK .section .note.GNU-stack,"",@progbits
|
||||
#else
|
||||
#define GNUSTACK
|
||||
#endif
|
||||
|
||||
#define EPILOGUE \
|
||||
.endp REALNAME
|
||||
.endp REALNAME ; \
|
||||
GNUSTACK
|
||||
|
||||
#define START_ADDRESS 0x20000fc800000000UL
|
||||
|
||||
|
||||
@@ -38,8 +38,15 @@
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
/* Assume C declarations for C++ */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
int BLASFUNC(xerbla)(char *, blasint *info, blasint);
|
||||
|
||||
void openblas_set_num_threads_(int *);
|
||||
|
||||
FLOATRET BLASFUNC(sdot) (blasint *, float *, blasint *, float *, blasint *);
|
||||
FLOATRET BLASFUNC(sdsdot)(blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
|
||||
@@ -69,19 +76,19 @@ myxcomplex_t BLASFUNC(xdotu) (blasint *, xdouble *, blasint *, xdouble *,
|
||||
myxcomplex_t BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
|
||||
#elif defined RETURN_BY_STACK
|
||||
void BLASFUNC(cdotu) (float _Complex *, blasint *, float * , blasint *, float *, blasint *);
|
||||
void BLASFUNC(cdotc) (float _Complex *, blasint *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(zdotu) (double _Complex *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(zdotc) (double _Complex *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(xdotu) (xdouble _Complex *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
void BLASFUNC(xdotc) (xdouble _Complex *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
void BLASFUNC(cdotu) (openblas_complex_float *, blasint *, float * , blasint *, float *, blasint *);
|
||||
void BLASFUNC(cdotc) (openblas_complex_float *, blasint *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(zdotu) (openblas_complex_double *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(zdotc) (openblas_complex_double *, blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(xdotu) (openblas_complex_xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
void BLASFUNC(xdotc) (openblas_complex_xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
#else
|
||||
float _Complex BLASFUNC(cdotu) (blasint *, float *, blasint *, float *, blasint *);
|
||||
float _Complex BLASFUNC(cdotc) (blasint *, float *, blasint *, float *, blasint *);
|
||||
double _Complex BLASFUNC(zdotu) (blasint *, double *, blasint *, double *, blasint *);
|
||||
double _Complex BLASFUNC(zdotc) (blasint *, double *, blasint *, double *, blasint *);
|
||||
xdouble _Complex BLASFUNC(xdotu) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
xdouble _Complex BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
openblas_complex_float BLASFUNC(cdotu) (blasint *, float *, blasint *, float *, blasint *);
|
||||
openblas_complex_float BLASFUNC(cdotc) (blasint *, float *, blasint *, float *, blasint *);
|
||||
openblas_complex_double BLASFUNC(zdotu) (blasint *, double *, blasint *, double *, blasint *);
|
||||
openblas_complex_double BLASFUNC(zdotc) (blasint *, double *, blasint *, double *, blasint *);
|
||||
openblas_complex_xdouble BLASFUNC(xdotu) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
openblas_complex_xdouble BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
#endif
|
||||
|
||||
void BLASFUNC(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
@@ -635,6 +642,8 @@ int BLASFUNC(zgemc)(char *, char *, blasint *, blasint *, blasint *, double *,
|
||||
int BLASFUNC(xgemc)(char *, char *, blasint *, blasint *, blasint *, xdouble *,
|
||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
|
||||
/* Lapack routines */
|
||||
|
||||
int BLASFUNC(sgetf2)(blasint *, blasint *, float *, blasint *, blasint *, blasint *);
|
||||
int BLASFUNC(dgetf2)(blasint *, blasint *, double *, blasint *, blasint *, blasint *);
|
||||
int BLASFUNC(qgetf2)(blasint *, blasint *, xdouble *, blasint *, blasint *, blasint *);
|
||||
@@ -670,6 +679,13 @@ int BLASFUNC(cgesv)(blasint *, blasint *, float *, blasint *, blasint *, float
|
||||
int BLASFUNC(zgesv)(blasint *, blasint *, double *, blasint *, blasint *, double*, blasint *, blasint *);
|
||||
int BLASFUNC(xgesv)(blasint *, blasint *, xdouble *, blasint *, blasint *, xdouble*, blasint *, blasint *);
|
||||
|
||||
int BLASFUNC(sgesvd)(char *, char *, blasint *, blasint *, float *, blasint *, float *, float *, blasint *, float *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(dgesvd)(char *, char *, blasint *, blasint *, double *, blasint *, double *, double *, blasint *, double *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(qgesvd)(char *, char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *, blasint *);
|
||||
int BLASFUNC(cgesvd)(char *, char *, blasint *, blasint *, float *, blasint *, float *, float *, blasint *, float *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(zgesvd)(char *, char *, blasint *, blasint *, double *, blasint *, double *, double *, blasint *, double *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(xgesvd)(char *, char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *, blasint *);
|
||||
|
||||
int BLASFUNC(spotf2)(char *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(dpotf2)(char *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(qpotf2)(char *, blasint *, xdouble *, blasint *, blasint *);
|
||||
@@ -684,6 +700,13 @@ int BLASFUNC(cpotrf)(char *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(zpotrf)(char *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(xpotrf)(char *, blasint *, xdouble *, blasint *, blasint *);
|
||||
|
||||
int BLASFUNC(spotrs)(char *, blasint *, blasint *, float *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(dpotrs)(char *, blasint *, blasint *, double *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(qpotrs)(char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, blasint *, blasint *);
|
||||
int BLASFUNC(cpotrs)(char *, blasint *, blasint *, float *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(zpotrs)(char *, blasint *, blasint *, double *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(xpotrs)(char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, blasint *, blasint *);
|
||||
|
||||
int BLASFUNC(slauu2)(char *, blasint *, float *, blasint *, blasint *);
|
||||
int BLASFUNC(dlauu2)(char *, blasint *, double *, blasint *, blasint *);
|
||||
int BLASFUNC(qlauu2)(char *, blasint *, xdouble *, blasint *, blasint *);
|
||||
@@ -733,4 +756,10 @@ xdouble BLASFUNC(qlamch)(char *);
|
||||
FLOATRET BLASFUNC(slamc3)(float *, float *);
|
||||
double BLASFUNC(dlamc3)(double *, double *);
|
||||
xdouble BLASFUNC(qlamc3)(xdouble *, xdouble *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
||||
|
||||
@@ -65,20 +65,47 @@ extern long int syscall (long int __sysno, ...);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
static inline int my_mbind(void *addr, unsigned long len, int mode,
|
||||
unsigned long *nodemask, unsigned long maxnode,
|
||||
unsigned flags) {
|
||||
#if defined (__LSB_VERSION__)
|
||||
// So far, LSB (Linux Standard Base) don't support syscall().
|
||||
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
|
||||
return 0;
|
||||
#else
|
||||
#if defined (LOONGSON3B)
|
||||
#if defined (__64BIT__)
|
||||
return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags);
|
||||
#else
|
||||
return 0; //NULL Implementation on Loongson 3B 32bit.
|
||||
#endif
|
||||
#else
|
||||
//Fixed randomly SEGFAULT when nodemask==NULL with above Linux 2.6.34
|
||||
unsigned long null_nodemask=0;
|
||||
return syscall(SYS_mbind, addr, len, mode, &null_nodemask, maxnode, flags);
|
||||
// unsigned long null_nodemask=0;
|
||||
return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) {
|
||||
|
||||
#if defined (__LSB_VERSION__)
|
||||
// So far, LSB (Linux Standard Base) don't support syscall().
|
||||
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
|
||||
return 0;
|
||||
#else
|
||||
return syscall(SYS_set_mempolicy, mode, addr, flag);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline int my_gettid(void) { return syscall(SYS_gettid); }
|
||||
static inline int my_gettid(void) {
|
||||
#ifdef SYS_gettid
|
||||
return syscall(SYS_gettid);
|
||||
#else
|
||||
return getpid();
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -2127,7 +2127,9 @@
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64)
|
||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
|
||||
extern BLASLONG gemm_offset_a;
|
||||
extern BLASLONG gemm_offset_b;
|
||||
extern BLASLONG sgemm_p;
|
||||
extern BLASLONG sgemm_q;
|
||||
extern BLASLONG sgemm_r;
|
||||
|
||||
@@ -101,10 +101,15 @@ static void INLINE blas_lock(volatile unsigned long *address){
|
||||
|
||||
static inline unsigned int rpcc(void){
|
||||
unsigned long ret;
|
||||
#if defined(LOONGSON3A)
|
||||
unsigned long long tmp;
|
||||
__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory");
|
||||
ret=tmp;
|
||||
#if defined(LOONGSON3A) || defined(LOONGSON3B)
|
||||
// unsigned long long tmp;
|
||||
//__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory");
|
||||
//ret=tmp;
|
||||
__asm__ __volatile__(".set push \n"
|
||||
".set mips32r2\n"
|
||||
"rdhwr %0, $2\n"
|
||||
".set pop": "=r"(ret):: "memory");
|
||||
|
||||
#else
|
||||
__asm__ __volatile__(".set push \n"
|
||||
".set mips32r2\n"
|
||||
@@ -114,6 +119,21 @@ static inline unsigned int rpcc(void){
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if defined(LOONGSON3A) || defined(LOONGSON3B)
|
||||
#ifndef NO_AFFINITY
|
||||
#define WHEREAMI
|
||||
static inline int WhereAmI(void){
|
||||
int ret=0;
|
||||
__asm__ __volatile__(".set push \n"
|
||||
".set mips32r2\n"
|
||||
"rdhwr %0, $0\n"
|
||||
".set pop": "=r"(ret):: "memory");
|
||||
return ret;
|
||||
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static inline int blas_quickdivide(blasint x, blasint y){
|
||||
return x / y;
|
||||
}
|
||||
@@ -152,6 +172,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||
#define CMPEQ c.eq.d
|
||||
#define CMPLE c.le.d
|
||||
#define CMPLT c.lt.d
|
||||
#define NEG neg.d
|
||||
#else
|
||||
#define LD lwc1
|
||||
#define ST swc1
|
||||
@@ -170,6 +191,14 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||
#define CMPEQ c.eq.s
|
||||
#define CMPLE c.le.s
|
||||
#define CMPLT c.lt.s
|
||||
#define PLU plu.ps
|
||||
#define PLL pll.ps
|
||||
#define PUU puu.ps
|
||||
#define PUL pul.ps
|
||||
#define MADPS madd.ps
|
||||
#define CVTU cvt.s.pu
|
||||
#define CVTL cvt.s.pl
|
||||
#define NEG neg.s
|
||||
#endif
|
||||
|
||||
#if defined(__64BIT__) && defined(USE64BITINT)
|
||||
@@ -206,10 +235,17 @@ REALNAME: ;\
|
||||
.set noreorder ;\
|
||||
.set nomacro
|
||||
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
#define GNUSTACK .section .note.GNU-stack,"",@progbits
|
||||
#else
|
||||
#define GNUSTACK
|
||||
#endif
|
||||
|
||||
#define EPILOGUE \
|
||||
.set macro ;\
|
||||
.set reorder ;\
|
||||
.end REALNAME
|
||||
.end REALNAME ;\
|
||||
GNUSTACK
|
||||
|
||||
#define PROFCODE
|
||||
#endif
|
||||
@@ -218,13 +254,18 @@ REALNAME: ;\
|
||||
|
||||
#define SEEK_ADDRESS
|
||||
|
||||
#define BUFFER_SIZE ( 8 << 20)
|
||||
#define BUFFER_SIZE ( 32 << 20)
|
||||
|
||||
#if defined(LOONGSON3A)
|
||||
#define PAGESIZE (16UL << 10)
|
||||
#define FIXED_PAGESIZE (16UL << 10)
|
||||
#endif
|
||||
|
||||
#if defined(LOONGSON3B)
|
||||
#define PAGESIZE (16UL << 10)
|
||||
#define FIXED_PAGESIZE (16UL << 10)
|
||||
#endif
|
||||
|
||||
#ifndef PAGESIZE
|
||||
#define PAGESIZE (64UL << 10)
|
||||
#endif
|
||||
@@ -236,7 +277,7 @@ REALNAME: ;\
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
#if defined(LOONGSON3A)
|
||||
#if defined(LOONGSON3A) || defined(LOONGSON3B)
|
||||
#define PREFETCHD_(x) ld $0, x
|
||||
#define PREFETCHD(x) PREFETCHD_(x)
|
||||
#else
|
||||
|
||||
@@ -44,6 +44,7 @@
|
||||
#ifdef DYNAMIC_ARCH
|
||||
|
||||
typedef struct {
|
||||
int dtb_entries;
|
||||
int offsetA, offsetB, align;
|
||||
|
||||
int sgemm_p, sgemm_q, sgemm_r;
|
||||
@@ -813,6 +814,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
extern gotoblas_t *gotoblas;
|
||||
|
||||
#define DTB_ENTRIES gotoblas -> dtb_entries
|
||||
#define GEMM_OFFSET_A gotoblas -> offsetA
|
||||
#define GEMM_OFFSET_B gotoblas -> offsetB
|
||||
#define GEMM_ALIGN gotoblas -> align
|
||||
@@ -863,6 +865,8 @@ extern gotoblas_t *gotoblas;
|
||||
|
||||
#else
|
||||
|
||||
#define DTB_ENTRIES DTB_DEFAULT_ENTRIES
|
||||
|
||||
#define GEMM_OFFSET_A GEMM_DEFAULT_OFFSET_A
|
||||
#define GEMM_OFFSET_B GEMM_DEFAULT_OFFSET_B
|
||||
#define GEMM_ALIGN GEMM_DEFAULT_ALIGN
|
||||
@@ -997,14 +1001,14 @@ extern gotoblas_t *gotoblas;
|
||||
#endif
|
||||
|
||||
#ifdef XDOUBLE
|
||||
#define GEMM3M_UNROLL_M QGEMM_DEFAULT_UNROLL_M
|
||||
#define GEMM3M_UNROLL_N QGEMM_DEFAULT_UNROLL_N
|
||||
#define GEMM3M_UNROLL_M QGEMM_UNROLL_M
|
||||
#define GEMM3M_UNROLL_N QGEMM_UNROLL_N
|
||||
#elif defined(DOUBLE)
|
||||
#define GEMM3M_UNROLL_M DGEMM_DEFAULT_UNROLL_M
|
||||
#define GEMM3M_UNROLL_N DGEMM_DEFAULT_UNROLL_N
|
||||
#define GEMM3M_UNROLL_M DGEMM_UNROLL_M
|
||||
#define GEMM3M_UNROLL_N DGEMM_UNROLL_N
|
||||
#else
|
||||
#define GEMM3M_UNROLL_M SGEMM_DEFAULT_UNROLL_M
|
||||
#define GEMM3M_UNROLL_N SGEMM_DEFAULT_UNROLL_N
|
||||
#define GEMM3M_UNROLL_M SGEMM_UNROLL_M
|
||||
#define GEMM3M_UNROLL_N SGEMM_UNROLL_N
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@@ -63,5 +63,7 @@ double _Complex BLASFUNC_REF(zdotc) (blasint *, double *, blasint *, double
|
||||
void BLASFUNC_REF(drotmg)(double *, double *, double *, double *, double *);
|
||||
|
||||
double BLASFUNC_REF(dsdot)(blasint *, float *, blasint *, float *, blasint*);
|
||||
|
||||
FLOATRET BLASFUNC_REF(samax) (blasint *, float *, blasint *);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -199,8 +199,17 @@ static __inline int blas_quickdivide(blasint x, blasint y){
|
||||
.type REALNAME, #function; \
|
||||
.proc 07; \
|
||||
REALNAME:;
|
||||
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
#define GNUSTACK .section .note.GNU-stack,"",@progbits
|
||||
#else
|
||||
#define GNUSTACK
|
||||
#endif
|
||||
|
||||
#define EPILOGUE \
|
||||
.size REALNAME, .-REALNAME
|
||||
.size REALNAME, .-REALNAME; \
|
||||
GNUSTACK
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -39,6 +39,11 @@
|
||||
#ifndef COMMON_THREAD
|
||||
#define COMMON_THREAD
|
||||
|
||||
#ifdef USE_OPENMP
|
||||
#include <omp.h>
|
||||
extern void goto_set_num_threads(int nthreads);
|
||||
#endif
|
||||
|
||||
/* Basic Thread Debugging */
|
||||
#undef SMP_DEBUG
|
||||
|
||||
@@ -98,7 +103,7 @@ typedef struct blas_queue {
|
||||
|
||||
struct blas_queue *next;
|
||||
|
||||
#if defined( __WIN32__) || defined(__CYGWIN32__)
|
||||
#if defined( __WIN32__) || defined(__CYGWIN32__) || defined(_WIN32) || defined(__CYGWIN__)
|
||||
CRITICAL_SECTION lock;
|
||||
HANDLE finish;
|
||||
#else
|
||||
@@ -126,13 +131,24 @@ extern int blas_server_avail;
|
||||
|
||||
static __inline int num_cpu_avail(int level) {
|
||||
|
||||
if ((blas_cpu_number == 1)
|
||||
#ifdef USE_OPENMP
|
||||
int openmp_nthreads=0;
|
||||
#endif
|
||||
|
||||
if (blas_cpu_number == 1
|
||||
|
||||
#ifdef USE_OPENMP
|
||||
|| omp_in_parallel()
|
||||
#endif
|
||||
) return 1;
|
||||
|
||||
#ifdef USE_OPENMP
|
||||
openmp_nthreads=omp_get_max_threads();
|
||||
if (blas_cpu_number != openmp_nthreads) {
|
||||
goto_set_num_threads(openmp_nthreads);
|
||||
}
|
||||
#endif
|
||||
|
||||
return blas_cpu_number;
|
||||
|
||||
}
|
||||
|
||||
22
common_x86.h
22
common_x86.h
@@ -171,6 +171,11 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
#define MMXSTORE movd
|
||||
#endif
|
||||
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER)
|
||||
//Enable some optimazation for barcelona.
|
||||
#define BARCELONA_OPTIMIZATION
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_3DNOW)
|
||||
#define EMMS femms
|
||||
#elif defined(HAVE_MMX)
|
||||
@@ -254,7 +259,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
#define PROFCODE
|
||||
#endif
|
||||
|
||||
#if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INERIX)
|
||||
#if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INTERIX)
|
||||
#define SAVEREGISTERS \
|
||||
subl $32, %esp;\
|
||||
movups %xmm6, 0(%esp);\
|
||||
@@ -269,7 +274,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
#define RESTOREREGISTERS
|
||||
#endif
|
||||
|
||||
#if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INERIX)
|
||||
#if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INTERIX)
|
||||
#define PROLOGUE \
|
||||
.text; \
|
||||
.align 16; \
|
||||
@@ -282,7 +287,7 @@ REALNAME:
|
||||
#define EPILOGUE .end REALNAME
|
||||
#endif
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_FreeBSD) || defined(OS_NetBSD) || defined(__ELF__)
|
||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__)
|
||||
#define PROLOGUE \
|
||||
.text; \
|
||||
.align 16; \
|
||||
@@ -296,7 +301,9 @@ REALNAME:
|
||||
#define PROFCODE
|
||||
#endif
|
||||
|
||||
#define EPILOGUE .size REALNAME, .-REALNAME
|
||||
#define EPILOGUE \
|
||||
.size REALNAME, .-REALNAME; \
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
||||
#endif
|
||||
|
||||
@@ -335,6 +342,7 @@ REALNAME:
|
||||
#define ALIGN_2 .align 2
|
||||
#define ALIGN_3 .align 3
|
||||
#define ALIGN_4 .align 4
|
||||
#define ALIGN_5 .align 5
|
||||
#define ffreep fstp
|
||||
#endif
|
||||
|
||||
@@ -357,3 +365,9 @@ REALNAME:
|
||||
#ifndef ALIGN_6
|
||||
#define ALIGN_6 .align 64
|
||||
#endif
|
||||
// ffreep %st(0).
|
||||
// Because Clang didn't support ffreep, we directly use the opcode.
|
||||
// Please check out http://www.sandpile.org/x86/opc_fpu.htm
|
||||
#ifndef ffreep
|
||||
#define ffreep .byte 0xdf, 0xc0 #
|
||||
#endif
|
||||
|
||||
@@ -218,6 +218,11 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
|
||||
#ifdef ASSEMBLER
|
||||
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER)
|
||||
//Enable some optimazation for barcelona.
|
||||
#define BARCELONA_OPTIMIZATION
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_3DNOW)
|
||||
#define EMMS femms
|
||||
#elif defined(HAVE_MMX)
|
||||
@@ -353,7 +358,7 @@ REALNAME:
|
||||
#define EPILOGUE .end REALNAME
|
||||
#endif
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_FreeBSD) || defined(OS_NetBSD) || defined(__ELF__) || defined(C_PGI)
|
||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) || defined(C_PGI)
|
||||
#define PROLOGUE \
|
||||
.text; \
|
||||
.align 512; \
|
||||
@@ -367,7 +372,10 @@ REALNAME:
|
||||
#define PROFCODE
|
||||
#endif
|
||||
|
||||
#define EPILOGUE .size REALNAME, .-REALNAME
|
||||
#define EPILOGUE \
|
||||
.size REALNAME, .-REALNAME; \
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -425,6 +433,7 @@ REALNAME:
|
||||
#define ALIGN_2 .align 2
|
||||
#define ALIGN_3 .align 3
|
||||
#define ALIGN_4 .align 4
|
||||
#define ALIGN_5 .align 5
|
||||
#define ffreep fstp
|
||||
#endif
|
||||
|
||||
@@ -448,4 +457,10 @@ REALNAME:
|
||||
#define ALIGN_6 .align 64
|
||||
#endif
|
||||
|
||||
// ffreep %st(0).
|
||||
// Because Clang didn't support ffreep, we directly use the opcode.
|
||||
// Please check out http://www.sandpile.org/x86/opc_fpu.htm
|
||||
#ifndef ffreep
|
||||
#define ffreep .byte 0xdf, 0xc0 #
|
||||
#endif
|
||||
#endif
|
||||
|
||||
14
cpuid.h
14
cpuid.h
@@ -103,6 +103,11 @@
|
||||
#define CORE_NEHALEM 17
|
||||
#define CORE_ATOM 18
|
||||
#define CORE_NANO 19
|
||||
#define CORE_SANDYBRIDGE 20
|
||||
#define CORE_BOBCAT 21
|
||||
#define CORE_BULLDOZER 22
|
||||
#define CORE_PILEDRIVER 23
|
||||
#define CORE_HASWELL 24
|
||||
|
||||
#define HAVE_SSE (1 << 0)
|
||||
#define HAVE_SSE2 (1 << 1)
|
||||
@@ -122,6 +127,9 @@
|
||||
#define HAVE_MISALIGNSSE (1 << 15)
|
||||
#define HAVE_128BITFPU (1 << 16)
|
||||
#define HAVE_FASTMOVU (1 << 17)
|
||||
#define HAVE_AVX (1 << 18)
|
||||
#define HAVE_FMA4 (1 << 19)
|
||||
#define HAVE_FMA3 (1 << 20)
|
||||
|
||||
#define CACHE_INFO_L1_I 1
|
||||
#define CACHE_INFO_L1_D 2
|
||||
@@ -188,4 +196,10 @@ typedef struct {
|
||||
#define CPUTYPE_NSGEODE 41
|
||||
#define CPUTYPE_VIAC3 42
|
||||
#define CPUTYPE_NANO 43
|
||||
#define CPUTYPE_SANDYBRIDGE 44
|
||||
#define CPUTYPE_BOBCAT 45
|
||||
#define CPUTYPE_BULLDOZER 46
|
||||
#define CPUTYPE_PILEDRIVER 47
|
||||
#define CPUTYPE_HASWELL 48
|
||||
|
||||
#endif
|
||||
|
||||
@@ -72,7 +72,7 @@ void get_cpuconfig(void){
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 2097152\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_ENTRIES 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 32\n");
|
||||
printf("#define DTB_SIZE 8192\n");
|
||||
break;
|
||||
|
||||
@@ -81,7 +81,7 @@ void get_cpuconfig(void){
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 2097152\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define DTB_ENTRIES 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 8192\n");
|
||||
break;
|
||||
|
||||
@@ -90,7 +90,7 @@ void get_cpuconfig(void){
|
||||
printf("#define L1_DATA_LINESIZE 64\n");
|
||||
printf("#define L2_SIZE 4194304\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define DTB_ENTRIES 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 8192\n");
|
||||
break;
|
||||
}
|
||||
|
||||
262
cpuid_arm.c
Normal file
262
cpuid_arm.c
Normal file
@@ -0,0 +1,262 @@
|
||||
/**************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_ARMV6 1
|
||||
#define CPU_ARMV7 2
|
||||
#define CPU_CORTEXA15 3
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKOWN",
|
||||
"ARMV6",
|
||||
"ARMV7",
|
||||
"CORTEXA15"
|
||||
};
|
||||
|
||||
|
||||
int get_feature(char *search)
|
||||
{
|
||||
|
||||
#ifdef linux
|
||||
FILE *infile;
|
||||
char buffer[2048], *p,*t;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if (!strncmp("Features", buffer, 8))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
|
||||
|
||||
if( p == NULL ) return;
|
||||
|
||||
t = strtok(p," ");
|
||||
while( t = strtok(NULL," "))
|
||||
{
|
||||
if (!strcmp(t, search)) { return(1); }
|
||||
}
|
||||
|
||||
#endif
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
||||
int detect(void)
|
||||
{
|
||||
|
||||
#ifdef linux
|
||||
|
||||
FILE *infile;
|
||||
char buffer[512], *p;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if (!strncmp("model name", buffer, 10))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
|
||||
if(p != NULL)
|
||||
{
|
||||
|
||||
if (strstr(p, "ARMv7"))
|
||||
{
|
||||
if ( get_feature("vfpv4"))
|
||||
return CPU_ARMV7;
|
||||
|
||||
if ( get_feature("vfpv3"))
|
||||
return CPU_ARMV7;
|
||||
|
||||
if ( get_feature("vfp"))
|
||||
return CPU_ARMV6;
|
||||
|
||||
|
||||
}
|
||||
|
||||
if (strstr(p, "ARMv6"))
|
||||
{
|
||||
if ( get_feature("vfp"))
|
||||
return CPU_ARMV6;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
return CPU_UNKNOWN;
|
||||
}
|
||||
|
||||
char *get_corename(void)
|
||||
{
|
||||
return cpuname[detect()];
|
||||
}
|
||||
|
||||
void get_architecture(void)
|
||||
{
|
||||
printf("ARM");
|
||||
}
|
||||
|
||||
void get_subarchitecture(void)
|
||||
{
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV7:
|
||||
printf("ARMV7");
|
||||
break;
|
||||
|
||||
case CPU_ARMV6:
|
||||
printf("ARMV6");
|
||||
break;
|
||||
|
||||
default:
|
||||
printf("UNKNOWN");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void get_subdirname(void)
|
||||
{
|
||||
printf("arm");
|
||||
}
|
||||
|
||||
void get_cpuconfig(void)
|
||||
{
|
||||
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV7:
|
||||
printf("#define ARMV7\n");
|
||||
printf("#define HAVE_VFP\n");
|
||||
printf("#define HAVE_VFPV3\n");
|
||||
if ( get_feature("neon")) printf("#define HAVE_NEON\n");
|
||||
if ( get_feature("vfpv4")) printf("#define HAVE_VFPV4\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
break;
|
||||
|
||||
case CPU_ARMV6:
|
||||
printf("#define ARMV6\n");
|
||||
printf("#define HAVE_VFP\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void get_libname(void)
|
||||
{
|
||||
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV7:
|
||||
printf("armv7\n");
|
||||
break;
|
||||
|
||||
case CPU_ARMV6:
|
||||
printf("armv6\n");
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void get_features(void)
|
||||
{
|
||||
|
||||
#ifdef linux
|
||||
FILE *infile;
|
||||
char buffer[2048], *p,*t;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if (!strncmp("Features", buffer, 8))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
|
||||
|
||||
if( p == NULL ) return;
|
||||
|
||||
t = strtok(p," ");
|
||||
while( t = strtok(NULL," "))
|
||||
{
|
||||
if (!strcmp(t, "vfp")) { printf("HAVE_VFP=1\n"); continue; }
|
||||
if (!strcmp(t, "vfpv3")) { printf("HAVE_VFPV3=1\n"); continue; }
|
||||
if (!strcmp(t, "vfpv4")) { printf("HAVE_VFPV4=1\n"); continue; }
|
||||
if (!strcmp(t, "neon")) { printf("HAVE_NEON=1\n"); continue; }
|
||||
}
|
||||
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -133,6 +133,6 @@ void get_cpuconfig(void){
|
||||
printf("#define L2_SIZE 1572864\n");
|
||||
printf("#define L2_LINESIZE 128\n");
|
||||
printf("#define DTB_SIZE 16384\n");
|
||||
printf("#define DTB_ENTRIES 128\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 128\n");
|
||||
}
|
||||
|
||||
|
||||
45
cpuid_mips.c
45
cpuid_mips.c
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -72,11 +72,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_SICORTEX 1
|
||||
#define CPU_LOONGSON3A 2
|
||||
#define CPU_LOONGSON3B 3
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKOWN",
|
||||
"SICORTEX",
|
||||
"LOONGSON3A"
|
||||
"LOONGSON3A",
|
||||
"LOONGSON3B"
|
||||
};
|
||||
|
||||
int detect(void){
|
||||
@@ -99,10 +101,14 @@ int detect(void){
|
||||
|
||||
fclose(infile);
|
||||
|
||||
if(p != NULL){
|
||||
if (strstr(p, "Loongson-3A")){
|
||||
return CPU_LOONGSON3A;
|
||||
}else if(strstr(p, "Loongson-3B")){
|
||||
return CPU_LOONGSON3B;
|
||||
}else if (strstr(p, "Loongson-3")){
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
p = (char *)NULL;
|
||||
while (fgets(buffer, sizeof(buffer), infile)){
|
||||
if (!strncmp("system type", buffer, 11)){
|
||||
p = strchr(buffer, ':') + 2;
|
||||
@@ -115,6 +121,24 @@ int detect(void){
|
||||
}else{
|
||||
return CPU_SICORTEX;
|
||||
}
|
||||
}
|
||||
//Check model name for Loongson3
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
p = (char *)NULL;
|
||||
while (fgets(buffer, sizeof(buffer), infile)){
|
||||
if (!strncmp("model name", buffer, 10)){
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(infile);
|
||||
if(p != NULL){
|
||||
if (strstr(p, "Loongson-3A")){
|
||||
return CPU_LOONGSON3A;
|
||||
}else if(strstr(p, "Loongson-3B")){
|
||||
return CPU_LOONGSON3B;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return CPU_UNKNOWN;
|
||||
}
|
||||
@@ -130,6 +154,8 @@ void get_architecture(void){
|
||||
void get_subarchitecture(void){
|
||||
if(detect()==CPU_LOONGSON3A) {
|
||||
printf("LOONGSON3A");
|
||||
}else if(detect()==CPU_LOONGSON3B){
|
||||
printf("LOONGSON3B");
|
||||
}else{
|
||||
printf("SICORTEX");
|
||||
}
|
||||
@@ -146,7 +172,16 @@ void get_cpuconfig(void){
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_ENTRIES 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
}else if(detect()==CPU_LOONGSON3B){
|
||||
printf("#define LOONGSON3B\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
}else{
|
||||
@@ -155,7 +190,7 @@ void get_cpuconfig(void){
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_ENTRIES 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 32\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 8\n");
|
||||
}
|
||||
@@ -164,6 +199,8 @@ void get_cpuconfig(void){
|
||||
void get_libname(void){
|
||||
if(detect()==CPU_LOONGSON3A) {
|
||||
printf("loongson3a\n");
|
||||
}else if(detect()==CPU_LOONGSON3B) {
|
||||
printf("loongson3b\n");
|
||||
}else{
|
||||
#ifdef __mips64
|
||||
printf("mips64\n");
|
||||
|
||||
@@ -114,6 +114,7 @@ int detect(void){
|
||||
if (!strncasecmp(p, "PPC970", 6)) return CPUTYPE_PPC970;
|
||||
if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
|
||||
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
|
||||
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
|
||||
|
||||
@@ -165,7 +166,7 @@ void get_cpuconfig(void){
|
||||
printf("#define L1_DATA_LINESIZE 128\n");
|
||||
printf("#define L2_SIZE 524288\n");
|
||||
printf("#define L2_LINESIZE 128 \n");
|
||||
printf("#define DTB_ENTRIES 128\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 128\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 8\n");
|
||||
|
||||
|
||||
@@ -50,7 +50,7 @@ void get_subdirname(void){
|
||||
|
||||
void get_cpuconfig(void){
|
||||
printf("#define V9\n");
|
||||
printf("#define DTB_ENTRIES 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 32\n");
|
||||
}
|
||||
|
||||
void get_libname(void){
|
||||
|
||||
217
cpuid_x86.c
217
cpuid_x86.c
@@ -40,6 +40,17 @@
|
||||
#include <string.h>
|
||||
#include "cpuid.h"
|
||||
|
||||
#ifdef NO_AVX
|
||||
#define CPUTYPE_HASWELL CPUTYPE_NEHALEM
|
||||
#define CORE_HASWELL CORE_NEHALEM
|
||||
#define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM
|
||||
#define CORE_SANDYBRIDGE CORE_NEHALEM
|
||||
#define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA
|
||||
#define CORE_BULLDOZER CORE_BARCELONA
|
||||
#define CPUTYPE_PILEDRIVER CPUTYPE_BARCELONA
|
||||
#define CORE_PILEDRIVER CORE_BARCELONA
|
||||
#endif
|
||||
|
||||
#ifndef CPUIDEMU
|
||||
|
||||
#if defined(__APPLE__) && defined(__i386__)
|
||||
@@ -109,6 +120,33 @@ static inline int have_excpuid(void){
|
||||
return eax & 0xffff;
|
||||
}
|
||||
|
||||
#ifndef NO_AVX
|
||||
static inline void xgetbv(int op, int * eax, int * edx){
|
||||
//Use binary code for xgetbv
|
||||
__asm__ __volatile__
|
||||
(".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
|
||||
}
|
||||
#endif
|
||||
|
||||
int support_avx(){
|
||||
#ifndef NO_AVX
|
||||
int eax, ebx, ecx, edx;
|
||||
int ret=0;
|
||||
|
||||
cpuid(1, &eax, &ebx, &ecx, &edx);
|
||||
if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){
|
||||
xgetbv(0, &eax, &edx);
|
||||
if((eax & 6) == 6){
|
||||
ret=1; //OS support AVX
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
int get_vendor(void){
|
||||
int eax, ebx, ecx, edx;
|
||||
char vendor[13];
|
||||
@@ -189,11 +227,18 @@ int get_cputype(int gettype){
|
||||
if ((ecx & (1 << 9)) != 0) feature |= HAVE_SSSE3;
|
||||
if ((ecx & (1 << 19)) != 0) feature |= HAVE_SSE4_1;
|
||||
if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
|
||||
#ifndef NO_AVX
|
||||
if (support_avx()) feature |= HAVE_AVX;
|
||||
if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3;
|
||||
#endif
|
||||
|
||||
if (have_excpuid() >= 0x01) {
|
||||
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
|
||||
if ((ecx & (1 << 6)) != 0) feature |= HAVE_SSE4A;
|
||||
if ((ecx & (1 << 7)) != 0) feature |= HAVE_MISALIGNSSE;
|
||||
#ifndef NO_AVX
|
||||
if ((ecx & (1 << 16)) != 0) feature |= HAVE_FMA4;
|
||||
#endif
|
||||
if ((edx & (1 << 30)) != 0) feature |= HAVE_3DNOWEX;
|
||||
if ((edx & (1 << 31)) != 0) feature |= HAVE_3DNOW;
|
||||
}
|
||||
@@ -974,18 +1019,58 @@ int get_cpuname(void){
|
||||
return CPUTYPE_DUNNINGTON;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
switch (model) {
|
||||
case 5:
|
||||
//Intel Core (Clarkdale) / Core (Arrandale)
|
||||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
||||
// Xeon (Clarkdale), 32nm
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 12:
|
||||
//Xeon Processor 5600 (Westmere-EP)
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
switch (model) {
|
||||
case 5:
|
||||
//Intel Core (Clarkdale) / Core (Arrandale)
|
||||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
||||
// Xeon (Clarkdale), 32nm
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 10:
|
||||
//Intel Core i5-2000 /i7-2000 (Sandy Bridge)
|
||||
if(support_avx())
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
else
|
||||
return CPUTYPE_NEHALEM; //OS doesn't support AVX
|
||||
case 12:
|
||||
//Xeon Processor 5600 (Westmere-EP)
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 13:
|
||||
//Intel Core i7-3000 / Xeon E5 (Sandy Bridge)
|
||||
if(support_avx())
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 14:
|
||||
// Xeon E7540
|
||||
case 15:
|
||||
//Xeon Processor E7 (Westmere-EX)
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
switch (model) {
|
||||
case 10:
|
||||
if(support_avx())
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 12:
|
||||
if(support_avx())
|
||||
return CPUTYPE_HASWELL;
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
switch (model) {
|
||||
case 5:
|
||||
if(support_avx())
|
||||
return CPUTYPE_HASWELL;
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x7:
|
||||
@@ -1018,6 +1103,23 @@ int get_cpuname(void){
|
||||
case 1:
|
||||
case 10:
|
||||
return CPUTYPE_BARCELONA;
|
||||
case 6:
|
||||
switch (model) {
|
||||
case 1:
|
||||
//AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
|
||||
if(support_avx())
|
||||
return CPUTYPE_BULLDOZER;
|
||||
else
|
||||
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||
case 2:
|
||||
if(support_avx())
|
||||
return CPUTYPE_PILEDRIVER;
|
||||
else
|
||||
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
return CPUTYPE_BOBCAT;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -1137,6 +1239,11 @@ static char *cpuname[] = {
|
||||
"NSGEODE",
|
||||
"VIAC3",
|
||||
"NANO",
|
||||
"SANDYBRIDGE",
|
||||
"BOBCAT",
|
||||
"BULLDOZER",
|
||||
"PILEDRIVER",
|
||||
"HASWELL",
|
||||
};
|
||||
|
||||
static char *lowercpuname[] = {
|
||||
@@ -1183,6 +1290,11 @@ static char *lowercpuname[] = {
|
||||
"tms3x00",
|
||||
"nsgeode",
|
||||
"nano",
|
||||
"sandybridge",
|
||||
"bobcat",
|
||||
"bulldozer",
|
||||
"piledriver",
|
||||
"haswell",
|
||||
};
|
||||
|
||||
static char *corename[] = {
|
||||
@@ -1206,6 +1318,11 @@ static char *corename[] = {
|
||||
"NEHALEM",
|
||||
"ATOM",
|
||||
"NANO",
|
||||
"SANDYBRIDGE",
|
||||
"BOBCAT",
|
||||
"BULLDOZER",
|
||||
"PILEDRIVER",
|
||||
"HASWELL",
|
||||
};
|
||||
|
||||
static char *corename_lower[] = {
|
||||
@@ -1229,6 +1346,11 @@ static char *corename_lower[] = {
|
||||
"nehalem",
|
||||
"atom",
|
||||
"nano",
|
||||
"sandybridge",
|
||||
"bobcat",
|
||||
"bulldozer",
|
||||
"piledriver",
|
||||
"haswell",
|
||||
};
|
||||
|
||||
|
||||
@@ -1310,11 +1432,51 @@ int get_coretype(void){
|
||||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
||||
// Xeon (Clarkdale), 32nm
|
||||
return CORE_NEHALEM;
|
||||
case 10:
|
||||
//Intel Core i5-2000 /i7-2000 (Sandy Bridge)
|
||||
if(support_avx())
|
||||
return CORE_SANDYBRIDGE;
|
||||
else
|
||||
return CORE_NEHALEM; //OS doesn't support AVX
|
||||
case 12:
|
||||
//Xeon Processor 5600 (Westmere-EP)
|
||||
return CORE_NEHALEM;
|
||||
case 13:
|
||||
//Intel Core i7-3000 / Xeon E5 (Sandy Bridge)
|
||||
if(support_avx())
|
||||
return CORE_SANDYBRIDGE;
|
||||
else
|
||||
return CORE_NEHALEM; //OS doesn't support AVX
|
||||
case 14:
|
||||
//Xeon E7540
|
||||
case 15:
|
||||
//Xeon Processor E7 (Westmere-EX)
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
switch (model) {
|
||||
case 10:
|
||||
if(support_avx())
|
||||
return CORE_SANDYBRIDGE;
|
||||
else
|
||||
return CORE_NEHALEM; //OS doesn't support AVX
|
||||
case 12:
|
||||
if(support_avx())
|
||||
return CORE_HASWELL;
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
switch (model) {
|
||||
case 5:
|
||||
if(support_avx())
|
||||
return CORE_HASWELL;
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -1328,7 +1490,23 @@ int get_coretype(void){
|
||||
if (family <= 0x5) return CORE_80486;
|
||||
if (family <= 0xe) return CORE_ATHLON;
|
||||
if (family == 0xf){
|
||||
if ((exfamily == 0) || (exfamily == 2)) return CORE_OPTERON; else return CORE_BARCELONA;
|
||||
if ((exfamily == 0) || (exfamily == 2)) return CORE_OPTERON;
|
||||
else if (exfamily == 5) return CORE_BOBCAT;
|
||||
else if (exfamily == 6) {
|
||||
switch (model) {
|
||||
case 1:
|
||||
//AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
|
||||
if(support_avx())
|
||||
return CORE_BULLDOZER;
|
||||
else
|
||||
return CORE_BARCELONA; //OS don't support AVX.
|
||||
case 2:
|
||||
if(support_avx())
|
||||
return CORE_PILEDRIVER;
|
||||
else
|
||||
return CORE_BARCELONA; //OS don't support AVX.
|
||||
}
|
||||
}else return CORE_BARCELONA;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1393,7 +1571,10 @@ void get_cpuconfig(void){
|
||||
if (info.size > 0) {
|
||||
printf("#define DTB_SIZE %d\n", info.size * 1024);
|
||||
printf("#define DTB_ASSOCIATIVE %d\n", info.associative);
|
||||
printf("#define DTB_ENTRIES %d\n", info.linesize);
|
||||
printf("#define DTB_DEFAULT_ENTRIES %d\n", info.linesize);
|
||||
} else {
|
||||
//fall back for some virtual machines.
|
||||
printf("#define DTB_DEFAULT_ENTRIES 32\n");
|
||||
}
|
||||
|
||||
features = get_cputype(GET_FEATURE);
|
||||
@@ -1408,8 +1589,11 @@ void get_cpuconfig(void){
|
||||
if (features & HAVE_SSE4_2) printf("#define HAVE_SSE4_2\n");
|
||||
if (features & HAVE_SSE4A) printf("#define HAVE_SSE4A\n");
|
||||
if (features & HAVE_SSE5 ) printf("#define HAVE_SSSE5\n");
|
||||
if (features & HAVE_AVX ) printf("#define HAVE_AVX\n");
|
||||
if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n");
|
||||
if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n");
|
||||
if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n");
|
||||
if (features & HAVE_FMA3 ) printf("#define HAVE_FMA3\n");
|
||||
if (features & HAVE_CFLUSH) printf("#define HAVE_CFLUSH\n");
|
||||
if (features & HAVE_HIT) printf("#define HAVE_HIT 1\n");
|
||||
if (features & HAVE_MISALIGNSSE) printf("#define HAVE_MISALIGNSSE\n");
|
||||
@@ -1422,7 +1606,7 @@ void get_cpuconfig(void){
|
||||
features = get_coretype();
|
||||
if (features > 0) printf("#define CORE_%s\n", corename[features]);
|
||||
} else {
|
||||
printf("#define DTB_ENTRIES 16\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 16\n");
|
||||
printf("#define L1_CODE_SIZE 8192\n");
|
||||
printf("#define L1_DATA_SIZE 8192\n");
|
||||
printf("#define L2_SIZE 0\n");
|
||||
@@ -1473,7 +1657,10 @@ void get_sse(void){
|
||||
if (features & HAVE_SSE4_2) printf("HAVE_SSE4_2=1\n");
|
||||
if (features & HAVE_SSE4A) printf("HAVE_SSE4A=1\n");
|
||||
if (features & HAVE_SSE5 ) printf("HAVE_SSSE5=1\n");
|
||||
if (features & HAVE_AVX ) printf("HAVE_AVX=1\n");
|
||||
if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n");
|
||||
if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n");
|
||||
if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n");
|
||||
if (features & HAVE_FMA3 ) printf("HAVE_FMA3=1\n");
|
||||
|
||||
}
|
||||
|
||||
40
ctest.c
40
ctest.c
@@ -1,3 +1,17 @@
|
||||
//LSB (Linux Standard Base) compiler
|
||||
//only support lsbc++
|
||||
#if defined (__LSB_VERSION__)
|
||||
#if !defined (__cplusplus)
|
||||
COMPILER_LSB
|
||||
#else
|
||||
#error "OpenBLAS only supports lsbcc."
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
COMPILER_CLANG
|
||||
#endif
|
||||
|
||||
#if defined(__PGI) || defined(__PGIC__)
|
||||
COMPILER_PGI
|
||||
#endif
|
||||
@@ -34,20 +48,20 @@ COMPILER_GNU
|
||||
OS_LINUX
|
||||
#endif
|
||||
|
||||
#if defined(__FreeBSD__)
|
||||
OS_FreeBSD
|
||||
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
|
||||
OS_FREEBSD
|
||||
#endif
|
||||
|
||||
#if defined(__NetBSD__)
|
||||
OS_NetBSD
|
||||
OS_NETBSD
|
||||
#endif
|
||||
|
||||
#if defined(__sun)
|
||||
OS_SunOS
|
||||
OS_SUNOS
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__)
|
||||
OS_Darwin
|
||||
OS_DARWIN
|
||||
#endif
|
||||
|
||||
#if defined(_AIX)
|
||||
@@ -63,13 +77,18 @@ OS_WINNT
|
||||
#endif
|
||||
|
||||
#if defined(__CYGWIN__)
|
||||
OS_CYGWIN
|
||||
OS_CYGWIN_NT
|
||||
#endif
|
||||
|
||||
#if defined(__INTERIX)
|
||||
OS_INTERIX
|
||||
#endif
|
||||
|
||||
#if defined(__gnu_hurd__)
|
||||
/* Hurd is very similar to GNU/Linux, it should work out of the box */
|
||||
OS_LINUX
|
||||
#endif
|
||||
|
||||
#if defined(__i386) || defined(_X86)
|
||||
ARCH_X86
|
||||
#endif
|
||||
@@ -105,3 +124,12 @@ ARCH_IA64
|
||||
#if defined(__LP64) || defined(__LP64__) || defined(__ptr64) || defined(__x86_64__) || defined(__amd64__) || defined(__64BIT__)
|
||||
BINARY_64
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_ARCH) || defined(__ARM_ARCH_7A__)
|
||||
ARCH_ARM
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__)
|
||||
ARCH_ARM64
|
||||
#endif
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
TOPDIR = ..
|
||||
include $(TOPDIR)/Makefile.system
|
||||
|
||||
CFLAGS += -DADD$(BU) -DCBLAS
|
||||
override CFLAGS += -DADD$(BU) -DCBLAS
|
||||
|
||||
LIB = $(TOPDIR)/$(LIBNAME)
|
||||
|
||||
@@ -36,27 +36,48 @@ ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o constant.o
|
||||
all :: all1 all2 all3
|
||||
|
||||
all1: xscblat1 xdcblat1 xccblat1 xzcblat1
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
OMP_NUM_THREADS=2 ./xscblat1
|
||||
OMP_NUM_THREADS=2 ./xdcblat1
|
||||
OMP_NUM_THREADS=2 ./xccblat1
|
||||
OMP_NUM_THREADS=2 ./xzcblat1
|
||||
else
|
||||
OPENBLAS_NUM_THREADS=2 ./xscblat1
|
||||
OPENBLAS_NUM_THREADS=2 ./xdcblat1
|
||||
OPENBLAS_NUM_THREADS=2 ./xccblat1
|
||||
OPENBLAS_NUM_THREADS=2 ./xzcblat1
|
||||
endif
|
||||
|
||||
all2: xscblat2 xdcblat2 xccblat2 xzcblat2
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
OMP_NUM_THREADS=2 ./xscblat2 < sin2
|
||||
OMP_NUM_THREADS=2 ./xdcblat2 < din2
|
||||
OMP_NUM_THREADS=2 ./xccblat2 < cin2
|
||||
OMP_NUM_THREADS=2 ./xzcblat2 < zin2
|
||||
else
|
||||
OPENBLAS_NUM_THREADS=2 ./xscblat2 < sin2
|
||||
OPENBLAS_NUM_THREADS=2 ./xdcblat2 < din2
|
||||
OPENBLAS_NUM_THREADS=2 ./xccblat2 < cin2
|
||||
OPENBLAS_NUM_THREADS=2 ./xzcblat2 < zin2
|
||||
endif
|
||||
|
||||
all3: xscblat3 xdcblat3 xccblat3 xzcblat3
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
OMP_NUM_THREADS=2 ./xscblat3 < sin3
|
||||
OMP_NUM_THREADS=2 ./xdcblat3 < din3
|
||||
OMP_NUM_THREADS=2 ./xccblat3 < cin3
|
||||
OMP_NUM_THREADS=2 ./xzcblat3 < zin3
|
||||
else
|
||||
OPENBLAS_NUM_THREADS=2 ./xscblat3 < sin3
|
||||
OPENBLAS_NUM_THREADS=2 ./xdcblat3 < din3
|
||||
OPENBLAS_NUM_THREADS=2 ./xccblat3 < cin3
|
||||
OPENBLAS_NUM_THREADS=2 ./xzcblat3 < zin3
|
||||
endif
|
||||
|
||||
clean ::
|
||||
rm -f x*
|
||||
|
||||
FLDFLAGS = $(FFLAGS:-fPIC=)
|
||||
FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS)
|
||||
CEXTRALIB =
|
||||
|
||||
# Single real
|
||||
|
||||
@@ -65,7 +65,6 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||
|
||||
a = (FLOAT *)args -> a;
|
||||
x = (FLOAT *)args -> b;
|
||||
y = (FLOAT *)args -> c;
|
||||
|
||||
lda = args -> lda;
|
||||
incx = args -> ldb;
|
||||
@@ -76,6 +75,10 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||
n_from = 0;
|
||||
n_to = n;
|
||||
|
||||
//Use y as each thread's n* COMPSIZE elements in sb buffer
|
||||
y = buffer;
|
||||
buffer += ((COMPSIZE * n + 1023) & ~1023);
|
||||
|
||||
if (range_m) {
|
||||
n_from = *(range_m + 0);
|
||||
n_to = *(range_m + 1);
|
||||
@@ -83,7 +86,6 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||
a += n_from * lda * COMPSIZE;
|
||||
}
|
||||
|
||||
if (range_n) y += *range_n * COMPSIZE;
|
||||
|
||||
if (incx != 1) {
|
||||
COPY_K(n, x, incx, buffer, 1);
|
||||
@@ -331,7 +333,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
|
||||
|
||||
if (num_cpu) {
|
||||
queue[0].sa = NULL;
|
||||
queue[0].sb = buffer + num_cpu * (((n + 255) & ~255) + 16) * COMPSIZE;
|
||||
queue[0].sb = buffer;
|
||||
queue[num_cpu - 1].next = NULL;
|
||||
|
||||
exec_blas(num_cpu, queue);
|
||||
@@ -344,7 +346,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
|
||||
#else
|
||||
ONE, ZERO,
|
||||
#endif
|
||||
buffer + range_n[i] * COMPSIZE, 1, buffer, 1, NULL, 0);
|
||||
(FLOAT*)(queue[i].sb), 1, buffer, 1, NULL, 0);
|
||||
}
|
||||
|
||||
AXPYU_K(n, 0, 0,
|
||||
|
||||
@@ -77,8 +77,8 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
|
||||
range_M[0] = 0;
|
||||
i = arg -> m;
|
||||
} else {
|
||||
range_M[0] = range_M[0];
|
||||
i = range_M[1] - range_M[0];
|
||||
range_M[0] = range_m[0];
|
||||
i = range_m[1] - range_m[0];
|
||||
}
|
||||
|
||||
num_cpu_m = 0;
|
||||
|
||||
@@ -71,16 +71,25 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
|
||||
queue[num_cpu].args = arg;
|
||||
queue[num_cpu].range_m = range_m;
|
||||
queue[num_cpu].range_n = &range[num_cpu];
|
||||
queue[num_cpu].sa = NULL;
|
||||
#if 0 //defined(LOONGSON3A)
|
||||
queue[num_cpu].sa = sa + GEMM_OFFSET_A1 * num_cpu;
|
||||
queue[num_cpu].sb = queue[num_cpu].sa + GEMM_OFFSET_A1 * 5;
|
||||
#else
|
||||
queue[num_cpu].sa = NULL;
|
||||
queue[num_cpu].sb = NULL;
|
||||
#endif
|
||||
queue[num_cpu].next = &queue[num_cpu + 1];
|
||||
num_cpu ++;
|
||||
}
|
||||
|
||||
if (num_cpu) {
|
||||
#if 0 //defined(LOONGSON3A)
|
||||
queue[0].sa = sa;
|
||||
queue[0].sb = sb;
|
||||
|
||||
queue[0].sb = sa + GEMM_OFFSET_A1 * 5;
|
||||
#else
|
||||
queue[0].sa = sa;
|
||||
queue[0].sb = sb;
|
||||
#endif
|
||||
queue[num_cpu - 1].next = NULL;
|
||||
|
||||
exec_blas(num_cpu,
|
||||
|
||||
@@ -55,8 +55,8 @@ int CNAME(int mode,
|
||||
range_M[0] = 0;
|
||||
i = arg -> m;
|
||||
} else {
|
||||
range_M[0] = range_M[0];
|
||||
i = range_M[1] - range_M[0];
|
||||
range_M[0] = range_m[0];
|
||||
i = range_m[1] - range_m[0];
|
||||
}
|
||||
|
||||
num_cpu_m = 0;
|
||||
|
||||
@@ -332,7 +332,18 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||
#else
|
||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
||||
min_jj = min_j + js - jjs;
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
|
||||
#if ( defined(BULLDOZER) || defined(PILEDRIVER) || defined(HASWELL) ) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX)
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
else
|
||||
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
|
||||
else
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
#else
|
||||
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
#endif
|
||||
|
||||
|
||||
START_RPCC();
|
||||
|
||||
|
||||
@@ -48,6 +48,12 @@
|
||||
#define SWITCH_RATIO 2
|
||||
#endif
|
||||
|
||||
//The array of job_t may overflow the stack.
|
||||
//Instead, use malloc to alloc job_t.
|
||||
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
|
||||
#define USE_ALLOC_HEAP
|
||||
#endif
|
||||
|
||||
#ifndef GEMM3M_LOCAL
|
||||
#if defined(NN)
|
||||
#define GEMM3M_LOCAL GEMM3M_NN
|
||||
@@ -836,7 +842,11 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
||||
BLASLONG range_M[MAX_CPU_NUMBER + 1];
|
||||
BLASLONG range_N[MAX_CPU_NUMBER + 1];
|
||||
|
||||
job_t job[MAX_CPU_NUMBER];
|
||||
#ifndef USE_ALLOC_HEAP
|
||||
job_t job[MAX_CPU_NUMBER];
|
||||
#else
|
||||
job_t * job = NULL;
|
||||
#endif
|
||||
|
||||
BLASLONG num_cpu_m, num_cpu_n;
|
||||
|
||||
@@ -866,6 +876,15 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
||||
newarg.alpha = args -> alpha;
|
||||
newarg.beta = args -> beta;
|
||||
newarg.nthreads = args -> nthreads;
|
||||
|
||||
#ifdef USE_ALLOC_HEAP
|
||||
job = (job_t*)malloc(MAX_CPU_NUMBER * sizeof(job_t));
|
||||
if(job==NULL){
|
||||
fprintf(stderr, "OpenBLAS: malloc failed in %s\n", __func__);
|
||||
exit(1);
|
||||
}
|
||||
#endif
|
||||
|
||||
newarg.common = (void *)job;
|
||||
|
||||
if (!range_m) {
|
||||
@@ -945,6 +964,10 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
||||
exec_blas(num_cpu_m, queue);
|
||||
}
|
||||
|
||||
#ifdef USE_ALLOC_HEAP
|
||||
free(job);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -48,6 +48,12 @@
|
||||
#define SWITCH_RATIO 2
|
||||
#endif
|
||||
|
||||
//The array of job_t may overflow the stack.
|
||||
//Instead, use malloc to alloc job_t.
|
||||
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
|
||||
#define USE_ALLOC_HEAP
|
||||
#endif
|
||||
|
||||
#ifndef SYRK_LOCAL
|
||||
#if !defined(LOWER) && !defined(TRANS)
|
||||
#define SYRK_LOCAL SYRK_UN
|
||||
@@ -502,7 +508,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||
|
||||
blas_arg_t newarg;
|
||||
|
||||
#ifndef USE_ALLOC_HEAP
|
||||
job_t job[MAX_CPU_NUMBER];
|
||||
#else
|
||||
job_t * job = NULL;
|
||||
#endif
|
||||
|
||||
blas_queue_t queue[MAX_CPU_NUMBER];
|
||||
|
||||
BLASLONG range[MAX_CPU_NUMBER + 100];
|
||||
@@ -556,6 +567,15 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||
newarg.ldc = args -> ldc;
|
||||
newarg.alpha = args -> alpha;
|
||||
newarg.beta = args -> beta;
|
||||
|
||||
#ifdef USE_ALLOC_HEAP
|
||||
job = (job_t*)malloc(MAX_CPU_NUMBER * sizeof(job_t));
|
||||
if(job==NULL){
|
||||
fprintf(stderr, "OpenBLAS: malloc failed in %s\n", __func__);
|
||||
exit(1);
|
||||
}
|
||||
#endif
|
||||
|
||||
newarg.common = (void *)job;
|
||||
|
||||
if (!range_n) {
|
||||
@@ -668,6 +688,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||
exec_blas(num_cpu, queue);
|
||||
}
|
||||
|
||||
#ifdef USE_ALLOC_HEAP
|
||||
free(job);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -48,6 +48,12 @@
|
||||
#define SWITCH_RATIO 2
|
||||
#endif
|
||||
|
||||
//The array of job_t may overflow the stack.
|
||||
//Instead, use malloc to alloc job_t.
|
||||
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
|
||||
#define USE_ALLOC_HEAP
|
||||
#endif
|
||||
|
||||
#ifndef GEMM_LOCAL
|
||||
#if defined(NN)
|
||||
#define GEMM_LOCAL GEMM_NN
|
||||
@@ -360,8 +366,18 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||
|
||||
for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){
|
||||
min_jj = MIN(n_to, xxx + div_n) - jjs;
|
||||
|
||||
#if ( defined(BULLDOZER) || defined(PILEDRIVER) || defined(HASWELL) ) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX)
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
else
|
||||
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
|
||||
else
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
#else
|
||||
|
||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||
|
||||
#endif
|
||||
|
||||
START_RPCC();
|
||||
|
||||
OCOPY_OPERATION(min_l, min_jj, b, ldb, ls, jjs,
|
||||
@@ -519,7 +535,12 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
||||
|
||||
blas_arg_t newarg;
|
||||
|
||||
#ifndef USE_ALLOC_HEAP
|
||||
job_t job[MAX_CPU_NUMBER];
|
||||
#else
|
||||
job_t * job = NULL;
|
||||
#endif
|
||||
|
||||
blas_queue_t queue[MAX_CPU_NUMBER];
|
||||
|
||||
BLASLONG range_M[MAX_CPU_NUMBER + 1];
|
||||
@@ -563,6 +584,15 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
||||
newarg.alpha = args -> alpha;
|
||||
newarg.beta = args -> beta;
|
||||
newarg.nthreads = args -> nthreads;
|
||||
|
||||
#ifdef USE_ALLOC_HEAP
|
||||
job = (job_t*)malloc(MAX_CPU_NUMBER * sizeof(job_t));
|
||||
if(job==NULL){
|
||||
fprintf(stderr, "OpenBLAS: malloc failed in %s\n", __func__);
|
||||
exit(1);
|
||||
}
|
||||
#endif
|
||||
|
||||
newarg.common = (void *)job;
|
||||
|
||||
#ifdef PARAMTEST
|
||||
@@ -634,7 +664,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
||||
|
||||
num_cpu_n ++;
|
||||
}
|
||||
|
||||
|
||||
for (j = 0; j < num_cpu_m; j++) {
|
||||
for (i = 0; i < num_cpu_m; i++) {
|
||||
for (k = 0; k < DIVIDE_RATE; k++) {
|
||||
@@ -648,6 +678,10 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
||||
exec_blas(num_cpu_m, queue);
|
||||
}
|
||||
|
||||
#ifdef USE_ALLOC_HEAP
|
||||
free(job);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
TOPDIR = ../..
|
||||
include ../../Makefile.system
|
||||
|
||||
COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX)
|
||||
COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_config.$(SUFFIX) openblas_get_parallel.$(SUFFIX)
|
||||
|
||||
COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
|
||||
|
||||
ifdef SMP
|
||||
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) openblas_set_num_threads.$(SUFFIX)
|
||||
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX)
|
||||
ifndef NO_AFFINITY
|
||||
COMMONOBJS += init.$(SUFFIX)
|
||||
endif
|
||||
@@ -14,7 +14,7 @@ endif
|
||||
|
||||
# COMMONOBJS += info.$(SUFFIX)
|
||||
|
||||
ifdef DYNAMIC_ARCH
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
COMMONOBJS += dynamic.$(SUFFIX)
|
||||
else
|
||||
COMMONOBJS += parameter.$(SUFFIX)
|
||||
@@ -70,7 +70,7 @@ ifndef BLAS_SERVER
|
||||
BLAS_SERVER = blas_server.c
|
||||
endif
|
||||
|
||||
ifdef DYNAMIC_ARCH
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
|
||||
else
|
||||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
|
||||
@@ -103,6 +103,12 @@ blas_server.$(SUFFIX) : $(BLAS_SERVER) ../../common.h ../../common_thread.h ../.
|
||||
openblas_set_num_threads.$(SUFFIX) : openblas_set_num_threads.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
openblas_get_config.$(SUFFIX) : openblas_get_config.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
openblas_get_parallel.$(SUFFIX) : openblas_get_parallel.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
@@ -215,7 +221,7 @@ info.$(SUFFIX) : info.c info.h ../../common.h ../../param.h
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
|
||||
hpl : CFLAGS += -DHPL
|
||||
hpl_p : CFLAGS += -DHPL
|
||||
hpl : override CFLAGS += -DHPL
|
||||
hpl_p : override CFLAGS += -DHPL
|
||||
|
||||
include $(TOPDIR)/Makefile.tail
|
||||
|
||||
@@ -385,6 +385,7 @@ static int blas_thread_server(void *arg){
|
||||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
||||
}
|
||||
}
|
||||
queue->sb=sb;
|
||||
}
|
||||
|
||||
#ifdef MONITOR
|
||||
@@ -435,7 +436,7 @@ static int blas_thread_server(void *arg){
|
||||
|
||||
blas_memory_free(buffer);
|
||||
|
||||
pthread_exit(NULL);
|
||||
//pthread_exit(NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -500,6 +501,7 @@ static int blas_monitor(void *arg){
|
||||
/* Initializing routine */
|
||||
int blas_thread_init(void){
|
||||
BLASLONG i;
|
||||
int ret;
|
||||
#ifdef NEED_STACKATTR
|
||||
pthread_attr_t attr;
|
||||
#endif
|
||||
@@ -545,12 +547,16 @@ int blas_thread_init(void){
|
||||
pthread_cond_init (&thread_status[i].wakeup, NULL);
|
||||
|
||||
#ifdef NEED_STACKATTR
|
||||
pthread_create(&blas_threads[i], &attr,
|
||||
ret=pthread_create(&blas_threads[i], &attr,
|
||||
(void *)&blas_thread_server, (void *)i);
|
||||
#else
|
||||
pthread_create(&blas_threads[i], NULL,
|
||||
ret=pthread_create(&blas_threads[i], NULL,
|
||||
(void *)&blas_thread_server, (void *)i);
|
||||
#endif
|
||||
if(ret!=0){
|
||||
fprintf(STDERR,"OpenBLAS: pthread_creat error in blas_thread_init function. Error code:%d\n",ret);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef MONITOR
|
||||
@@ -765,6 +771,19 @@ void goto_set_num_threads(int num_threads) {
|
||||
|
||||
if (num_threads < 1) num_threads = blas_num_threads;
|
||||
|
||||
#ifndef NO_AFFINITY
|
||||
if (num_threads == 1) {
|
||||
if (blas_cpu_number == 1){
|
||||
//OpenBLAS is already single thread.
|
||||
return;
|
||||
}else{
|
||||
//From multi-threads to single thread
|
||||
//Restore the original affinity mask
|
||||
gotoblas_set_affinity(-1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
|
||||
|
||||
if (num_threads > blas_num_threads) {
|
||||
@@ -795,8 +814,20 @@ void goto_set_num_threads(int num_threads) {
|
||||
UNLOCK_COMMAND(&server_lock);
|
||||
}
|
||||
|
||||
#ifndef NO_AFFINITY
|
||||
if(blas_cpu_number == 1 && num_threads > 1){
|
||||
//Restore the thread 0 affinity.
|
||||
gotoblas_set_affinity(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
blas_cpu_number = num_threads;
|
||||
|
||||
#if defined(ARCH_MIPS64)
|
||||
//set parameters for different number of threads.
|
||||
blas_set_parameter();
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void openblas_set_num_threads(int num_threads) {
|
||||
|
||||
@@ -49,8 +49,12 @@
|
||||
|
||||
int blas_server_avail = 0;
|
||||
|
||||
static void * blas_thread_buffer[MAX_CPU_NUMBER];
|
||||
|
||||
void goto_set_num_threads(int num_threads) {
|
||||
|
||||
int i=0;
|
||||
|
||||
if (num_threads < 1) num_threads = blas_num_threads;
|
||||
|
||||
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
|
||||
@@ -62,7 +66,24 @@ void goto_set_num_threads(int num_threads) {
|
||||
blas_cpu_number = num_threads;
|
||||
|
||||
omp_set_num_threads(blas_cpu_number);
|
||||
|
||||
|
||||
//adjust buffer for each thread
|
||||
for(i=0; i<blas_cpu_number; i++){
|
||||
if(blas_thread_buffer[i]==NULL){
|
||||
blas_thread_buffer[i]=blas_memory_alloc(2);
|
||||
}
|
||||
}
|
||||
for(; i<MAX_CPU_NUMBER; i++){
|
||||
if(blas_thread_buffer[i]!=NULL){
|
||||
blas_memory_free(blas_thread_buffer[i]);
|
||||
blas_thread_buffer[i]=NULL;
|
||||
}
|
||||
}
|
||||
#if defined(ARCH_MIPS64)
|
||||
//set parameters for different number of threads.
|
||||
blas_set_parameter();
|
||||
#endif
|
||||
|
||||
}
|
||||
void openblas_set_num_threads(int num_threads) {
|
||||
|
||||
@@ -71,17 +92,33 @@ void openblas_set_num_threads(int num_threads) {
|
||||
|
||||
int blas_thread_init(void){
|
||||
|
||||
int i=0;
|
||||
|
||||
blas_get_cpu_number();
|
||||
|
||||
blas_server_avail = 1;
|
||||
|
||||
for(i=0; i<blas_num_threads; i++){
|
||||
blas_thread_buffer[i]=blas_memory_alloc(2);
|
||||
}
|
||||
for(; i<MAX_CPU_NUMBER; i++){
|
||||
blas_thread_buffer[i]=NULL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int BLASFUNC(blas_thread_shutdown)(void){
|
||||
|
||||
int i=0;
|
||||
blas_server_avail = 0;
|
||||
|
||||
for(i=0; i<MAX_CPU_NUMBER; i++){
|
||||
if(blas_thread_buffer[i]!=NULL){
|
||||
blas_memory_free(blas_thread_buffer[i]);
|
||||
blas_thread_buffer[i]=NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -172,7 +209,8 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
|
||||
static void exec_threads(blas_queue_t *queue){
|
||||
|
||||
void *buffer, *sa, *sb;
|
||||
|
||||
int pos=0, release_flag=0;
|
||||
|
||||
buffer = NULL;
|
||||
sa = queue -> sa;
|
||||
sb = queue -> sb;
|
||||
@@ -184,9 +222,19 @@ static void exec_threads(blas_queue_t *queue){
|
||||
|
||||
if ((sa == NULL) && (sb == NULL) && ((queue -> mode & BLAS_PTHREAD) == 0)) {
|
||||
|
||||
buffer = blas_memory_alloc(2);
|
||||
pos = omp_get_thread_num();
|
||||
buffer = blas_thread_buffer[pos];
|
||||
|
||||
if (sa == NULL) sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
|
||||
//fallback
|
||||
if(buffer==NULL) {
|
||||
buffer = blas_memory_alloc(2);
|
||||
release_flag=1;
|
||||
}
|
||||
|
||||
if (sa == NULL) {
|
||||
sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
|
||||
queue->sa=sa;
|
||||
}
|
||||
|
||||
if (sb == NULL) {
|
||||
if (!(queue -> mode & BLAS_COMPLEX)){
|
||||
@@ -219,6 +267,7 @@ static void exec_threads(blas_queue_t *queue){
|
||||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
||||
}
|
||||
}
|
||||
queue->sb=sb;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -236,7 +285,7 @@ static void exec_threads(blas_queue_t *queue){
|
||||
|
||||
}
|
||||
|
||||
if (buffer != NULL) blas_memory_free(buffer);
|
||||
if (release_flag) blas_memory_free(buffer);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -63,6 +63,8 @@ static blas_pool_t pool;
|
||||
static HANDLE blas_threads [MAX_CPU_NUMBER];
|
||||
static DWORD blas_threads_id[MAX_CPU_NUMBER];
|
||||
|
||||
|
||||
|
||||
static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
|
||||
|
||||
if (!(mode & BLAS_COMPLEX)){
|
||||
@@ -179,7 +181,7 @@ static DWORD WINAPI blas_thread_server(void *arg){
|
||||
|
||||
do {
|
||||
action = WaitForMultipleObjects(2, handles, FALSE, INFINITE);
|
||||
} while ((action != WAIT_OBJECT_0) && (action == WAIT_OBJECT_0 + 1));
|
||||
} while ((action != WAIT_OBJECT_0) && (action != WAIT_OBJECT_0 + 1));
|
||||
|
||||
if (action == WAIT_OBJECT_0 + 1) break;
|
||||
|
||||
@@ -251,6 +253,7 @@ static DWORD WINAPI blas_thread_server(void *arg){
|
||||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
||||
}
|
||||
}
|
||||
queue->sb=sb;
|
||||
}
|
||||
|
||||
#ifdef MONITOR
|
||||
@@ -263,7 +266,9 @@ static DWORD WINAPI blas_thread_server(void *arg){
|
||||
} else {
|
||||
legacy_exec(routine, queue -> mode, queue -> args, sb);
|
||||
}
|
||||
}
|
||||
}else{
|
||||
continue; //if queue == NULL
|
||||
}
|
||||
|
||||
#ifdef SMP_DEBUG
|
||||
fprintf(STDERR, "Server[%2ld] Finished!\n", cpu);
|
||||
@@ -425,7 +430,7 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
|
||||
/* Shutdown procedure, but user don't have to call this routine. The */
|
||||
/* kernel automatically kill threads. */
|
||||
|
||||
int blas_thread_shutdown_(void){
|
||||
int BLASFUNC(blas_thread_shutdown)(void){
|
||||
|
||||
int i;
|
||||
|
||||
@@ -436,9 +441,10 @@ int blas_thread_shutdown_(void){
|
||||
if (blas_server_avail){
|
||||
|
||||
SetEvent(pool.killed);
|
||||
|
||||
for(i = 0; i < blas_cpu_number - 1; i++){
|
||||
WaitForSingleObject(blas_threads[i], INFINITE);
|
||||
printf("blas_num_threads=%d\n", blas_num_threads);
|
||||
for(i = 0; i < blas_num_threads - 1; i++){
|
||||
WaitForSingleObject(blas_threads[i], 5); //INFINITE);
|
||||
TerminateThread(blas_threads[i],0);
|
||||
}
|
||||
|
||||
blas_server_avail = 0;
|
||||
@@ -448,3 +454,47 @@ int blas_thread_shutdown_(void){
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void goto_set_num_threads(int num_threads)
|
||||
{
|
||||
long i;
|
||||
|
||||
if (num_threads < 1) num_threads = blas_cpu_number;
|
||||
|
||||
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
|
||||
|
||||
if (num_threads > blas_num_threads) {
|
||||
|
||||
LOCK_COMMAND(&server_lock);
|
||||
|
||||
//increased_threads = 1;
|
||||
if (!blas_server_avail){
|
||||
|
||||
InitializeCriticalSection(&pool.lock);
|
||||
pool.filled = CreateEvent(NULL, FALSE, FALSE, NULL);
|
||||
pool.killed = CreateEvent(NULL, TRUE, FALSE, NULL);
|
||||
|
||||
pool.shutdown = 0;
|
||||
pool.queue = NULL;
|
||||
blas_server_avail = 1;
|
||||
}
|
||||
|
||||
for(i = blas_num_threads - 1; i < num_threads - 1; i++){
|
||||
|
||||
blas_threads[i] = CreateThread(NULL, 0,
|
||||
blas_thread_server, (void *)i,
|
||||
0, &blas_threads_id[i]);
|
||||
}
|
||||
|
||||
blas_num_threads = num_threads;
|
||||
|
||||
UNLOCK_COMMAND(&server_lock);
|
||||
}
|
||||
|
||||
blas_cpu_number = num_threads;
|
||||
}
|
||||
|
||||
void openblas_set_num_threads(int num)
|
||||
{
|
||||
goto_set_num_threads(num);
|
||||
}
|
||||
|
||||
@@ -60,6 +60,20 @@ extern gotoblas_t gotoblas_NEHALEM;
|
||||
extern gotoblas_t gotoblas_OPTERON;
|
||||
extern gotoblas_t gotoblas_OPTERON_SSE3;
|
||||
extern gotoblas_t gotoblas_BARCELONA;
|
||||
extern gotoblas_t gotoblas_BOBCAT;
|
||||
#ifndef NO_AVX
|
||||
extern gotoblas_t gotoblas_SANDYBRIDGE;
|
||||
extern gotoblas_t gotoblas_BULLDOZER;
|
||||
extern gotoblas_t gotoblas_PILEDRIVER;
|
||||
extern gotoblas_t gotoblas_HASWELL;
|
||||
#else
|
||||
//Use NEHALEM kernels for sandy bridge
|
||||
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
|
||||
#define gotoblas_HASWELL gotoblas_NEHALEM
|
||||
#define gotoblas_BULLDOZER gotoblas_BARCELONA
|
||||
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
|
||||
#endif
|
||||
|
||||
|
||||
#define VENDOR_INTEL 1
|
||||
#define VENDOR_AMD 2
|
||||
@@ -68,6 +82,32 @@ extern gotoblas_t gotoblas_BARCELONA;
|
||||
|
||||
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
|
||||
|
||||
#ifndef NO_AVX
|
||||
static inline void xgetbv(int op, int * eax, int * edx){
|
||||
//Use binary code for xgetbv
|
||||
__asm__ __volatile__
|
||||
(".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
|
||||
}
|
||||
#endif
|
||||
|
||||
int support_avx(){
|
||||
#ifndef NO_AVX
|
||||
int eax, ebx, ecx, edx;
|
||||
int ret=0;
|
||||
|
||||
cpuid(1, &eax, &ebx, &ecx, &edx);
|
||||
if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){
|
||||
xgetbv(0, &eax, &edx);
|
||||
if((eax & 6) == 6){
|
||||
ret=1; //OS support AVX
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static int get_vendor(void){
|
||||
int eax, ebx, ecx, edx;
|
||||
char vendor[13];
|
||||
@@ -122,15 +162,59 @@ static gotoblas_t *get_coretype(void){
|
||||
if (model == 12) return &gotoblas_ATOM;
|
||||
return NULL;
|
||||
|
||||
case 2:
|
||||
//Intel Core (Clarkdale) / Core (Arrandale)
|
||||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
||||
// Xeon (Clarkdale), 32nm
|
||||
if (model == 5) return &gotoblas_NEHALEM;
|
||||
case 2:
|
||||
//Intel Core (Clarkdale) / Core (Arrandale)
|
||||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
||||
// Xeon (Clarkdale), 32nm
|
||||
if (model == 5) return &gotoblas_NEHALEM;
|
||||
|
||||
//Intel Xeon Processor 5600 (Westmere-EP)
|
||||
if (model == 12) return &gotoblas_NEHALEM;
|
||||
return NULL;
|
||||
//Intel Xeon Processor 5600 (Westmere-EP)
|
||||
//Xeon Processor E7 (Westmere-EX)
|
||||
//Xeon E7540
|
||||
if (model == 12 || model == 14 || model == 15) return &gotoblas_NEHALEM;
|
||||
|
||||
//Intel Core i5-2000 /i7-2000 (Sandy Bridge)
|
||||
//Intel Core i7-3000 / Xeon E5
|
||||
if (model == 10 || model == 13) {
|
||||
if(support_avx())
|
||||
return &gotoblas_SANDYBRIDGE;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
case 3:
|
||||
//Intel Sandy Bridge 22nm (Ivy Bridge?)
|
||||
if (model == 10) {
|
||||
if(support_avx())
|
||||
return &gotoblas_SANDYBRIDGE;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
//Intel Haswell
|
||||
if (model == 12) {
|
||||
if(support_avx())
|
||||
return &gotoblas_HASWELL;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
case 4:
|
||||
//Intel Haswell
|
||||
if (model == 5) {
|
||||
if(support_avx())
|
||||
return &gotoblas_HASWELL;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
case 0xf:
|
||||
if (model <= 0x2) return &gotoblas_NORTHWOOD;
|
||||
@@ -139,12 +223,44 @@ static gotoblas_t *get_coretype(void){
|
||||
}
|
||||
|
||||
if (vendor == VENDOR_AMD){
|
||||
if (family <= 0xe) return &gotoblas_ATHLON;
|
||||
if (family <= 0xe) {
|
||||
// Verify that CPU has 3dnow and 3dnowext before claiming it is Athlon
|
||||
cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
|
||||
if (eax & 0xffff >= 0x01) {
|
||||
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
|
||||
if ((edx & (1 << 30)) == 0 || (edx & (1 << 31)) == 0)
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
return NULL;
|
||||
|
||||
return &gotoblas_ATHLON;
|
||||
}
|
||||
if (family == 0xf){
|
||||
if ((exfamily == 0) || (exfamily == 2)) {
|
||||
if (ecx & (1 << 0)) return &gotoblas_OPTERON_SSE3;
|
||||
else return &gotoblas_OPTERON;
|
||||
} else {
|
||||
} else if (exfamily == 5) {
|
||||
return &gotoblas_BOBCAT;
|
||||
} else if (exfamily == 6) {
|
||||
if(model == 1){
|
||||
//AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
|
||||
if(support_avx())
|
||||
return &gotoblas_BULLDOZER;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n");
|
||||
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}else if(model == 2){
|
||||
//AMD Bulldozer Opteron 6300 / Opteron 4300 / Opteron 3300
|
||||
if(support_avx())
|
||||
return &gotoblas_PILEDRIVER;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n");
|
||||
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return &gotoblas_BARCELONA;
|
||||
}
|
||||
}
|
||||
@@ -178,6 +294,11 @@ static char *corename[] = {
|
||||
"Opteron(SSE3)",
|
||||
"Barcelona",
|
||||
"Nano",
|
||||
"Sandybridge",
|
||||
"Bobcat",
|
||||
"Bulldozer",
|
||||
"Piledriver",
|
||||
"Haswell",
|
||||
};
|
||||
|
||||
char *gotoblas_corename(void) {
|
||||
@@ -197,7 +318,12 @@ char *gotoblas_corename(void) {
|
||||
if (gotoblas == &gotoblas_OPTERON) return corename[13];
|
||||
if (gotoblas == &gotoblas_BARCELONA) return corename[14];
|
||||
if (gotoblas == &gotoblas_NANO) return corename[15];
|
||||
|
||||
if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16];
|
||||
if (gotoblas == &gotoblas_BOBCAT) return corename[17];
|
||||
if (gotoblas == &gotoblas_BULLDOZER) return corename[18];
|
||||
if (gotoblas == &gotoblas_PILEDRIVER) return corename[19];
|
||||
if (gotoblas == &gotoblas_HASWELL) return corename[20];
|
||||
|
||||
return corename[0];
|
||||
}
|
||||
|
||||
@@ -211,12 +337,21 @@ void gotoblas_dynamic_init(void) {
|
||||
if (gotoblas == NULL) gotoblas = &gotoblas_KATMAI;
|
||||
#else
|
||||
if (gotoblas == NULL) gotoblas = &gotoblas_PRESCOTT;
|
||||
/* sanity check, if 64bit pointer we can't have a 32 bit cpu */
|
||||
if (sizeof(void*) == 8) {
|
||||
if (gotoblas == &gotoblas_KATMAI ||
|
||||
gotoblas == &gotoblas_COPPERMINE ||
|
||||
gotoblas == &gotoblas_NORTHWOOD ||
|
||||
gotoblas == &gotoblas_BANIAS ||
|
||||
gotoblas == &gotoblas_ATHLON)
|
||||
gotoblas = &gotoblas_PRESCOTT;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (gotoblas && gotoblas -> init) {
|
||||
gotoblas -> init();
|
||||
} else {
|
||||
fprintf(stderr, "GotoBLAS : Architecture Initialization failed. No initialization function found.\n");
|
||||
fprintf(stderr, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -82,9 +82,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include <sched.h>
|
||||
#include <dirent.h>
|
||||
#include <dlfcn.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
|
||||
#define MAX_NODES 16
|
||||
#define MAX_CPUS 256
|
||||
#define NCPUBITS (8*sizeof(unsigned long))
|
||||
#define MAX_BITMASK_LEN (MAX_CPUS/NCPUBITS)
|
||||
#define CPUELT(cpu) ((cpu) / NCPUBITS)
|
||||
#define CPUMASK(cpu) ((unsigned long) 1UL << ((cpu) % NCPUBITS))
|
||||
|
||||
|
||||
#define SH_MAGIC 0x510510
|
||||
|
||||
@@ -103,10 +110,10 @@ typedef struct {
|
||||
int num_nodes;
|
||||
int num_procs;
|
||||
int final_num_procs;
|
||||
unsigned long avail;
|
||||
|
||||
unsigned long avail [MAX_BITMASK_LEN];
|
||||
int avail_count;
|
||||
unsigned long cpu_info [MAX_CPUS];
|
||||
unsigned long node_info [MAX_NODES];
|
||||
unsigned long node_info [MAX_NODES][MAX_BITMASK_LEN];
|
||||
int cpu_use[MAX_CPUS];
|
||||
|
||||
} shm_t;
|
||||
@@ -126,7 +133,8 @@ static shm_t *common = (void *)-1;
|
||||
static int shmid, pshmid;
|
||||
static void *paddr;
|
||||
|
||||
static unsigned long lprocmask, lnodemask;
|
||||
static unsigned long lprocmask[MAX_BITMASK_LEN], lnodemask;
|
||||
static int lprocmask_count = 0;
|
||||
static int numprocs = 1;
|
||||
static int numnodes = 1;
|
||||
|
||||
@@ -177,70 +185,114 @@ static inline int rcount(unsigned long number) {
|
||||
than sizeof(unsigned long). On 64 bits, the limit
|
||||
is 64. On 32 bits, it is 32.
|
||||
***/
|
||||
static inline unsigned long get_cpumap(int node) {
|
||||
static inline void get_cpumap(int node, unsigned long * node_info) {
|
||||
|
||||
int infile;
|
||||
unsigned long affinity;
|
||||
unsigned long affinity[32];
|
||||
char name[160];
|
||||
char cpumap[160];
|
||||
char *p, *dummy;
|
||||
char *dummy;
|
||||
int i=0;
|
||||
int count=0;
|
||||
int k=0;
|
||||
|
||||
sprintf(name, CPUMAP_NAME, node);
|
||||
|
||||
infile = open(name, O_RDONLY);
|
||||
for(i=0; i<32; i++){
|
||||
affinity[i] = 0;
|
||||
}
|
||||
|
||||
affinity = 0;
|
||||
|
||||
if (infile != -1) {
|
||||
|
||||
read(infile, cpumap, sizeof(cpumap));
|
||||
p = cpumap;
|
||||
while (*p != '\n' && i<160){
|
||||
if(*p != ',') {
|
||||
name[i++]=*p;
|
||||
|
||||
for(i=0; i<160; i++){
|
||||
if(cpumap[i] == '\n')
|
||||
break;
|
||||
if(cpumap[i] != ','){
|
||||
name[k++]=cpumap[i];
|
||||
|
||||
//Enough data for Hex
|
||||
if(k >= NCPUBITS/4){
|
||||
affinity[count++] = strtoul(name, &dummy, 16);
|
||||
k=0;
|
||||
}
|
||||
}
|
||||
p++;
|
||||
|
||||
}
|
||||
if(k!=0){
|
||||
name[k]='\0';
|
||||
affinity[count++] = strtoul(name, &dummy, 16);
|
||||
k=0;
|
||||
}
|
||||
// 0-63bit -> node_info[0], 64-128bit -> node_info[1] ....
|
||||
// revert the sequence
|
||||
for(i=0; i<count && i<MAX_BITMASK_LEN; i++){
|
||||
node_info[i]=affinity[count-i-1];
|
||||
}
|
||||
p = name;
|
||||
|
||||
// while ((*p == '0') || (*p == ',')) p++;
|
||||
|
||||
affinity = strtoul(p, &dummy, 16);
|
||||
|
||||
close(infile);
|
||||
}
|
||||
|
||||
return affinity;
|
||||
return ;
|
||||
}
|
||||
|
||||
static inline unsigned long get_share(int cpu, int level) {
|
||||
static inline void get_share(int cpu, int level, unsigned long * share) {
|
||||
|
||||
int infile;
|
||||
unsigned long affinity;
|
||||
unsigned long affinity[32];
|
||||
char cpumap[160];
|
||||
char name[160];
|
||||
char *p;
|
||||
|
||||
char *dummy;
|
||||
int count=0;
|
||||
int i=0,k=0;
|
||||
int bitmask_idx = 0;
|
||||
|
||||
sprintf(name, SHARE_NAME, cpu, level);
|
||||
|
||||
infile = open(name, O_RDONLY);
|
||||
|
||||
affinity = (1UL << cpu);
|
||||
|
||||
// Init share
|
||||
for(i=0; i<MAX_BITMASK_LEN; i++){
|
||||
share[i]=0;
|
||||
}
|
||||
bitmask_idx = CPUELT(cpu);
|
||||
share[bitmask_idx] = CPUMASK(cpu);
|
||||
|
||||
if (infile != -1) {
|
||||
|
||||
read(infile, name, sizeof(name));
|
||||
read(infile, cpumap, sizeof(cpumap));
|
||||
|
||||
for(i=0; i<160; i++){
|
||||
if(cpumap[i] == '\n')
|
||||
break;
|
||||
if(cpumap[i] != ','){
|
||||
name[k++]=cpumap[i];
|
||||
|
||||
//Enough data
|
||||
if(k >= NCPUBITS/4){
|
||||
affinity[count++] = strtoul(name, &dummy, 16);
|
||||
k=0;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
if(k!=0){
|
||||
name[k]='\0';
|
||||
affinity[count++] = strtoul(name, &dummy, 16);
|
||||
k=0;
|
||||
}
|
||||
// 0-63bit -> node_info[0], 64-128bit -> node_info[1] ....
|
||||
// revert the sequence
|
||||
for(i=0; i<count && i<MAX_BITMASK_LEN; i++){
|
||||
share[i]=affinity[count-i-1];
|
||||
}
|
||||
|
||||
p = name;
|
||||
|
||||
while ((*p == '0') || (*p == ',')) p++;
|
||||
|
||||
affinity = strtol(p, &p, 16);
|
||||
|
||||
close(infile);
|
||||
}
|
||||
|
||||
return affinity;
|
||||
return ;
|
||||
}
|
||||
|
||||
static int numa_check(void) {
|
||||
@@ -248,6 +300,7 @@ static int numa_check(void) {
|
||||
DIR *dp;
|
||||
struct dirent *dir;
|
||||
int node;
|
||||
int j;
|
||||
|
||||
common -> num_nodes = 0;
|
||||
|
||||
@@ -258,20 +311,22 @@ static int numa_check(void) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (node = 0; node < MAX_NODES; node ++) common -> node_info[node] = 0;
|
||||
for (node = 0; node < MAX_NODES; node ++) {
|
||||
for (j = 0; j<MAX_BITMASK_LEN; j++) common -> node_info[node][j] = 0;
|
||||
}
|
||||
|
||||
while ((dir = readdir(dp)) != NULL) {
|
||||
if (*(unsigned int *) dir -> d_name == 0x065646f6eU) {
|
||||
if (strncmp(dir->d_name, "node", 4)==0) {
|
||||
|
||||
node = atoi(&dir -> d_name[4]);
|
||||
|
||||
if (node > MAX_NODES) {
|
||||
fprintf(stderr, "\nGotoBLAS Warining : MAX_NODES (NUMA) is too small. Terminated.\n");
|
||||
fprintf(stderr, "\nOpenBLAS Warning : MAX_NODES (NUMA) is too small. Terminated.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
common -> num_nodes ++;
|
||||
common -> node_info[node] = get_cpumap(node);
|
||||
get_cpumap(node, common->node_info[node]);
|
||||
|
||||
}
|
||||
}
|
||||
@@ -284,7 +339,7 @@ static int numa_check(void) {
|
||||
fprintf(stderr, "Numa found : number of Nodes = %2d\n", common -> num_nodes);
|
||||
|
||||
for (node = 0; node < common -> num_nodes; node ++)
|
||||
fprintf(stderr, "MASK (%2d) : %08lx\n", node, common -> node_info[node]);
|
||||
fprintf(stderr, "MASK (%2d) : %08lx\n", node, common -> node_info[node][0]);
|
||||
#endif
|
||||
|
||||
return common -> num_nodes;
|
||||
@@ -296,11 +351,13 @@ static void numa_mapping(void) {
|
||||
int i, j, h;
|
||||
unsigned long work, bit;
|
||||
int count = 0;
|
||||
int bitmask_idx = 0;
|
||||
|
||||
for (node = 0; node < common -> num_nodes; node ++) {
|
||||
core = 0;
|
||||
for (cpu = 0; cpu < common -> num_procs; cpu ++) {
|
||||
if (common -> node_info[node] & common -> avail & (1UL << cpu)) {
|
||||
bitmask_idx = CPUELT(cpu);
|
||||
if (common -> node_info[node][bitmask_idx] & common -> avail[bitmask_idx] & CPUMASK(cpu)) {
|
||||
common -> cpu_info[count] = WRITE_CORE(core) | WRITE_NODE(node) | WRITE_CPU(cpu);
|
||||
count ++;
|
||||
core ++;
|
||||
@@ -357,58 +414,92 @@ static void numa_mapping(void) {
|
||||
|
||||
static void disable_hyperthread(void) {
|
||||
|
||||
unsigned long share;
|
||||
unsigned long share[MAX_BITMASK_LEN];
|
||||
int cpu;
|
||||
int bitmask_idx = 0;
|
||||
int i=0, count=0;
|
||||
bitmask_idx = CPUELT(common -> num_procs);
|
||||
|
||||
if(common->num_procs > 64){
|
||||
fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs);
|
||||
exit(1);
|
||||
}else if(common->num_procs == 64){
|
||||
common -> avail = 0xFFFFFFFFFFFFFFFFUL;
|
||||
}else
|
||||
common -> avail = (1UL << common -> num_procs) - 1;
|
||||
for(i=0; i< bitmask_idx; i++){
|
||||
common -> avail[count++] = 0xFFFFFFFFFFFFFFFFUL;
|
||||
}
|
||||
if(CPUMASK(common -> num_procs) != 1){
|
||||
common -> avail[count++] = CPUMASK(common -> num_procs) - 1;
|
||||
}
|
||||
common -> avail_count = count;
|
||||
|
||||
/* if(common->num_procs > 64){ */
|
||||
/* fprintf(stderr, "\nOpenBLAS Warning : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs); */
|
||||
/* exit(1); */
|
||||
/* }else if(common->num_procs == 64){ */
|
||||
/* common -> avail = 0xFFFFFFFFFFFFFFFFUL; */
|
||||
/* }else */
|
||||
/* common -> avail = (1UL << common -> num_procs) - 1; */
|
||||
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "\nAvail CPUs : %04lx.\n", common -> avail);
|
||||
fprintf(stderr, "\nAvail CPUs : ");
|
||||
for(i=0; i<count; i++)
|
||||
fprintf(stderr, "%04lx ", common -> avail[i]);
|
||||
fprintf(stderr, ".\n");
|
||||
#endif
|
||||
|
||||
for (cpu = 0; cpu < common -> num_procs; cpu ++) {
|
||||
|
||||
share = (get_share(cpu, 1) & common -> avail);
|
||||
|
||||
if (popcount(share) > 1) {
|
||||
|
||||
get_share(cpu, 1, share);
|
||||
|
||||
//When the shared cpu are in different element of share & avail array, this may be a bug.
|
||||
for (i = 0; i < count ; i++){
|
||||
|
||||
share[i] &= common->avail[i];
|
||||
|
||||
if (popcount(share[i]) > 1) {
|
||||
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "Detected Hyper Threading on CPU %4x; disabled CPU %04lx.\n",
|
||||
cpu, share & ~(1UL << cpu));
|
||||
fprintf(stderr, "Detected Hyper Threading on CPU %4x; disabled CPU %04lx.\n",
|
||||
cpu, share[i] & ~(CPUMASK(cpu)));
|
||||
#endif
|
||||
|
||||
common -> avail &= ~((share & ~(1UL << cpu)));
|
||||
common -> avail[i] &= ~((share[i] & ~ CPUMASK(cpu)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void disable_affinity(void) {
|
||||
|
||||
int i=0;
|
||||
int bitmask_idx=0;
|
||||
int count=0;
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "Final all available CPUs : %04lx.\n\n", common -> avail);
|
||||
fprintf(stderr, "Final all available CPUs : %04lx.\n\n", common -> avail[0]);
|
||||
fprintf(stderr, "CPU mask : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]);
|
||||
#endif
|
||||
|
||||
if(common->final_num_procs > 64){
|
||||
fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->final_num_procs);
|
||||
exit(1);
|
||||
}else if(common->final_num_procs == 64){
|
||||
lprocmask = 0xFFFFFFFFFFFFFFFFUL;
|
||||
}else
|
||||
lprocmask = (1UL << common -> final_num_procs) - 1;
|
||||
/* if(common->final_num_procs > 64){ */
|
||||
/* fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->final_num_procs); */
|
||||
/* exit(1); */
|
||||
/* }else if(common->final_num_procs == 64){ */
|
||||
/* lprocmask = 0xFFFFFFFFFFFFFFFFUL; */
|
||||
/* }else */
|
||||
/* lprocmask = (1UL << common -> final_num_procs) - 1; */
|
||||
|
||||
bitmask_idx = CPUELT(common -> final_num_procs);
|
||||
|
||||
for(i=0; i< bitmask_idx; i++){
|
||||
lprocmask[count++] = 0xFFFFFFFFFFFFFFFFUL;
|
||||
}
|
||||
if(CPUMASK(common -> final_num_procs) != 1){
|
||||
lprocmask[count++] = CPUMASK(common -> final_num_procs) - 1;
|
||||
}
|
||||
lprocmask_count = count;
|
||||
|
||||
#ifndef USE_OPENMP
|
||||
lprocmask &= *(unsigned long *)&cpu_orig_mask[0];
|
||||
for(i=0; i< count; i++){
|
||||
lprocmask[i] &= common->avail[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "I choose these CPUs : %04lx.\n\n", lprocmask);
|
||||
fprintf(stderr, "I choose these CPUs : %04lx.\n\n", lprocmask[0]);
|
||||
#endif
|
||||
|
||||
}
|
||||
@@ -498,7 +589,7 @@ static void create_pshmem(void) {
|
||||
static void local_cpu_map(void) {
|
||||
|
||||
int cpu, id, mapping;
|
||||
|
||||
int bitmask_idx = 0;
|
||||
cpu = 0;
|
||||
mapping = 0;
|
||||
|
||||
@@ -508,8 +599,9 @@ static void local_cpu_map(void) {
|
||||
if (id > 0) {
|
||||
if (is_dead(id)) common -> cpu_use[cpu] = 0;
|
||||
}
|
||||
|
||||
if ((common -> cpu_use[cpu] == 0) && (lprocmask & (1UL << cpu))) {
|
||||
|
||||
bitmask_idx = CPUELT(cpu);
|
||||
if ((common -> cpu_use[cpu] == 0) && (lprocmask[bitmask_idx] & CPUMASK(cpu))) {
|
||||
|
||||
common -> cpu_use[cpu] = pshmid;
|
||||
cpu_mapping[mapping] = READ_CPU(common -> cpu_info[cpu]);
|
||||
@@ -595,6 +687,7 @@ void gotoblas_affinity_init(void) {
|
||||
#ifndef USE_OPENMP
|
||||
cpu_set_t cpu_mask;
|
||||
#endif
|
||||
int i;
|
||||
|
||||
if (initialized) return;
|
||||
|
||||
@@ -644,7 +737,13 @@ void gotoblas_affinity_init(void) {
|
||||
fprintf(stderr, "Shared Memory Initialization.\n");
|
||||
#endif
|
||||
|
||||
common -> num_procs = get_nprocs();
|
||||
//returns the number of processors which are currently online
|
||||
common -> num_procs = sysconf(_SC_NPROCESSORS_ONLN);;
|
||||
|
||||
if(common -> num_procs > MAX_CPUS) {
|
||||
fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (cpu = 0; cpu < common -> num_procs; cpu++) common -> cpu_info[cpu] = cpu;
|
||||
|
||||
@@ -654,7 +753,8 @@ void gotoblas_affinity_init(void) {
|
||||
|
||||
if (common -> num_nodes > 1) numa_mapping();
|
||||
|
||||
common -> final_num_procs = popcount(common -> avail);
|
||||
common -> final_num_procs = 0;
|
||||
for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number.
|
||||
|
||||
for (cpu = 0; cpu < common -> final_num_procs; cpu ++) common -> cpu_use[cpu] = 0;
|
||||
|
||||
@@ -664,7 +764,8 @@ void gotoblas_affinity_init(void) {
|
||||
|
||||
disable_affinity();
|
||||
|
||||
num_avail = popcount(lprocmask);
|
||||
num_avail = 0;
|
||||
for(i=0; i<lprocmask_count; i++) num_avail += popcount(lprocmask[i]);
|
||||
|
||||
if ((numprocs <= 0) || (numprocs > num_avail)) numprocs = num_avail;
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -103,8 +103,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include <sys/syscall.h>
|
||||
#endif
|
||||
|
||||
#if defined(OS_FreeBSD) || defined(OS_Darwin)
|
||||
#if defined(OS_FREEBSD) || defined(OS_DARWIN)
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/resource.h>
|
||||
#endif
|
||||
|
||||
#if defined(OS_WINDOWS) && (defined(__MINGW32__) || defined(__MINGW64__))
|
||||
@@ -125,7 +126,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define NO_WARMUP
|
||||
#endif
|
||||
|
||||
#ifdef ALLOC_HUGETLB
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
@@ -185,7 +186,7 @@ int get_num_procs(void) {
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(OS_FreeBSD) || defined(OS_Darwin)
|
||||
#if defined(OS_FREEBSD)
|
||||
|
||||
int get_num_procs(void) {
|
||||
|
||||
@@ -206,7 +207,46 @@ int get_num_procs(void) {
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(OS_DARWIN)
|
||||
int get_num_procs(void) {
|
||||
static int nums = 0;
|
||||
size_t len;
|
||||
if (nums == 0){
|
||||
len = sizeof(int);
|
||||
sysctlbyname("hw.physicalcpu", &nums, &len, NULL, 0);
|
||||
}
|
||||
return nums;
|
||||
}
|
||||
/*
|
||||
void set_stack_limit(int limitMB){
|
||||
int result=0;
|
||||
struct rlimit rl;
|
||||
rlim_t StackSize;
|
||||
|
||||
StackSize=limitMB*1024*1024;
|
||||
result=getrlimit(RLIMIT_STACK, &rl);
|
||||
if(result==0){
|
||||
if(rl.rlim_cur < StackSize){
|
||||
rl.rlim_cur=StackSize;
|
||||
result=setrlimit(RLIMIT_STACK, &rl);
|
||||
if(result !=0){
|
||||
fprintf(stderr, "OpenBLAS: set stack limit error =%d\n", result);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
#endif
|
||||
|
||||
/*
|
||||
OpenBLAS uses the numbers of CPU cores in multithreading.
|
||||
It can be set by openblas_set_num_threads(int num_threads);
|
||||
*/
|
||||
int blas_cpu_number = 0;
|
||||
/*
|
||||
The numbers of threads in the thread pool.
|
||||
This value is equal or large than blas_cpu_number. This means some threads are sleep.
|
||||
*/
|
||||
int blas_num_threads = 0;
|
||||
|
||||
int goto_get_num_procs (void) {
|
||||
@@ -215,7 +255,7 @@ int goto_get_num_procs (void) {
|
||||
|
||||
int blas_get_cpu_number(void){
|
||||
char *p;
|
||||
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FreeBSD) || defined(OS_Darwin)
|
||||
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN)
|
||||
int max_num;
|
||||
#endif
|
||||
int blas_goto_num = 0;
|
||||
@@ -223,7 +263,7 @@ int blas_get_cpu_number(void){
|
||||
|
||||
if (blas_num_threads) return blas_num_threads;
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FreeBSD) || defined(OS_Darwin)
|
||||
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN)
|
||||
max_num = get_num_procs();
|
||||
#endif
|
||||
|
||||
@@ -250,7 +290,7 @@ int blas_get_cpu_number(void){
|
||||
else if (blas_omp_num > 0) blas_num_threads = blas_omp_num;
|
||||
else blas_num_threads = MAX_CPU_NUMBER;
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FreeBSD) || defined(OS_Darwin)
|
||||
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN)
|
||||
if (blas_num_threads > max_num) blas_num_threads = max_num;
|
||||
#endif
|
||||
|
||||
@@ -323,7 +363,7 @@ static void *alloc_mmap(void *address){
|
||||
#define BENCH_ITERATION 4
|
||||
#define SCALING 2
|
||||
|
||||
static inline BLASULONG run_bench(BLASULONG address, long size) {
|
||||
static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) {
|
||||
|
||||
BLASULONG original, *p;
|
||||
BLASULONG start, stop, min;
|
||||
@@ -390,11 +430,11 @@ static void *alloc_mmap(void *address){
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#ifdef DEBUG
|
||||
int ret;
|
||||
int ret=0;
|
||||
ret=my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
|
||||
if(ret==-1){
|
||||
int errsv=errno;
|
||||
perror("alloc_mmap:");
|
||||
perror("OpenBLAS alloc_mmap:");
|
||||
printf("error code=%d,\tmap_address=%lx\n",errsv,map_address);
|
||||
}
|
||||
|
||||
@@ -410,12 +450,12 @@ static void *alloc_mmap(void *address){
|
||||
current = (SCALING - 1) * BUFFER_SIZE;
|
||||
|
||||
while(current > 0) {
|
||||
*(long *)start = (long)start + PAGESIZE;
|
||||
*(BLASLONG *)start = (BLASLONG)start + PAGESIZE;
|
||||
start += PAGESIZE;
|
||||
current -= PAGESIZE;
|
||||
}
|
||||
|
||||
*(long *)(start - PAGESIZE) = (BLASULONG)map_address;
|
||||
*(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address;
|
||||
|
||||
start = (BLASULONG)map_address;
|
||||
|
||||
@@ -884,7 +924,7 @@ void *blas_memory_alloc(int procpos){
|
||||
if (!blas_num_threads) blas_cpu_number = blas_get_cpu_number();
|
||||
#endif
|
||||
|
||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64)
|
||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
|
||||
#ifndef DYNAMIC_ARCH
|
||||
blas_set_parameter();
|
||||
#endif
|
||||
@@ -1128,9 +1168,9 @@ static BLASULONG init_lock = 0UL;
|
||||
static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n,
|
||||
void *sa, void *sb, BLASLONG pos) {
|
||||
|
||||
#ifndef ARCH_POWER
|
||||
#if !defined(ARCH_POWER) && !defined(ARCH_SPARC)
|
||||
|
||||
long size;
|
||||
size_t size;
|
||||
BLASULONG buffer;
|
||||
|
||||
size = BUFFER_SIZE - PAGESIZE;
|
||||
@@ -1228,6 +1268,7 @@ void CONSTRUCTOR gotoblas_init(void) {
|
||||
|
||||
if (gotoblas_initialized) return;
|
||||
|
||||
|
||||
#ifdef PROFILE
|
||||
moncontrol (0);
|
||||
#endif
|
||||
@@ -1289,6 +1330,7 @@ void DESTRUCTOR gotoblas_quit(void) {
|
||||
moncontrol (1);
|
||||
#endif
|
||||
|
||||
blas_shutdown();
|
||||
}
|
||||
|
||||
#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
|
||||
|
||||
59
driver/others/openblas_get_config.c
Normal file
59
driver/others/openblas_get_config.c
Normal file
@@ -0,0 +1,59 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
static char* openblas_config_str=""
|
||||
#ifdef USE64BITINT
|
||||
"USE64BITINT "
|
||||
#endif
|
||||
#ifdef NO_CBLAS
|
||||
"NO_CBLAS "
|
||||
#endif
|
||||
#ifdef NO_LAPACK
|
||||
"NO_LAPACK "
|
||||
#endif
|
||||
#ifdef NO_LAPACKE
|
||||
"NO_LAPACKE "
|
||||
#endif
|
||||
#ifdef DYNAMIC_ARCH
|
||||
"DYNAMIC_ARCH "
|
||||
#endif
|
||||
#ifdef NO_AFFINITY
|
||||
"NO_AFFINITY "
|
||||
#endif
|
||||
;
|
||||
|
||||
char* CNAME() {
|
||||
return openblas_config_str;
|
||||
}
|
||||
|
||||
52
driver/others/openblas_get_parallel.c
Normal file
52
driver/others/openblas_get_parallel.c
Normal file
@@ -0,0 +1,52 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2013 Martin Koehler, grisuthedragon@users.github.com
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(USE_OPENMP)
|
||||
static int parallel = 2 ;
|
||||
#elif defined(SMP_SERVER)
|
||||
static int parallel = 1;
|
||||
#else
|
||||
static int parallel = 0;
|
||||
#endif
|
||||
|
||||
int CNAME() {
|
||||
return parallel;
|
||||
}
|
||||
|
||||
int NAME() {
|
||||
return parallel;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -33,13 +33,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include "common.h"
|
||||
|
||||
#ifdef SMP_SERVER
|
||||
#ifdef OS_LINUX
|
||||
|
||||
extern void openblas_set_num_threads(int num_threads) ;
|
||||
|
||||
void NAME(int* num_threads){
|
||||
void openblas_set_num_threads_(int* num_threads){
|
||||
openblas_set_num_threads(*num_threads);
|
||||
}
|
||||
|
||||
#endif
|
||||
#else
|
||||
//Single thread
|
||||
|
||||
void openblas_set_num_threads(int num_threads) {
|
||||
}
|
||||
|
||||
void openblas_set_num_threads_(int* num_threads){
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -45,8 +45,22 @@ int get_L2_size(void);
|
||||
#define DEFAULT_GEMM_P 128
|
||||
#define DEFAULT_GEMM_Q 128
|
||||
#define DEFAULT_GEMM_R 128
|
||||
#define DEFAULT_GEMM_OFFSET_A 0
|
||||
#define DEFAULT_GEMM_OFFSET_B 0
|
||||
|
||||
/* Global Parameter */
|
||||
#if GEMM_OFFSET_A == gemm_offset_a
|
||||
BLASLONG gemm_offset_a = DEFAULT_GEMM_OFFSET_A;
|
||||
#else
|
||||
BLASLONG gemm_offset_a = GEMM_OFFSET_A;
|
||||
#endif
|
||||
|
||||
#if GEMM_OFFSET_B == gemm_offset_b
|
||||
BLASLONG gemm_offset_b = DEFAULT_GEMM_OFFSET_B;
|
||||
#else
|
||||
BLASLONG gemm_offset_b = GEMM_OFFSET_B;
|
||||
#endif
|
||||
|
||||
#if SGEMM_P == sgemm_p
|
||||
BLASLONG sgemm_p = DEFAULT_GEMM_P;
|
||||
#else
|
||||
@@ -149,9 +163,9 @@ int get_L2_size(void){
|
||||
|
||||
int eax, ebx, ecx, edx;
|
||||
|
||||
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || \
|
||||
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
|
||||
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
|
||||
defined(CORE_NEHALEM) || defined(ATOM) || defined(GENERIC)
|
||||
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC)
|
||||
|
||||
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
@@ -370,6 +384,17 @@ void blas_set_parameter(void){
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(SANDYBRIDGE)
|
||||
sgemm_p = 1024;
|
||||
dgemm_p = 512;
|
||||
cgemm_p = 512;
|
||||
zgemm_p = 256;
|
||||
#ifdef EXPRECISION
|
||||
qgemm_p = 256;
|
||||
xgemm_p = 128;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(CORE_PRESCOTT) || defined(GENERIC)
|
||||
size >>= 6;
|
||||
|
||||
@@ -421,7 +446,7 @@ void blas_set_parameter(void){
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(CORE_BARCELONA)
|
||||
#if defined(CORE_BARCELONA) || defined(CORE_BOBCAT)
|
||||
size >>= 8;
|
||||
|
||||
sgemm_p = 232 * size;
|
||||
@@ -666,3 +691,36 @@ void blas_set_parameter(void){
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(ARCH_MIPS64)
|
||||
void blas_set_parameter(void){
|
||||
#if defined(LOONGSON3A)
|
||||
#ifdef SMP
|
||||
if(blas_num_threads == 1){
|
||||
#endif
|
||||
//single thread
|
||||
dgemm_r = 1024;
|
||||
#ifdef SMP
|
||||
}else{
|
||||
//multi thread
|
||||
dgemm_r = 200;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(LOONGSON3B)
|
||||
#ifdef SMP
|
||||
if(blas_num_threads == 1 || blas_num_threads == 2){
|
||||
#endif
|
||||
//single thread
|
||||
dgemm_r = 640;
|
||||
#ifdef SMP
|
||||
}else{
|
||||
//multi thread
|
||||
dgemm_r = 160;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -10,10 +10,31 @@ ifndef NO_CBLAS
|
||||
NO_CBLAS = 0
|
||||
endif
|
||||
|
||||
ifndef NO_LAPACK
|
||||
NO_LAPACK = 0
|
||||
endif
|
||||
|
||||
ifndef NO_LAPACKE
|
||||
NO_LAPACKE = 0
|
||||
endif
|
||||
|
||||
ifndef NEED2UNDERSCORES
|
||||
NEED2UNDERSCORES=0
|
||||
endif
|
||||
|
||||
ifndef ONLY_CBLAS
|
||||
ONLY_CBLAS = 0
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
EXTRALIB += -lgfortran
|
||||
endif
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
EXTRALIB += -lgomp
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
@@ -58,34 +79,39 @@ dll : ../$(LIBDLLNAME)
|
||||
|
||||
dll2 : libgoto2_shared.dll
|
||||
|
||||
../$(LIBDLLNAME) : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX)
|
||||
# On Windows, we only generate a DLL without a version suffix. This is because
|
||||
# applications which link against the dynamic library reference a fixed DLL name
|
||||
# in their import table. By instead using a stable name it is possible to
|
||||
# upgrade between library versions, without needing to re-link an application.
|
||||
# For more details see: https://github.com/xianyi/OpenBLAS/issues/127.
|
||||
../$(LIBDLLNAME) : ../$(LIBNAME) libopenblas.def dllinit.$(SUFFIX)
|
||||
$(RANLIB) ../$(LIBNAME)
|
||||
ifeq ($(BINARY32), 1)
|
||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \
|
||||
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
||||
-lib /machine:i386 /def:libgoto2.def
|
||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libopenblas.def \
|
||||
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(EXTRALIB)
|
||||
-lib /machine:i386 /def:libopenblas.def
|
||||
else
|
||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \
|
||||
--entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
||||
-lib /machine:X64 /def:libgoto2.def
|
||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libopenblas.def \
|
||||
--entry $(FU)dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(EXTRALIB)
|
||||
-lib /machine:X64 /def:libopenblas.def
|
||||
endif
|
||||
|
||||
libgoto2_shared.dll : ../$(LIBNAME) libgoto2_shared.def
|
||||
$(CC) $(CFLAGS) libgoto2_shared.def -shared -o $(@F) \
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) libgoto2_shared.def -shared -o $(@F) \
|
||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
|
||||
-Wl,--out-implib,libgoto2_shared.lib $(FEXTRALIB)
|
||||
|
||||
libgoto2.def : gensymbol
|
||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F)
|
||||
libopenblas.def : gensymbol
|
||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
||||
libgoto2_shared.def : gensymbol
|
||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F)
|
||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
||||
libgoto_hpl.def : gensymbol
|
||||
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F)
|
||||
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
||||
$(LIBDYNNAME) : ../$(LIBNAME) osx.def
|
||||
$(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
|
||||
$(FC) $(FFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
|
||||
|
||||
symbol.$(SUFFIX) : symbol.S
|
||||
$(CC) $(CFLAGS) -c -o $(@F) $^
|
||||
@@ -98,23 +124,32 @@ ifeq ($(OSNAME), Linux)
|
||||
so : ../$(LIBSONAME)
|
||||
|
||||
../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c
|
||||
$(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
ifneq ($(C_COMPILER), LSB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
|
||||
-Wl,--retain-symbols-file=linux.def $(EXTRALIB)
|
||||
$(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||
-Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||
else
|
||||
#for LSB
|
||||
env LSBCC_SHAREDLIBS=gfortran $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
|
||||
-Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB)
|
||||
$(FC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||
endif
|
||||
rm -f linktest
|
||||
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
#http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD NetBSD))
|
||||
|
||||
so : ../$(LIBSONAME)
|
||||
|
||||
../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c
|
||||
$(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
|
||||
-Wl,--retain-symbols-file=linux.def $(EXTRALIB)
|
||||
$(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||
$(FEXTRALIB) $(EXTRALIB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||
rm -f linktest
|
||||
|
||||
endif
|
||||
@@ -124,15 +159,15 @@ ifeq ($(OSNAME), OSF1)
|
||||
so : ../$(LIBSONAME)
|
||||
|
||||
../$(LIBSONAME) :
|
||||
$(CC) -shared -o ../$(LIBSONAME) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) ../$(LIBNAME)
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), SunOS)
|
||||
|
||||
so : ../$(LIBSONAME)
|
||||
$(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(EXTRALIB)
|
||||
$(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||
rm -f linktest
|
||||
|
||||
endif
|
||||
@@ -163,23 +198,23 @@ static : ../$(LIBNAME)
|
||||
rm -f goto.$(SUFFIX)
|
||||
|
||||
linux.def : gensymbol ../Makefile.system ../getarch.c
|
||||
perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F)
|
||||
perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
||||
osx.def : gensymbol ../Makefile.system ../getarch.c
|
||||
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F)
|
||||
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
||||
aix.def : gensymbol ../Makefile.system ../getarch.c
|
||||
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F)
|
||||
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
||||
symbol.S : gensymbol
|
||||
perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > symbol.S
|
||||
perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > symbol.S
|
||||
|
||||
test : linktest.c
|
||||
$(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
||||
rm -f linktest
|
||||
|
||||
linktest.c : gensymbol ../Makefile.system ../getarch.c
|
||||
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > linktest.c
|
||||
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > linktest.c
|
||||
|
||||
clean ::
|
||||
@rm -f *.def *.dylib __.SYMDEF*
|
||||
|
||||
2730
exports/gensymbol
2730
exports/gensymbol
File diff suppressed because it is too large
Load Diff
42
f_check
42
f_check
@@ -24,7 +24,7 @@ $compiler = "" if $compiler eq "f77";
|
||||
|
||||
if ($compiler eq "") {
|
||||
|
||||
@lists = ("f77", "g77", "g95", "gfortran", "frt", "fort", "openf90", "openf95",
|
||||
@lists = ("g77", "g95", "gfortran", "frt", "fort", "openf90", "openf95",
|
||||
"sunf77", "sunf90", "sunf95",
|
||||
"xlf95", "xlf90", "xlf",
|
||||
"ppuf77", "ppuf95", "ppuf90", "ppuxlf",
|
||||
@@ -32,11 +32,12 @@ if ($compiler eq "") {
|
||||
"pgf95", "pgf90", "pgf77",
|
||||
"ifort");
|
||||
|
||||
OUTER:
|
||||
foreach $lists (@lists) {
|
||||
foreach $path (@path) {
|
||||
if (-f $path . "/" . $lists) {
|
||||
if (-x $path . "/" . $lists) {
|
||||
$compiler = $lists;
|
||||
break;
|
||||
last OUTER;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -113,6 +114,12 @@ if ($compiler eq "") {
|
||||
$vendor = IBM;
|
||||
$openmp = "-openmp";
|
||||
}
|
||||
|
||||
# for embeded underscore name, e.g. zho_ge, it may append 2 underscores.
|
||||
$data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s`;
|
||||
if ($data =~ /zho_ge__/) {
|
||||
$need2bu = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if ($vendor eq "") {
|
||||
@@ -210,6 +217,10 @@ if (!$?) {
|
||||
if ($?) {
|
||||
$link = `$compiler $openmp -q32 -v ftest2.f 2>&1 && rm -f a.out a.exe`;
|
||||
}
|
||||
#For gfortran MIPS
|
||||
if ($?) {
|
||||
$link = `$compiler $openmp -mabi=n32 -v ftest2.f 2>&1 && rm -f a.out a.exe`;
|
||||
}
|
||||
$binary = "" if ($?);
|
||||
}
|
||||
|
||||
@@ -218,6 +229,10 @@ if (!$?) {
|
||||
if ($?) {
|
||||
$link = `$compiler $openmp -q64 -v ftest2.f 2>&1 && rm -f a.out a.exe`;
|
||||
}
|
||||
#For gfortran MIPS
|
||||
if ($?) {
|
||||
$link = `$compiler $openmp -mabi=64 -v ftest2.f 2>&1 && rm -f a.out a.exe`;
|
||||
}
|
||||
$binary = "" if ($?);
|
||||
}
|
||||
|
||||
@@ -236,7 +251,11 @@ if ($link ne "") {
|
||||
|
||||
$link =~ s/\-rpath\s+/\-rpath\@/g;
|
||||
|
||||
$link =~ s/\-rpath-link\s+/\-rpath-link\@/g;
|
||||
|
||||
@flags = split(/[\s\,\n]/, $link);
|
||||
# remove leading and trailing quotes from each flag.
|
||||
@flags = map {s/^['"]|['"]$//g; $_} @flags;
|
||||
|
||||
foreach $flags (@flags) {
|
||||
if (
|
||||
@@ -254,7 +273,15 @@ if ($link ne "") {
|
||||
$linker_L .= "-Wl,". $flags . " ";
|
||||
}
|
||||
|
||||
if ($flags =~ /^\-rpath/) {
|
||||
if ($flags =~ /^\-rpath\@/) {
|
||||
$flags =~ s/\@/\,/g;
|
||||
if ($vendor eq "PGI") {
|
||||
$flags =~ s/lib$/libso/;
|
||||
}
|
||||
$linker_L .= "-Wl,". $flags . " " ;
|
||||
}
|
||||
|
||||
if ($flags =~ /^\-rpath-link\@/) {
|
||||
$flags =~ s/\@/\,/g;
|
||||
if ($vendor eq "PGI") {
|
||||
$flags =~ s/lib$/libso/;
|
||||
@@ -284,6 +311,10 @@ if ($link ne "") {
|
||||
|
||||
}
|
||||
|
||||
if ($vendor eq "INTEL"){
|
||||
$linker_a .= "-lgfortran"
|
||||
}
|
||||
|
||||
open(MAKEFILE, ">> $makefile") || die "Can't append $makefile";
|
||||
open(CONFFILE, ">> $config" ) || die "Can't append $config";
|
||||
|
||||
@@ -294,6 +325,9 @@ print MAKEFILE "NOFORTRAN=1\n" if $nofortran == 1;
|
||||
|
||||
print CONFFILE "#define BUNDERSCORE\t$bu\n" if $bu ne "";
|
||||
print CONFFILE "#define NEEDBUNDERSCORE\t1\n" if $bu ne "";
|
||||
print CONFFILE "#define NEED2UNDERSCORES\t1\n" if $need2bu ne "";
|
||||
|
||||
print MAKEFILE "NEED2UNDERSCORES=1\n" if $need2bu ne "";
|
||||
|
||||
if (($linker_l ne "") || ($linker_a ne "")) {
|
||||
print MAKEFILE "FEXTRALIB=$linker_L $linker_l $linker_a\n";
|
||||
|
||||
6
ftest3.f
Normal file
6
ftest3.f
Normal file
@@ -0,0 +1,6 @@
|
||||
double complex function zho_ge()
|
||||
|
||||
zho_ge = (0.0d0,0.0d0)
|
||||
|
||||
return
|
||||
end
|
||||
267
getarch.c
267
getarch.c
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -83,6 +83,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#endif
|
||||
#ifdef linux
|
||||
#include <sys/sysinfo.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
/* #define FORCE_P2 */
|
||||
@@ -96,12 +97,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
/* #define FORCE_PENRYN */
|
||||
/* #define FORCE_DUNNINGTON */
|
||||
/* #define FORCE_NEHALEM */
|
||||
/* #define FORCE_SANDYBRIDGE */
|
||||
/* #define FORCE_ATOM */
|
||||
/* #define FORCE_ATHLON */
|
||||
/* #define FORCE_OPTERON */
|
||||
/* #define FORCE_OPTERON_SSE3 */
|
||||
/* #define FORCE_BARCELONA */
|
||||
/* #define FORCE_SHANGHAI */
|
||||
/* #define FORCE_ISTANBUL */
|
||||
/* #define FORCE_BOBCAT */
|
||||
/* #define FORCE_BULLDOZER */
|
||||
/* #define FORCE_PILEDRIVER */
|
||||
/* #define FORCE_SSE_GENERIC */
|
||||
/* #define FORCE_VIAC3 */
|
||||
/* #define FORCE_NANO */
|
||||
@@ -116,11 +122,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
/* #define FORCE_PPC440FP2 */
|
||||
/* #define FORCE_CELL */
|
||||
/* #define FORCE_SICORTEX */
|
||||
/* #define FORCE_LOONGSON3A */
|
||||
/* #define FORCE_LOONGSON3A */
|
||||
/* #define FORCE_LOONGSON3B */
|
||||
/* #define FORCE_ITANIUM2 */
|
||||
/* #define FORCE_GENERIC */
|
||||
/* #define FORCE_SPARC */
|
||||
/* #define FORCE_SPARCV7 */
|
||||
/* #define FORCE_GENERIC */
|
||||
|
||||
#ifdef FORCE_P2
|
||||
#define FORCE
|
||||
@@ -130,26 +137,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPENTIUM2 " \
|
||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX"
|
||||
#define LIBNAME "p2"
|
||||
#define CORENAME "P5"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_COPPERMINE
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
#define ARCHITECTURE "X86"
|
||||
#define SUBARCHITECTURE "PENTIUM3"
|
||||
#define ARCHCONFIG "-DPENTIUM3 " \
|
||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE "
|
||||
#define LIBNAME "coppermine"
|
||||
#define CORENAME "COPPERMINE"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_KATMAI
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
@@ -158,12 +151,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPENTIUM3 " \
|
||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE "
|
||||
#define LIBNAME "katmai"
|
||||
#define CORENAME "KATMAI"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_COPPERMINE
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
#define ARCHITECTURE "X86"
|
||||
#define SUBARCHITECTURE "PENTIUM3"
|
||||
#define ARCHCONFIG "-DPENTIUM3 " \
|
||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE "
|
||||
#define LIBNAME "coppermine"
|
||||
#define CORENAME "COPPERMINE"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_NORTHWOOD
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
@@ -172,7 +179,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPENTIUM4 " \
|
||||
"-DL1_DATA_SIZE=8192 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
|
||||
#define LIBNAME "northwood"
|
||||
#define CORENAME "NORTHWOOD"
|
||||
@@ -186,7 +193,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPENTIUM4 " \
|
||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3"
|
||||
#define LIBNAME "prescott"
|
||||
#define CORENAME "PRESCOTT"
|
||||
@@ -200,7 +207,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPENTIUMM " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
|
||||
#define LIBNAME "banias"
|
||||
#define CORENAME "BANIAS"
|
||||
@@ -214,7 +221,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPENTIUMM " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
|
||||
#define LIBNAME "yonah"
|
||||
#define CORENAME "YONAH"
|
||||
@@ -228,7 +235,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DCORE2 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=256 -DDTB_SIZE=4096 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
|
||||
#define LIBNAME "core2"
|
||||
#define CORENAME "CORE2"
|
||||
@@ -242,7 +249,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPENRYN " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=256 -DDTB_SIZE=4096 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1"
|
||||
#define LIBNAME "penryn"
|
||||
#define CORENAME "PENRYN"
|
||||
@@ -257,7 +264,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DL3_SIZE=16777216 -DL3_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=256 -DDTB_SIZE=4096 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1"
|
||||
#define LIBNAME "dunnington"
|
||||
#define CORENAME "DUNNINGTON"
|
||||
@@ -271,12 +278,41 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DNEHALEM " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2"
|
||||
#define LIBNAME "nehalem"
|
||||
#define CORENAME "NEHALEM"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_SANDYBRIDGE
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
#define ARCHITECTURE "X86"
|
||||
#define SUBARCHITECTURE "SANDYBRIDGE"
|
||||
#define ARCHCONFIG "-DSANDYBRIDGE " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX"
|
||||
#define LIBNAME "sandybridge"
|
||||
#define CORENAME "SANDYBRIDGE"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_HASWELL
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
#define ARCHITECTURE "X86"
|
||||
#define SUBARCHITECTURE "HASWELL"
|
||||
#define ARCHCONFIG "-DHASWELL " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
|
||||
"-DFMA3"
|
||||
#define LIBNAME "haswell"
|
||||
#define CORENAME "HASWELL"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_ATOM
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
@@ -285,7 +321,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DATOM " \
|
||||
"-DL1_DATA_SIZE=24576 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
|
||||
#define LIBNAME "atom"
|
||||
#define CORENAME "ATOM"
|
||||
@@ -299,7 +335,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DATHLON " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
|
||||
"-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
|
||||
"-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE "
|
||||
#define LIBNAME "athlon"
|
||||
#define CORENAME "ATHLON"
|
||||
@@ -313,7 +349,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DOPTERON " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
|
||||
"-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
|
||||
"-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
|
||||
#define LIBNAME "opteron"
|
||||
#define CORENAME "OPTERON"
|
||||
@@ -327,7 +363,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DOPTERON " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
|
||||
"-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
|
||||
"-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3"
|
||||
#define LIBNAME "opteron"
|
||||
#define CORENAME "OPTERON"
|
||||
@@ -341,13 +377,60 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DBARCELONA " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL3_SIZE=2097152 " \
|
||||
"-DDTB_ENTRIES=48 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
|
||||
"-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
|
||||
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU"
|
||||
#define LIBNAME "barcelona"
|
||||
#define CORENAME "BARCELONA"
|
||||
#endif
|
||||
|
||||
#if defined(FORCE_BOBCAT)
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
#define ARCHITECTURE "X86"
|
||||
#define SUBARCHITECTURE "BOBCAT"
|
||||
#define ARCHCONFIG "-DBOBCAT " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=40 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 " \
|
||||
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_CFLUSH -DHAVE_CMOV"
|
||||
#define LIBNAME "bobcat"
|
||||
#define CORENAME "BOBCAT"
|
||||
#endif
|
||||
|
||||
#if defined (FORCE_BULLDOZER)
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
#define ARCHITECTURE "X86"
|
||||
#define SUBARCHITECTURE "BULLDOZER"
|
||||
#define ARCHCONFIG "-DBULLDOZER " \
|
||||
"-DL1_DATA_SIZE=49152 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1024000 -DL2_LINESIZE=64 -DL3_SIZE=16777216 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
|
||||
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU " \
|
||||
"-DHAVE_AVX -DHAVE_FMA4"
|
||||
#define LIBNAME "bulldozer"
|
||||
#define CORENAME "BULLDOZER"
|
||||
#endif
|
||||
|
||||
#if defined (FORCE_PILEDRIVER)
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
#define ARCHITECTURE "X86"
|
||||
#define SUBARCHITECTURE "PILEDRIVER"
|
||||
#define ARCHCONFIG "-DPILEDRIVER " \
|
||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL3_SIZE=12582912 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
|
||||
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
|
||||
"-DHAVE_AVX -DHAVE_FMA4 -DHAVE_FMA3"
|
||||
#define LIBNAME "piledriver"
|
||||
#define CORENAME "PILEDRIVER"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_SSE_GENERIC
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
@@ -356,7 +439,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DGENERIC " \
|
||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2"
|
||||
#define LIBNAME "generic"
|
||||
#define CORENAME "GENERIC"
|
||||
@@ -370,7 +453,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DVIAC3 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=65536 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_MMX -DHAVE_SSE "
|
||||
#define LIBNAME "viac3"
|
||||
#define CORENAME "VIAC3"
|
||||
@@ -384,7 +467,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DNANO " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
|
||||
#define LIBNAME "nano"
|
||||
#define CORENAME "NANO"
|
||||
@@ -398,7 +481,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPOWER3 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=2097152 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=256 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
"-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "power3"
|
||||
#define CORENAME "POWER3"
|
||||
#endif
|
||||
@@ -411,7 +494,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPOWER4 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=1509949 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 "
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 "
|
||||
#define LIBNAME "power4"
|
||||
#define CORENAME "POWER4"
|
||||
#endif
|
||||
@@ -424,7 +507,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPOWER5 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=1509949 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 "
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 "
|
||||
#define LIBNAME "power5"
|
||||
#define CORENAME "POWER5"
|
||||
#endif
|
||||
@@ -437,7 +520,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPOWER6 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "power6"
|
||||
#define CORENAME "POWER6"
|
||||
#endif
|
||||
@@ -450,7 +533,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPPCG4 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "ppcg4"
|
||||
#define CORENAME "PPCG4"
|
||||
#endif
|
||||
@@ -463,7 +546,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPPC970 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "ppc970"
|
||||
#define CORENAME "PPC970"
|
||||
#endif
|
||||
@@ -476,7 +559,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPPC970 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=1024976 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "ppc970mp"
|
||||
#define CORENAME "PPC970"
|
||||
#endif
|
||||
@@ -489,7 +572,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPPC440 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=16384 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
|
||||
#define LIBNAME "ppc440"
|
||||
#define CORENAME "PPC440"
|
||||
#endif
|
||||
@@ -502,7 +585,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DPPC440FP2 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=16384 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
|
||||
#define LIBNAME "ppc440FP2"
|
||||
#define CORENAME "PPC440FP2"
|
||||
#endif
|
||||
@@ -515,7 +598,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DCELL " \
|
||||
"-DL1_DATA_SIZE=262144 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "cell"
|
||||
#define CORENAME "CELL"
|
||||
#endif
|
||||
@@ -528,7 +611,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DSICORTEX " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_ENTRIES=32 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
"-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "mips"
|
||||
#define CORENAME "sicortex"
|
||||
#endif
|
||||
@@ -542,12 +625,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DLOONGSON3A " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
|
||||
#define LIBNAME "loongson3a"
|
||||
#define CORENAME "LOONGSON3A"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_LOONGSON3B
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "MIPS"
|
||||
#define SUBARCHITECTURE "LOONGSON3B"
|
||||
#define SUBDIRNAME "mips64"
|
||||
#define ARCHCONFIG "-DLOONGSON3B " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
|
||||
#define LIBNAME "loongson3b"
|
||||
#define CORENAME "LOONGSON3B"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_ITANIUM2
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "IA64"
|
||||
@@ -555,7 +652,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define SUBDIRNAME "ia64"
|
||||
#define ARCHCONFIG "-DITANIUM2 " \
|
||||
"-DL1_DATA_SIZE=262144 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=1572864 -DL2_LINESIZE=128 -DDTB_SIZE=16384 -DDTB_ENTRIES=128 "
|
||||
"-DL2_SIZE=1572864 -DL2_LINESIZE=128 -DDTB_SIZE=16384 -DDTB_DEFAULT_ENTRIES=128 "
|
||||
#define LIBNAME "itanium2"
|
||||
#define CORENAME "itanium2"
|
||||
#endif
|
||||
@@ -567,7 +664,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define SUBDIRNAME "sparc"
|
||||
#define ARCHCONFIG "-DSPARC -DV9 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_ENTRIES=64 "
|
||||
"-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_DEFAULT_ENTRIES=64 "
|
||||
#define LIBNAME "sparc"
|
||||
#define CORENAME "sparc"
|
||||
#endif
|
||||
@@ -579,7 +676,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define SUBDIRNAME "sparc"
|
||||
#define ARCHCONFIG "-DSPARC -DV7 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_ENTRIES=64 "
|
||||
"-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_DEFAULT_ENTRIES=64 "
|
||||
#define LIBNAME "sparcv7"
|
||||
#define CORENAME "sparcv7"
|
||||
#endif
|
||||
@@ -592,11 +689,57 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DGENERIC " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "generic"
|
||||
#define CORENAME "generic"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_ARMV7
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM"
|
||||
#define SUBARCHITECTURE "ARMV7"
|
||||
#define SUBDIRNAME "arm"
|
||||
#define ARCHCONFIG "-DARMV7 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||
"-DHAVE_VFPV3 -DHAVE_VFP"
|
||||
#define LIBNAME "armv7"
|
||||
#define CORENAME "ARMV7"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_ARMV6
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM"
|
||||
#define SUBARCHITECTURE "ARMV6"
|
||||
#define SUBDIRNAME "arm"
|
||||
#define ARCHCONFIG "-DARMV6 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||
"-DHAVE_VFP"
|
||||
#define LIBNAME "armv6"
|
||||
#define CORENAME "ARMV6"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_ARMV8
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM64"
|
||||
#define SUBARCHITECTURE "ARMV8"
|
||||
#define SUBDIRNAME "arm64"
|
||||
#define ARCHCONFIG "-DARMV8 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||
"-DHAVE_VFP -DHAVE_VFPV3 -DHAVE_VFPV4"
|
||||
#define LIBNAME "armv8"
|
||||
#define CORENAME "ARMV8"
|
||||
#else
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef FORCE
|
||||
|
||||
#if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \
|
||||
@@ -637,6 +780,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
#ifdef __arm__
|
||||
#include "cpuid_arm.c"
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef OPENBLAS_SUPPORTED
|
||||
#error "This arch/CPU is not supported by OpenBLAS."
|
||||
#endif
|
||||
@@ -655,7 +804,8 @@ static int get_num_cores(void) {
|
||||
#endif
|
||||
|
||||
#ifdef linux
|
||||
return get_nprocs();
|
||||
//returns the number of processors which are currently online
|
||||
return sysconf(_SC_NPROCESSORS_ONLN);
|
||||
|
||||
#elif defined(OS_WINDOWS)
|
||||
|
||||
@@ -690,7 +840,7 @@ int main(int argc, char *argv[]){
|
||||
#ifdef FORCE
|
||||
printf("CORE=%s\n", CORENAME);
|
||||
#else
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__)
|
||||
printf("CORE=%s\n", get_corename());
|
||||
#endif
|
||||
#endif
|
||||
@@ -705,6 +855,11 @@ int main(int argc, char *argv[]){
|
||||
|
||||
printf("NUM_CORES=%d\n", get_num_cores());
|
||||
|
||||
#if defined(__arm__) && !defined(FORCE)
|
||||
get_features();
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
#ifndef FORCE
|
||||
get_sse();
|
||||
@@ -740,8 +895,12 @@ int main(int argc, char *argv[]){
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if NO_PARALLEL_MAKE==1
|
||||
printf("MAKE += -j 1\n");
|
||||
#else
|
||||
#ifndef OS_WINDOWS
|
||||
printf("MAKE += -j %d\n", get_num_cores());
|
||||
#endif
|
||||
#endif
|
||||
|
||||
break;
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
if ((argc < 1) || (*argv[1] == '0')) {
|
||||
if ( (argc <= 1) || ((argc >= 2) && (*argv[1] == '0'))) {
|
||||
printf("SGEMM_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M);
|
||||
printf("SGEMM_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N);
|
||||
printf("DGEMM_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M);
|
||||
@@ -22,10 +22,48 @@ int main(int argc, char **argv) {
|
||||
printf("ZGEMM_UNROLL_N=%d\n", ZGEMM_DEFAULT_UNROLL_N);
|
||||
printf("XGEMM_UNROLL_M=%d\n", XGEMM_DEFAULT_UNROLL_M);
|
||||
printf("XGEMM_UNROLL_N=%d\n", XGEMM_DEFAULT_UNROLL_N);
|
||||
|
||||
#ifdef CGEMM3M_DEFAULT_UNROLL_M
|
||||
printf("CGEMM3M_UNROLL_M=%d\n", CGEMM3M_DEFAULT_UNROLL_M);
|
||||
#else
|
||||
printf("CGEMM3M_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M);
|
||||
#endif
|
||||
|
||||
#ifdef CGEMM3M_DEFAULT_UNROLL_N
|
||||
printf("CGEMM3M_UNROLL_N=%d\n", CGEMM3M_DEFAULT_UNROLL_N);
|
||||
#else
|
||||
printf("CGEMM3M_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N);
|
||||
#endif
|
||||
|
||||
#ifdef ZGEMM3M_DEFAULT_UNROLL_M
|
||||
printf("ZGEMM3M_UNROLL_M=%d\n", ZGEMM3M_DEFAULT_UNROLL_M);
|
||||
#else
|
||||
printf("ZGEMM3M_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M);
|
||||
#endif
|
||||
|
||||
#ifdef ZGEMM3M_DEFAULT_UNROLL_N
|
||||
printf("ZGEMM3M_UNROLL_N=%d\n", ZGEMM3M_DEFAULT_UNROLL_N);
|
||||
#else
|
||||
printf("ZGEMM3M_UNROLL_N=%d\n", DGEMM_DEFAULT_UNROLL_N);
|
||||
#endif
|
||||
|
||||
#ifdef XGEMM3M_DEFAULT_UNROLL_M
|
||||
printf("XGEMM3M_UNROLL_M=%d\n", ZGEMM3M_DEFAULT_UNROLL_M);
|
||||
#else
|
||||
printf("XGEMM3M_UNROLL_M=%d\n", QGEMM_DEFAULT_UNROLL_M);
|
||||
#endif
|
||||
|
||||
#ifdef XGEMM3M_DEFAULT_UNROLL_N
|
||||
printf("XGEMM3M_UNROLL_N=%d\n", ZGEMM3M_DEFAULT_UNROLL_N);
|
||||
#else
|
||||
printf("XGEMM3M_UNROLL_N=%d\n", QGEMM_DEFAULT_UNROLL_N);
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
if ((argc >= 1) && (*argv[1] == '1')) {
|
||||
if ((argc >= 2) && (*argv[1] == '1')) {
|
||||
printf("#define SLOCAL_BUFFER_SIZE\t%ld\n", (SGEMM_DEFAULT_Q * SGEMM_DEFAULT_UNROLL_N * 4 * 1 * sizeof(float)));
|
||||
printf("#define DLOCAL_BUFFER_SIZE\t%ld\n", (DGEMM_DEFAULT_Q * DGEMM_DEFAULT_UNROLL_N * 2 * 1 * sizeof(double)));
|
||||
printf("#define CLOCAL_BUFFER_SIZE\t%ld\n", (CGEMM_DEFAULT_Q * CGEMM_DEFAULT_UNROLL_N * 4 * 2 * sizeof(float)));
|
||||
@@ -34,6 +72,7 @@ int main(int argc, char **argv) {
|
||||
#ifdef USE64BITINT
|
||||
printf("#define USE64BITINT\n");
|
||||
#endif
|
||||
printf("#define GEMM_MULTITHREAD_THRESHOLD\t%ld\n", (long int)GEMM_MULTITHREAD_THRESHOLD);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -318,7 +318,7 @@ CZBLAS3OBJS = \
|
||||
|
||||
ifndef NO_CBLAS
|
||||
|
||||
CFLAGS += -I.
|
||||
override CFLAGS += -I.
|
||||
|
||||
SBLAS1OBJS += $(CSBLAS1OBJS)
|
||||
SBLAS2OBJS += $(CSBLAS2OBJS)
|
||||
@@ -400,7 +400,7 @@ all :: libs
|
||||
|
||||
ifdef FUNCTION_PROFILE
|
||||
$(BLASOBJS) $(BLASOBJS_P) : functable.h
|
||||
$(BLASOBJS) $(BLASOBJS_P) : CFLAGS += -DPROFILE_FUNC_NAME=interface_$(*F)
|
||||
$(BLASOBJS) $(BLASOBJS_P) : override CFLAGS += -DPROFILE_FUNC_NAME=interface_$(*F)
|
||||
|
||||
functable.h : Makefile
|
||||
./create $(FUNCALLFILES) > functable.h
|
||||
@@ -420,7 +420,7 @@ level3 : $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $
|
||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||
|
||||
$(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \
|
||||
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : CFLAGS += -DCBLAS
|
||||
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : override CFLAGS += -DCBLAS
|
||||
|
||||
srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
@@ -770,20 +770,36 @@ xgeru.$(SUFFIX) xgeru.$(PSUFFIX) : zger.c
|
||||
xgerc.$(SUFFIX) xgerc.$(PSUFFIX) : zger.c
|
||||
$(CC) -c $(CFLAGS) -DCONJ $< -o $(@F)
|
||||
|
||||
ifndef USE_NETLIB_GEMV
|
||||
sgemv.$(SUFFIX) sgemv.$(PSUFFIX): gemv.c
|
||||
$(CC) -c $(CFLAGS) -o $(@F) $<
|
||||
|
||||
dgemv.$(SUFFIX) dgemv.$(PSUFFIX): gemv.c
|
||||
$(CC) -c $(CFLAGS) -o $(@F) $<
|
||||
else
|
||||
sgemv.$(SUFFIX) sgemv.$(PSUFFIX): netlib/sgemv.f
|
||||
$(FC) -c $(FFLAGS) -o $(@F) $<
|
||||
|
||||
dgemv.$(SUFFIX) dgemv.$(PSUFFIX): netlib/dgemv.f
|
||||
$(FC) -c $(FFLAGS) -o $(@F) $<
|
||||
endif
|
||||
|
||||
qgemv.$(SUFFIX) qgemv.$(PSUFFIX): gemv.c
|
||||
$(CC) -c $(CFLAGS) -o $(@F) $<
|
||||
|
||||
ifndef USE_NETLIB_GEMV
|
||||
cgemv.$(SUFFIX) cgemv.$(PSUFFIX): zgemv.c
|
||||
$(CC) -c $(CFLAGS) -o $(@F) $<
|
||||
|
||||
zgemv.$(SUFFIX) zgemv.$(PSUFFIX): zgemv.c
|
||||
$(CC) -c $(CFLAGS) -o $(@F) $<
|
||||
else
|
||||
cgemv.$(SUFFIX) cgemv.$(PSUFFIX): netlib/cgemv.f
|
||||
$(FC) -c $(FFLAGS) -o $(@F) $<
|
||||
|
||||
zgemv.$(SUFFIX) zgemv.$(PSUFFIX): netlib/zgemv.f
|
||||
$(FC) -c $(FFLAGS) -o $(@F) $<
|
||||
endif
|
||||
|
||||
xgemv.$(SUFFIX) xgemv.$(PSUFFIX): zgemv.c
|
||||
$(CC) -c $(CFLAGS) -o $(@F) $<
|
||||
|
||||
@@ -71,6 +71,10 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef GEMM_MULTITHREAD_THRESHOLD
|
||||
# define GEMM_MULTITHREAD_THRESHOLD 4
|
||||
#endif
|
||||
|
||||
static int (*gemm[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
|
||||
#ifndef GEMM3M
|
||||
GEMM_NN, GEMM_TN, GEMM_RN, GEMM_CN,
|
||||
@@ -397,8 +401,13 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
|
||||
mode |= (transb << BLAS_TRANSB_SHIFT);
|
||||
|
||||
args.common = NULL;
|
||||
args.nthreads = num_cpu_avail(3);
|
||||
|
||||
if(args.m <= GEMM_MULTITHREAD_THRESHOLD || args.n <= GEMM_MULTITHREAD_THRESHOLD
|
||||
|| args.k <=GEMM_MULTITHREAD_THRESHOLD){
|
||||
args.nthreads = 1;
|
||||
}else{
|
||||
args.nthreads = num_cpu_avail(3);
|
||||
}
|
||||
if (args.nthreads == 1) {
|
||||
#endif
|
||||
|
||||
|
||||
285
interface/netlib/cgemv.f
Normal file
285
interface/netlib/cgemv.f
Normal file
@@ -0,0 +1,285 @@
|
||||
SUBROUTINE CGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
* .. Scalar Arguments ..
|
||||
COMPLEX ALPHA,BETA
|
||||
INTEGER INCX,INCY,LDA,M,N
|
||||
CHARACTER TRANS
|
||||
* ..
|
||||
* .. Array Arguments ..
|
||||
COMPLEX A(LDA,*),X(*),Y(*)
|
||||
* ..
|
||||
*
|
||||
* Purpose
|
||||
* =======
|
||||
*
|
||||
* CGEMV performs one of the matrix-vector operations
|
||||
*
|
||||
* y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or
|
||||
*
|
||||
* y := alpha*A**H*x + beta*y,
|
||||
*
|
||||
* where alpha and beta are scalars, x and y are vectors and A is an
|
||||
* m by n matrix.
|
||||
*
|
||||
* Arguments
|
||||
* ==========
|
||||
*
|
||||
* TRANS - CHARACTER*1.
|
||||
* On entry, TRANS specifies the operation to be performed as
|
||||
* follows:
|
||||
*
|
||||
* TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
|
||||
*
|
||||
* TRANS = 'T' or 't' y := alpha*A**T*x + beta*y.
|
||||
*
|
||||
* TRANS = 'C' or 'c' y := alpha*A**H*x + beta*y.
|
||||
*
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* M - INTEGER.
|
||||
* On entry, M specifies the number of rows of the matrix A.
|
||||
* M must be at least zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* N - INTEGER.
|
||||
* On entry, N specifies the number of columns of the matrix A.
|
||||
* N must be at least zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* ALPHA - COMPLEX .
|
||||
* On entry, ALPHA specifies the scalar alpha.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* A - COMPLEX array of DIMENSION ( LDA, n ).
|
||||
* Before entry, the leading m by n part of the array A must
|
||||
* contain the matrix of coefficients.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* LDA - INTEGER.
|
||||
* On entry, LDA specifies the first dimension of A as declared
|
||||
* in the calling (sub) program. LDA must be at least
|
||||
* max( 1, m ).
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* X - COMPLEX array of DIMENSION at least
|
||||
* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
|
||||
* and at least
|
||||
* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
|
||||
* Before entry, the incremented array X must contain the
|
||||
* vector x.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* INCX - INTEGER.
|
||||
* On entry, INCX specifies the increment for the elements of
|
||||
* X. INCX must not be zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* BETA - COMPLEX .
|
||||
* On entry, BETA specifies the scalar beta. When BETA is
|
||||
* supplied as zero then Y need not be set on input.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* Y - COMPLEX array of DIMENSION at least
|
||||
* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
|
||||
* and at least
|
||||
* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
|
||||
* Before entry with BETA non-zero, the incremented array Y
|
||||
* must contain the vector y. On exit, Y is overwritten by the
|
||||
* updated vector y.
|
||||
*
|
||||
* INCY - INTEGER.
|
||||
* On entry, INCY specifies the increment for the elements of
|
||||
* Y. INCY must not be zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* Further Details
|
||||
* ===============
|
||||
*
|
||||
* Level 2 Blas routine.
|
||||
* The vector and matrix arguments are not referenced when N = 0, or M = 0
|
||||
*
|
||||
* -- Written on 22-October-1986.
|
||||
* Jack Dongarra, Argonne National Lab.
|
||||
* Jeremy Du Croz, Nag Central Office.
|
||||
* Sven Hammarling, Nag Central Office.
|
||||
* Richard Hanson, Sandia National Labs.
|
||||
*
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
COMPLEX ONE
|
||||
PARAMETER (ONE= (1.0E+0,0.0E+0))
|
||||
COMPLEX ZERO
|
||||
PARAMETER (ZERO= (0.0E+0,0.0E+0))
|
||||
* ..
|
||||
* .. Local Scalars ..
|
||||
COMPLEX TEMP
|
||||
INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY
|
||||
LOGICAL NOCONJ
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
LOGICAL LSAME
|
||||
EXTERNAL LSAME
|
||||
* ..
|
||||
* .. External Subroutines ..
|
||||
EXTERNAL XERBLA
|
||||
* ..
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC CONJG,MAX
|
||||
* ..
|
||||
*
|
||||
* Test the input parameters.
|
||||
*
|
||||
INFO = 0
|
||||
IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
|
||||
+ .NOT.LSAME(TRANS,'C')) THEN
|
||||
INFO = 1
|
||||
ELSE IF (M.LT.0) THEN
|
||||
INFO = 2
|
||||
ELSE IF (N.LT.0) THEN
|
||||
INFO = 3
|
||||
ELSE IF (LDA.LT.MAX(1,M)) THEN
|
||||
INFO = 6
|
||||
ELSE IF (INCX.EQ.0) THEN
|
||||
INFO = 8
|
||||
ELSE IF (INCY.EQ.0) THEN
|
||||
INFO = 11
|
||||
END IF
|
||||
IF (INFO.NE.0) THEN
|
||||
CALL XERBLA('CGEMV ',INFO)
|
||||
RETURN
|
||||
END IF
|
||||
*
|
||||
* Quick return if possible.
|
||||
*
|
||||
IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
|
||||
+ ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
|
||||
*
|
||||
NOCONJ = LSAME(TRANS,'T')
|
||||
*
|
||||
* Set LENX and LENY, the lengths of the vectors x and y, and set
|
||||
* up the start points in X and Y.
|
||||
*
|
||||
IF (LSAME(TRANS,'N')) THEN
|
||||
LENX = N
|
||||
LENY = M
|
||||
ELSE
|
||||
LENX = M
|
||||
LENY = N
|
||||
END IF
|
||||
IF (INCX.GT.0) THEN
|
||||
KX = 1
|
||||
ELSE
|
||||
KX = 1 - (LENX-1)*INCX
|
||||
END IF
|
||||
IF (INCY.GT.0) THEN
|
||||
KY = 1
|
||||
ELSE
|
||||
KY = 1 - (LENY-1)*INCY
|
||||
END IF
|
||||
*
|
||||
* Start the operations. In this version the elements of A are
|
||||
* accessed sequentially with one pass through A.
|
||||
*
|
||||
* First form y := beta*y.
|
||||
*
|
||||
IF (BETA.NE.ONE) THEN
|
||||
IF (INCY.EQ.1) THEN
|
||||
IF (BETA.EQ.ZERO) THEN
|
||||
DO 10 I = 1,LENY
|
||||
Y(I) = ZERO
|
||||
10 CONTINUE
|
||||
ELSE
|
||||
DO 20 I = 1,LENY
|
||||
Y(I) = BETA*Y(I)
|
||||
20 CONTINUE
|
||||
END IF
|
||||
ELSE
|
||||
IY = KY
|
||||
IF (BETA.EQ.ZERO) THEN
|
||||
DO 30 I = 1,LENY
|
||||
Y(IY) = ZERO
|
||||
IY = IY + INCY
|
||||
30 CONTINUE
|
||||
ELSE
|
||||
DO 40 I = 1,LENY
|
||||
Y(IY) = BETA*Y(IY)
|
||||
IY = IY + INCY
|
||||
40 CONTINUE
|
||||
END IF
|
||||
END IF
|
||||
END IF
|
||||
IF (ALPHA.EQ.ZERO) RETURN
|
||||
IF (LSAME(TRANS,'N')) THEN
|
||||
*
|
||||
* Form y := alpha*A*x + y.
|
||||
*
|
||||
JX = KX
|
||||
IF (INCY.EQ.1) THEN
|
||||
DO 60 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
DO 50 I = 1,M
|
||||
Y(I) = Y(I) + TEMP*A(I,J)
|
||||
50 CONTINUE
|
||||
END IF
|
||||
JX = JX + INCX
|
||||
60 CONTINUE
|
||||
ELSE
|
||||
DO 80 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
DO 70 I = 1,M
|
||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
END IF
|
||||
JX = JX + INCX
|
||||
80 CONTINUE
|
||||
END IF
|
||||
ELSE
|
||||
*
|
||||
* Form y := alpha*A**T*x + y or y := alpha*A**H*x + y.
|
||||
*
|
||||
JY = KY
|
||||
IF (INCX.EQ.1) THEN
|
||||
DO 110 J = 1,N
|
||||
TEMP = ZERO
|
||||
IF (NOCONJ) THEN
|
||||
DO 90 I = 1,M
|
||||
TEMP = TEMP + A(I,J)*X(I)
|
||||
90 CONTINUE
|
||||
ELSE
|
||||
DO 100 I = 1,M
|
||||
TEMP = TEMP + CONJG(A(I,J))*X(I)
|
||||
100 CONTINUE
|
||||
END IF
|
||||
Y(JY) = Y(JY) + ALPHA*TEMP
|
||||
JY = JY + INCY
|
||||
110 CONTINUE
|
||||
ELSE
|
||||
DO 140 J = 1,N
|
||||
TEMP = ZERO
|
||||
IX = KX
|
||||
IF (NOCONJ) THEN
|
||||
DO 120 I = 1,M
|
||||
TEMP = TEMP + A(I,J)*X(IX)
|
||||
IX = IX + INCX
|
||||
120 CONTINUE
|
||||
ELSE
|
||||
DO 130 I = 1,M
|
||||
TEMP = TEMP + CONJG(A(I,J))*X(IX)
|
||||
IX = IX + INCX
|
||||
130 CONTINUE
|
||||
END IF
|
||||
Y(JY) = Y(JY) + ALPHA*TEMP
|
||||
JY = JY + INCY
|
||||
140 CONTINUE
|
||||
END IF
|
||||
END IF
|
||||
*
|
||||
RETURN
|
||||
*
|
||||
* End of CGEMV .
|
||||
*
|
||||
END
|
||||
265
interface/netlib/dgemv.f
Normal file
265
interface/netlib/dgemv.f
Normal file
@@ -0,0 +1,265 @@
|
||||
SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
* .. Scalar Arguments ..
|
||||
DOUBLE PRECISION ALPHA,BETA
|
||||
INTEGER INCX,INCY,LDA,M,N
|
||||
CHARACTER TRANS
|
||||
* ..
|
||||
* .. Array Arguments ..
|
||||
DOUBLE PRECISION A(LDA,*),X(*),Y(*)
|
||||
* ..
|
||||
*
|
||||
* Purpose
|
||||
* =======
|
||||
*
|
||||
* DGEMV performs one of the matrix-vector operations
|
||||
*
|
||||
* y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,
|
||||
*
|
||||
* where alpha and beta are scalars, x and y are vectors and A is an
|
||||
* m by n matrix.
|
||||
*
|
||||
* Arguments
|
||||
* ==========
|
||||
*
|
||||
* TRANS - CHARACTER*1.
|
||||
* On entry, TRANS specifies the operation to be performed as
|
||||
* follows:
|
||||
*
|
||||
* TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
|
||||
*
|
||||
* TRANS = 'T' or 't' y := alpha*A**T*x + beta*y.
|
||||
*
|
||||
* TRANS = 'C' or 'c' y := alpha*A**T*x + beta*y.
|
||||
*
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* M - INTEGER.
|
||||
* On entry, M specifies the number of rows of the matrix A.
|
||||
* M must be at least zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* N - INTEGER.
|
||||
* On entry, N specifies the number of columns of the matrix A.
|
||||
* N must be at least zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* ALPHA - DOUBLE PRECISION.
|
||||
* On entry, ALPHA specifies the scalar alpha.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
|
||||
* Before entry, the leading m by n part of the array A must
|
||||
* contain the matrix of coefficients.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* LDA - INTEGER.
|
||||
* On entry, LDA specifies the first dimension of A as declared
|
||||
* in the calling (sub) program. LDA must be at least
|
||||
* max( 1, m ).
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* X - DOUBLE PRECISION array of DIMENSION at least
|
||||
* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
|
||||
* and at least
|
||||
* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
|
||||
* Before entry, the incremented array X must contain the
|
||||
* vector x.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* INCX - INTEGER.
|
||||
* On entry, INCX specifies the increment for the elements of
|
||||
* X. INCX must not be zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* BETA - DOUBLE PRECISION.
|
||||
* On entry, BETA specifies the scalar beta. When BETA is
|
||||
* supplied as zero then Y need not be set on input.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* Y - DOUBLE PRECISION array of DIMENSION at least
|
||||
* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
|
||||
* and at least
|
||||
* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
|
||||
* Before entry with BETA non-zero, the incremented array Y
|
||||
* must contain the vector y. On exit, Y is overwritten by the
|
||||
* updated vector y.
|
||||
*
|
||||
* INCY - INTEGER.
|
||||
* On entry, INCY specifies the increment for the elements of
|
||||
* Y. INCY must not be zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* Further Details
|
||||
* ===============
|
||||
*
|
||||
* Level 2 Blas routine.
|
||||
* The vector and matrix arguments are not referenced when N = 0, or M = 0
|
||||
*
|
||||
* -- Written on 22-October-1986.
|
||||
* Jack Dongarra, Argonne National Lab.
|
||||
* Jeremy Du Croz, Nag Central Office.
|
||||
* Sven Hammarling, Nag Central Office.
|
||||
* Richard Hanson, Sandia National Labs.
|
||||
*
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
DOUBLE PRECISION ONE,ZERO
|
||||
PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
|
||||
* ..
|
||||
* .. Local Scalars ..
|
||||
DOUBLE PRECISION TEMP
|
||||
INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
LOGICAL LSAME
|
||||
EXTERNAL LSAME
|
||||
* ..
|
||||
* .. External Subroutines ..
|
||||
EXTERNAL XERBLA
|
||||
* ..
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC MAX
|
||||
* ..
|
||||
*
|
||||
* Test the input parameters.
|
||||
*
|
||||
INFO = 0
|
||||
IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
|
||||
+ .NOT.LSAME(TRANS,'C')) THEN
|
||||
INFO = 1
|
||||
ELSE IF (M.LT.0) THEN
|
||||
INFO = 2
|
||||
ELSE IF (N.LT.0) THEN
|
||||
INFO = 3
|
||||
ELSE IF (LDA.LT.MAX(1,M)) THEN
|
||||
INFO = 6
|
||||
ELSE IF (INCX.EQ.0) THEN
|
||||
INFO = 8
|
||||
ELSE IF (INCY.EQ.0) THEN
|
||||
INFO = 11
|
||||
END IF
|
||||
IF (INFO.NE.0) THEN
|
||||
CALL XERBLA('DGEMV ',INFO)
|
||||
RETURN
|
||||
END IF
|
||||
*
|
||||
* Quick return if possible.
|
||||
*
|
||||
IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
|
||||
+ ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
|
||||
*
|
||||
* Set LENX and LENY, the lengths of the vectors x and y, and set
|
||||
* up the start points in X and Y.
|
||||
*
|
||||
IF (LSAME(TRANS,'N')) THEN
|
||||
LENX = N
|
||||
LENY = M
|
||||
ELSE
|
||||
LENX = M
|
||||
LENY = N
|
||||
END IF
|
||||
IF (INCX.GT.0) THEN
|
||||
KX = 1
|
||||
ELSE
|
||||
KX = 1 - (LENX-1)*INCX
|
||||
END IF
|
||||
IF (INCY.GT.0) THEN
|
||||
KY = 1
|
||||
ELSE
|
||||
KY = 1 - (LENY-1)*INCY
|
||||
END IF
|
||||
*
|
||||
* Start the operations. In this version the elements of A are
|
||||
* accessed sequentially with one pass through A.
|
||||
*
|
||||
* First form y := beta*y.
|
||||
*
|
||||
IF (BETA.NE.ONE) THEN
|
||||
IF (INCY.EQ.1) THEN
|
||||
IF (BETA.EQ.ZERO) THEN
|
||||
DO 10 I = 1,LENY
|
||||
Y(I) = ZERO
|
||||
10 CONTINUE
|
||||
ELSE
|
||||
DO 20 I = 1,LENY
|
||||
Y(I) = BETA*Y(I)
|
||||
20 CONTINUE
|
||||
END IF
|
||||
ELSE
|
||||
IY = KY
|
||||
IF (BETA.EQ.ZERO) THEN
|
||||
DO 30 I = 1,LENY
|
||||
Y(IY) = ZERO
|
||||
IY = IY + INCY
|
||||
30 CONTINUE
|
||||
ELSE
|
||||
DO 40 I = 1,LENY
|
||||
Y(IY) = BETA*Y(IY)
|
||||
IY = IY + INCY
|
||||
40 CONTINUE
|
||||
END IF
|
||||
END IF
|
||||
END IF
|
||||
IF (ALPHA.EQ.ZERO) RETURN
|
||||
IF (LSAME(TRANS,'N')) THEN
|
||||
*
|
||||
* Form y := alpha*A*x + y.
|
||||
*
|
||||
JX = KX
|
||||
IF (INCY.EQ.1) THEN
|
||||
DO 60 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
DO 50 I = 1,M
|
||||
Y(I) = Y(I) + TEMP*A(I,J)
|
||||
50 CONTINUE
|
||||
END IF
|
||||
JX = JX + INCX
|
||||
60 CONTINUE
|
||||
ELSE
|
||||
DO 80 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
DO 70 I = 1,M
|
||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
END IF
|
||||
JX = JX + INCX
|
||||
80 CONTINUE
|
||||
END IF
|
||||
ELSE
|
||||
*
|
||||
* Form y := alpha*A**T*x + y.
|
||||
*
|
||||
JY = KY
|
||||
IF (INCX.EQ.1) THEN
|
||||
DO 100 J = 1,N
|
||||
TEMP = ZERO
|
||||
DO 90 I = 1,M
|
||||
TEMP = TEMP + A(I,J)*X(I)
|
||||
90 CONTINUE
|
||||
Y(JY) = Y(JY) + ALPHA*TEMP
|
||||
JY = JY + INCY
|
||||
100 CONTINUE
|
||||
ELSE
|
||||
DO 120 J = 1,N
|
||||
TEMP = ZERO
|
||||
IX = KX
|
||||
DO 110 I = 1,M
|
||||
TEMP = TEMP + A(I,J)*X(IX)
|
||||
IX = IX + INCX
|
||||
110 CONTINUE
|
||||
Y(JY) = Y(JY) + ALPHA*TEMP
|
||||
JY = JY + INCY
|
||||
120 CONTINUE
|
||||
END IF
|
||||
END IF
|
||||
*
|
||||
RETURN
|
||||
*
|
||||
* End of DGEMV .
|
||||
*
|
||||
END
|
||||
265
interface/netlib/sgemv.f
Normal file
265
interface/netlib/sgemv.f
Normal file
@@ -0,0 +1,265 @@
|
||||
SUBROUTINE SGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
* .. Scalar Arguments ..
|
||||
REAL ALPHA,BETA
|
||||
INTEGER INCX,INCY,LDA,M,N
|
||||
CHARACTER TRANS
|
||||
* ..
|
||||
* .. Array Arguments ..
|
||||
REAL A(LDA,*),X(*),Y(*)
|
||||
* ..
|
||||
*
|
||||
* Purpose
|
||||
* =======
|
||||
*
|
||||
* SGEMV performs one of the matrix-vector operations
|
||||
*
|
||||
* y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,
|
||||
*
|
||||
* where alpha and beta are scalars, x and y are vectors and A is an
|
||||
* m by n matrix.
|
||||
*
|
||||
* Arguments
|
||||
* ==========
|
||||
*
|
||||
* TRANS - CHARACTER*1.
|
||||
* On entry, TRANS specifies the operation to be performed as
|
||||
* follows:
|
||||
*
|
||||
* TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
|
||||
*
|
||||
* TRANS = 'T' or 't' y := alpha*A**T*x + beta*y.
|
||||
*
|
||||
* TRANS = 'C' or 'c' y := alpha*A**T*x + beta*y.
|
||||
*
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* M - INTEGER.
|
||||
* On entry, M specifies the number of rows of the matrix A.
|
||||
* M must be at least zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* N - INTEGER.
|
||||
* On entry, N specifies the number of columns of the matrix A.
|
||||
* N must be at least zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* ALPHA - REAL .
|
||||
* On entry, ALPHA specifies the scalar alpha.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* A - REAL array of DIMENSION ( LDA, n ).
|
||||
* Before entry, the leading m by n part of the array A must
|
||||
* contain the matrix of coefficients.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* LDA - INTEGER.
|
||||
* On entry, LDA specifies the first dimension of A as declared
|
||||
* in the calling (sub) program. LDA must be at least
|
||||
* max( 1, m ).
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* X - REAL array of DIMENSION at least
|
||||
* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
|
||||
* and at least
|
||||
* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
|
||||
* Before entry, the incremented array X must contain the
|
||||
* vector x.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* INCX - INTEGER.
|
||||
* On entry, INCX specifies the increment for the elements of
|
||||
* X. INCX must not be zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* BETA - REAL .
|
||||
* On entry, BETA specifies the scalar beta. When BETA is
|
||||
* supplied as zero then Y need not be set on input.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* Y - REAL array of DIMENSION at least
|
||||
* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
|
||||
* and at least
|
||||
* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
|
||||
* Before entry with BETA non-zero, the incremented array Y
|
||||
* must contain the vector y. On exit, Y is overwritten by the
|
||||
* updated vector y.
|
||||
*
|
||||
* INCY - INTEGER.
|
||||
* On entry, INCY specifies the increment for the elements of
|
||||
* Y. INCY must not be zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* Further Details
|
||||
* ===============
|
||||
*
|
||||
* Level 2 Blas routine.
|
||||
* The vector and matrix arguments are not referenced when N = 0, or M = 0
|
||||
*
|
||||
* -- Written on 22-October-1986.
|
||||
* Jack Dongarra, Argonne National Lab.
|
||||
* Jeremy Du Croz, Nag Central Office.
|
||||
* Sven Hammarling, Nag Central Office.
|
||||
* Richard Hanson, Sandia National Labs.
|
||||
*
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
REAL ONE,ZERO
|
||||
PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
|
||||
* ..
|
||||
* .. Local Scalars ..
|
||||
REAL TEMP
|
||||
INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
LOGICAL LSAME
|
||||
EXTERNAL LSAME
|
||||
* ..
|
||||
* .. External Subroutines ..
|
||||
EXTERNAL XERBLA
|
||||
* ..
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC MAX
|
||||
* ..
|
||||
*
|
||||
* Test the input parameters.
|
||||
*
|
||||
INFO = 0
|
||||
IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
|
||||
+ .NOT.LSAME(TRANS,'C')) THEN
|
||||
INFO = 1
|
||||
ELSE IF (M.LT.0) THEN
|
||||
INFO = 2
|
||||
ELSE IF (N.LT.0) THEN
|
||||
INFO = 3
|
||||
ELSE IF (LDA.LT.MAX(1,M)) THEN
|
||||
INFO = 6
|
||||
ELSE IF (INCX.EQ.0) THEN
|
||||
INFO = 8
|
||||
ELSE IF (INCY.EQ.0) THEN
|
||||
INFO = 11
|
||||
END IF
|
||||
IF (INFO.NE.0) THEN
|
||||
CALL XERBLA('SGEMV ',INFO)
|
||||
RETURN
|
||||
END IF
|
||||
*
|
||||
* Quick return if possible.
|
||||
*
|
||||
IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
|
||||
+ ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
|
||||
*
|
||||
* Set LENX and LENY, the lengths of the vectors x and y, and set
|
||||
* up the start points in X and Y.
|
||||
*
|
||||
IF (LSAME(TRANS,'N')) THEN
|
||||
LENX = N
|
||||
LENY = M
|
||||
ELSE
|
||||
LENX = M
|
||||
LENY = N
|
||||
END IF
|
||||
IF (INCX.GT.0) THEN
|
||||
KX = 1
|
||||
ELSE
|
||||
KX = 1 - (LENX-1)*INCX
|
||||
END IF
|
||||
IF (INCY.GT.0) THEN
|
||||
KY = 1
|
||||
ELSE
|
||||
KY = 1 - (LENY-1)*INCY
|
||||
END IF
|
||||
*
|
||||
* Start the operations. In this version the elements of A are
|
||||
* accessed sequentially with one pass through A.
|
||||
*
|
||||
* First form y := beta*y.
|
||||
*
|
||||
IF (BETA.NE.ONE) THEN
|
||||
IF (INCY.EQ.1) THEN
|
||||
IF (BETA.EQ.ZERO) THEN
|
||||
DO 10 I = 1,LENY
|
||||
Y(I) = ZERO
|
||||
10 CONTINUE
|
||||
ELSE
|
||||
DO 20 I = 1,LENY
|
||||
Y(I) = BETA*Y(I)
|
||||
20 CONTINUE
|
||||
END IF
|
||||
ELSE
|
||||
IY = KY
|
||||
IF (BETA.EQ.ZERO) THEN
|
||||
DO 30 I = 1,LENY
|
||||
Y(IY) = ZERO
|
||||
IY = IY + INCY
|
||||
30 CONTINUE
|
||||
ELSE
|
||||
DO 40 I = 1,LENY
|
||||
Y(IY) = BETA*Y(IY)
|
||||
IY = IY + INCY
|
||||
40 CONTINUE
|
||||
END IF
|
||||
END IF
|
||||
END IF
|
||||
IF (ALPHA.EQ.ZERO) RETURN
|
||||
IF (LSAME(TRANS,'N')) THEN
|
||||
*
|
||||
* Form y := alpha*A*x + y.
|
||||
*
|
||||
JX = KX
|
||||
IF (INCY.EQ.1) THEN
|
||||
DO 60 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
DO 50 I = 1,M
|
||||
Y(I) = Y(I) + TEMP*A(I,J)
|
||||
50 CONTINUE
|
||||
END IF
|
||||
JX = JX + INCX
|
||||
60 CONTINUE
|
||||
ELSE
|
||||
DO 80 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
DO 70 I = 1,M
|
||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
END IF
|
||||
JX = JX + INCX
|
||||
80 CONTINUE
|
||||
END IF
|
||||
ELSE
|
||||
*
|
||||
* Form y := alpha*A**T*x + y.
|
||||
*
|
||||
JY = KY
|
||||
IF (INCX.EQ.1) THEN
|
||||
DO 100 J = 1,N
|
||||
TEMP = ZERO
|
||||
DO 90 I = 1,M
|
||||
TEMP = TEMP + A(I,J)*X(I)
|
||||
90 CONTINUE
|
||||
Y(JY) = Y(JY) + ALPHA*TEMP
|
||||
JY = JY + INCY
|
||||
100 CONTINUE
|
||||
ELSE
|
||||
DO 120 J = 1,N
|
||||
TEMP = ZERO
|
||||
IX = KX
|
||||
DO 110 I = 1,M
|
||||
TEMP = TEMP + A(I,J)*X(IX)
|
||||
IX = IX + INCX
|
||||
110 CONTINUE
|
||||
Y(JY) = Y(JY) + ALPHA*TEMP
|
||||
JY = JY + INCY
|
||||
120 CONTINUE
|
||||
END IF
|
||||
END IF
|
||||
*
|
||||
RETURN
|
||||
*
|
||||
* End of SGEMV .
|
||||
*
|
||||
END
|
||||
285
interface/netlib/zgemv.f
Normal file
285
interface/netlib/zgemv.f
Normal file
@@ -0,0 +1,285 @@
|
||||
SUBROUTINE ZGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
* .. Scalar Arguments ..
|
||||
DOUBLE COMPLEX ALPHA,BETA
|
||||
INTEGER INCX,INCY,LDA,M,N
|
||||
CHARACTER TRANS
|
||||
* ..
|
||||
* .. Array Arguments ..
|
||||
DOUBLE COMPLEX A(LDA,*),X(*),Y(*)
|
||||
* ..
|
||||
*
|
||||
* Purpose
|
||||
* =======
|
||||
*
|
||||
* ZGEMV performs one of the matrix-vector operations
|
||||
*
|
||||
* y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or
|
||||
*
|
||||
* y := alpha*A**H*x + beta*y,
|
||||
*
|
||||
* where alpha and beta are scalars, x and y are vectors and A is an
|
||||
* m by n matrix.
|
||||
*
|
||||
* Arguments
|
||||
* ==========
|
||||
*
|
||||
* TRANS - CHARACTER*1.
|
||||
* On entry, TRANS specifies the operation to be performed as
|
||||
* follows:
|
||||
*
|
||||
* TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
|
||||
*
|
||||
* TRANS = 'T' or 't' y := alpha*A**T*x + beta*y.
|
||||
*
|
||||
* TRANS = 'C' or 'c' y := alpha*A**H*x + beta*y.
|
||||
*
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* M - INTEGER.
|
||||
* On entry, M specifies the number of rows of the matrix A.
|
||||
* M must be at least zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* N - INTEGER.
|
||||
* On entry, N specifies the number of columns of the matrix A.
|
||||
* N must be at least zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* ALPHA - COMPLEX*16 .
|
||||
* On entry, ALPHA specifies the scalar alpha.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* A - COMPLEX*16 array of DIMENSION ( LDA, n ).
|
||||
* Before entry, the leading m by n part of the array A must
|
||||
* contain the matrix of coefficients.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* LDA - INTEGER.
|
||||
* On entry, LDA specifies the first dimension of A as declared
|
||||
* in the calling (sub) program. LDA must be at least
|
||||
* max( 1, m ).
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* X - COMPLEX*16 array of DIMENSION at least
|
||||
* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
|
||||
* and at least
|
||||
* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
|
||||
* Before entry, the incremented array X must contain the
|
||||
* vector x.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* INCX - INTEGER.
|
||||
* On entry, INCX specifies the increment for the elements of
|
||||
* X. INCX must not be zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* BETA - COMPLEX*16 .
|
||||
* On entry, BETA specifies the scalar beta. When BETA is
|
||||
* supplied as zero then Y need not be set on input.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* Y - COMPLEX*16 array of DIMENSION at least
|
||||
* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
|
||||
* and at least
|
||||
* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
|
||||
* Before entry with BETA non-zero, the incremented array Y
|
||||
* must contain the vector y. On exit, Y is overwritten by the
|
||||
* updated vector y.
|
||||
*
|
||||
* INCY - INTEGER.
|
||||
* On entry, INCY specifies the increment for the elements of
|
||||
* Y. INCY must not be zero.
|
||||
* Unchanged on exit.
|
||||
*
|
||||
* Further Details
|
||||
* ===============
|
||||
*
|
||||
* Level 2 Blas routine.
|
||||
* The vector and matrix arguments are not referenced when N = 0, or M = 0
|
||||
*
|
||||
* -- Written on 22-October-1986.
|
||||
* Jack Dongarra, Argonne National Lab.
|
||||
* Jeremy Du Croz, Nag Central Office.
|
||||
* Sven Hammarling, Nag Central Office.
|
||||
* Richard Hanson, Sandia National Labs.
|
||||
*
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
DOUBLE COMPLEX ONE
|
||||
PARAMETER (ONE= (1.0D+0,0.0D+0))
|
||||
DOUBLE COMPLEX ZERO
|
||||
PARAMETER (ZERO= (0.0D+0,0.0D+0))
|
||||
* ..
|
||||
* .. Local Scalars ..
|
||||
DOUBLE COMPLEX TEMP
|
||||
INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY
|
||||
LOGICAL NOCONJ
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
LOGICAL LSAME
|
||||
EXTERNAL LSAME
|
||||
* ..
|
||||
* .. External Subroutines ..
|
||||
EXTERNAL XERBLA
|
||||
* ..
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC DCONJG,MAX
|
||||
* ..
|
||||
*
|
||||
* Test the input parameters.
|
||||
*
|
||||
INFO = 0
|
||||
IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
|
||||
+ .NOT.LSAME(TRANS,'C')) THEN
|
||||
INFO = 1
|
||||
ELSE IF (M.LT.0) THEN
|
||||
INFO = 2
|
||||
ELSE IF (N.LT.0) THEN
|
||||
INFO = 3
|
||||
ELSE IF (LDA.LT.MAX(1,M)) THEN
|
||||
INFO = 6
|
||||
ELSE IF (INCX.EQ.0) THEN
|
||||
INFO = 8
|
||||
ELSE IF (INCY.EQ.0) THEN
|
||||
INFO = 11
|
||||
END IF
|
||||
IF (INFO.NE.0) THEN
|
||||
CALL XERBLA('ZGEMV ',INFO)
|
||||
RETURN
|
||||
END IF
|
||||
*
|
||||
* Quick return if possible.
|
||||
*
|
||||
IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
|
||||
+ ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
|
||||
*
|
||||
NOCONJ = LSAME(TRANS,'T')
|
||||
*
|
||||
* Set LENX and LENY, the lengths of the vectors x and y, and set
|
||||
* up the start points in X and Y.
|
||||
*
|
||||
IF (LSAME(TRANS,'N')) THEN
|
||||
LENX = N
|
||||
LENY = M
|
||||
ELSE
|
||||
LENX = M
|
||||
LENY = N
|
||||
END IF
|
||||
IF (INCX.GT.0) THEN
|
||||
KX = 1
|
||||
ELSE
|
||||
KX = 1 - (LENX-1)*INCX
|
||||
END IF
|
||||
IF (INCY.GT.0) THEN
|
||||
KY = 1
|
||||
ELSE
|
||||
KY = 1 - (LENY-1)*INCY
|
||||
END IF
|
||||
*
|
||||
* Start the operations. In this version the elements of A are
|
||||
* accessed sequentially with one pass through A.
|
||||
*
|
||||
* First form y := beta*y.
|
||||
*
|
||||
IF (BETA.NE.ONE) THEN
|
||||
IF (INCY.EQ.1) THEN
|
||||
IF (BETA.EQ.ZERO) THEN
|
||||
DO 10 I = 1,LENY
|
||||
Y(I) = ZERO
|
||||
10 CONTINUE
|
||||
ELSE
|
||||
DO 20 I = 1,LENY
|
||||
Y(I) = BETA*Y(I)
|
||||
20 CONTINUE
|
||||
END IF
|
||||
ELSE
|
||||
IY = KY
|
||||
IF (BETA.EQ.ZERO) THEN
|
||||
DO 30 I = 1,LENY
|
||||
Y(IY) = ZERO
|
||||
IY = IY + INCY
|
||||
30 CONTINUE
|
||||
ELSE
|
||||
DO 40 I = 1,LENY
|
||||
Y(IY) = BETA*Y(IY)
|
||||
IY = IY + INCY
|
||||
40 CONTINUE
|
||||
END IF
|
||||
END IF
|
||||
END IF
|
||||
IF (ALPHA.EQ.ZERO) RETURN
|
||||
IF (LSAME(TRANS,'N')) THEN
|
||||
*
|
||||
* Form y := alpha*A*x + y.
|
||||
*
|
||||
JX = KX
|
||||
IF (INCY.EQ.1) THEN
|
||||
DO 60 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
DO 50 I = 1,M
|
||||
Y(I) = Y(I) + TEMP*A(I,J)
|
||||
50 CONTINUE
|
||||
END IF
|
||||
JX = JX + INCX
|
||||
60 CONTINUE
|
||||
ELSE
|
||||
DO 80 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
DO 70 I = 1,M
|
||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
END IF
|
||||
JX = JX + INCX
|
||||
80 CONTINUE
|
||||
END IF
|
||||
ELSE
|
||||
*
|
||||
* Form y := alpha*A**T*x + y or y := alpha*A**H*x + y.
|
||||
*
|
||||
JY = KY
|
||||
IF (INCX.EQ.1) THEN
|
||||
DO 110 J = 1,N
|
||||
TEMP = ZERO
|
||||
IF (NOCONJ) THEN
|
||||
DO 90 I = 1,M
|
||||
TEMP = TEMP + A(I,J)*X(I)
|
||||
90 CONTINUE
|
||||
ELSE
|
||||
DO 100 I = 1,M
|
||||
TEMP = TEMP + DCONJG(A(I,J))*X(I)
|
||||
100 CONTINUE
|
||||
END IF
|
||||
Y(JY) = Y(JY) + ALPHA*TEMP
|
||||
JY = JY + INCY
|
||||
110 CONTINUE
|
||||
ELSE
|
||||
DO 140 J = 1,N
|
||||
TEMP = ZERO
|
||||
IX = KX
|
||||
IF (NOCONJ) THEN
|
||||
DO 120 I = 1,M
|
||||
TEMP = TEMP + A(I,J)*X(IX)
|
||||
IX = IX + INCX
|
||||
120 CONTINUE
|
||||
ELSE
|
||||
DO 130 I = 1,M
|
||||
TEMP = TEMP + DCONJG(A(I,J))*X(IX)
|
||||
IX = IX + INCX
|
||||
130 CONTINUE
|
||||
END IF
|
||||
Y(JY) = Y(JY) + ALPHA*TEMP
|
||||
JY = JY + INCY
|
||||
140 CONTINUE
|
||||
END IF
|
||||
END IF
|
||||
*
|
||||
RETURN
|
||||
*
|
||||
* End of ZGEMV .
|
||||
*
|
||||
END
|
||||
@@ -136,6 +136,7 @@ void NAME(char *SIDE, char *UPLO,
|
||||
FLOAT *sa, *sb;
|
||||
|
||||
#ifdef SMP
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
int mode = BLAS_XDOUBLE | BLAS_REAL;
|
||||
#elif defined(DOUBLE)
|
||||
@@ -143,6 +144,15 @@ void NAME(char *SIDE, char *UPLO,
|
||||
#else
|
||||
int mode = BLAS_SINGLE | BLAS_REAL;
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
|
||||
#elif defined(DOUBLE)
|
||||
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
|
||||
#else
|
||||
int mode = BLAS_SINGLE | BLAS_COMPLEX;
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(SMP) && !defined(NO_AFFINITY)
|
||||
@@ -237,6 +247,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
|
||||
FLOAT *sa, *sb;
|
||||
|
||||
#ifdef SMP
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
int mode = BLAS_XDOUBLE | BLAS_REAL;
|
||||
#elif defined(DOUBLE)
|
||||
@@ -244,6 +255,15 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
|
||||
#else
|
||||
int mode = BLAS_SINGLE | BLAS_REAL;
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
|
||||
#elif defined(DOUBLE)
|
||||
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
|
||||
#else
|
||||
int mode = BLAS_SINGLE | BLAS_COMPLEX;
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(SMP) && !defined(NO_AFFINITY)
|
||||
|
||||
@@ -60,6 +60,7 @@ static blasint (*trtri_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
|
||||
|
||||
blas_arg_t args;
|
||||
@@ -83,6 +84,7 @@ int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *In
|
||||
TOUPPER(uplo_arg);
|
||||
TOUPPER(diag_arg);
|
||||
|
||||
|
||||
uplo = -1;
|
||||
if (uplo_arg == 'U') uplo = 0;
|
||||
if (uplo_arg == 'L') uplo = 1;
|
||||
@@ -90,6 +92,7 @@ int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *In
|
||||
if (diag_arg == 'U') diag = 0;
|
||||
if (diag_arg == 'N') diag = 1;
|
||||
|
||||
|
||||
info = 0;
|
||||
if (args.lda < MAX(1,args.n)) info = 5;
|
||||
if (args.n < 0) info = 3;
|
||||
|
||||
@@ -6,7 +6,7 @@ TOPDIR = ..
|
||||
include $(TOPDIR)/Makefile.system
|
||||
|
||||
ifdef TARGET_CORE
|
||||
CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
|
||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
|
||||
BUILD_KERNEL = 1
|
||||
KDIR =
|
||||
TSUFFIX = _$(TARGET_CORE)
|
||||
@@ -48,7 +48,7 @@ HPLOBJS = \
|
||||
|
||||
COMMONOBJS += lsame.$(SUFFIX) scabs1.$(SUFFIX) dcabs1.$(SUFFIX)
|
||||
|
||||
ifdef DYNAMIC_ARCH
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
SBLASOBJS += setparam$(TSUFFIX).$(SUFFIX)
|
||||
CCOMMON_OPT += -DTS=$(TSUFFIX)
|
||||
endif
|
||||
|
||||
@@ -388,7 +388,7 @@ $(KDIR)xgerv_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgerv_k$(TSUFFIX).$(PSUFFIX) : $(KER
|
||||
$(CC) -c $(CFLAGS) -DXDOUBLE -UCONJ -DXCONJ $< -o $@
|
||||
|
||||
$(KDIR)xgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGERCKERNEL) $(XGERPARAM)
|
||||
$(CC) -c $(CFLAGS) -DXDOUBLE -DCONJ-DXCONJ $< -o $@
|
||||
$(CC) -c $(CFLAGS) -DXDOUBLE -DCONJ -DXCONJ $< -o $@
|
||||
|
||||
$(KDIR)chemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CHEMV_U_KERNEL) $(CHEMV_U_PARAM)
|
||||
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHEMV $< -o $@
|
||||
|
||||
@@ -14,6 +14,20 @@ ifeq ($(ARCH), MIPS)
|
||||
USE_GEMM3M = 1
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), arm)
|
||||
USE_TRMM = 1
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), arm64)
|
||||
USE_TRMM = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), LOONGSON3B)
|
||||
USE_TRMM = 1
|
||||
endif
|
||||
|
||||
|
||||
|
||||
SKERNELOBJS += \
|
||||
sgemm_kernel$(TSUFFIX).$(SUFFIX) \
|
||||
$(SGEMMINCOPYOBJ) $(SGEMMITCOPYOBJ) \
|
||||
@@ -498,6 +512,92 @@ $(KDIR)xgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMD
|
||||
$(KDIR)xgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND)
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DCC $< -o $@
|
||||
|
||||
|
||||
ifdef USE_TRMM
|
||||
$(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@
|
||||
|
||||
$(KDIR)strmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@
|
||||
|
||||
$(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@
|
||||
|
||||
$(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@
|
||||
|
||||
$(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@
|
||||
|
||||
$(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@
|
||||
|
||||
$(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@
|
||||
|
||||
$(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@
|
||||
|
||||
$(KDIR)qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@
|
||||
|
||||
$(KDIR)qtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@
|
||||
|
||||
$(KDIR)qtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@
|
||||
|
||||
$(KDIR)qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@
|
||||
|
||||
$(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
|
||||
|
||||
$(KDIR)ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@
|
||||
|
||||
$(KDIR)ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@
|
||||
|
||||
$(KDIR)ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@
|
||||
|
||||
$(KDIR)ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@
|
||||
|
||||
$(KDIR)ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@
|
||||
|
||||
$(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@
|
||||
|
||||
$(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
|
||||
|
||||
$(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
|
||||
|
||||
$(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@
|
||||
|
||||
$(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@
|
||||
|
||||
$(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@
|
||||
|
||||
$(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@
|
||||
|
||||
$(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@
|
||||
|
||||
$(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@
|
||||
|
||||
$(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
|
||||
else
|
||||
$(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@
|
||||
|
||||
@@ -581,6 +681,10 @@ $(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
|
||||
|
||||
$(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
|
||||
endif
|
||||
|
||||
|
||||
|
||||
|
||||
$(KDIR)xtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL)
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
|
||||
@@ -1120,328 +1224,328 @@ $(KDIR)xhemm_iutcopy$(TSUFFIX).$(SUFFIX) : generic/zhemm_utcopy_$(XGEMM_UNROLL_M
|
||||
$(KDIR)xhemm_iltcopy$(TSUFFIX).$(SUFFIX) : generic/zhemm_ltcopy_$(XGEMM_UNROLL_M).c
|
||||
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER $< -DLOWER -o $@
|
||||
|
||||
$(KDIR)cgemm3m_oncopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_oncopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_oncopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_oncopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_oncopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_oncopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_otcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_otcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_otcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_otcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_otcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_otcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_incopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_incopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_incopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_incopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_itcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_itcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_oncopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_oncopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_oncopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_oncopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_oncopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_oncopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_otcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_otcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_otcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_otcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_otcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_otcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_incopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_incopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_incopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_incopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_incopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_incopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_itcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_itcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_oncopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_oncopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_oncopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_oncopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_oncopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_oncopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_otcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_otcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_otcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_otcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_otcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_otcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_incopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_incopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_incopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_incopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_incopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_incopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_itcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_itcopyi$(TSUFFIX).$(SUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_oucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_olcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_oucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_olcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_oucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_olcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_iucopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_iucopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_iucopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(CFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)strsm_iunucopy$(TSUFFIX).$(SUFFIX) : generic/trsm_uncopy_$(SGEMM_UNROLL_M).c
|
||||
@@ -2522,328 +2626,328 @@ $(KDIR)xhemm_iutcopy$(TSUFFIX).$(PSUFFIX) : generic/zhemm_utcopy_$(XGEMM_UNROLL_
|
||||
$(KDIR)xhemm_iltcopy$(TSUFFIX).$(PSUFFIX) : generic/zhemm_ltcopy_$(XGEMM_UNROLL_M).c
|
||||
$(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -UOUTER $< -DLOWER -o $@
|
||||
|
||||
$(KDIR)cgemm3m_oncopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_oncopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_oncopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_oncopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_oncopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_oncopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_otcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_otcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_otcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_otcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_otcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)cgemm3m_otcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_incopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_incopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_incopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_incopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_incopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_incopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_itcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_itcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_itcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_itcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)cgemm3m_itcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)cgemm3m_itcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_oncopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_oncopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_oncopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_oncopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_oncopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_oncopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_otcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_otcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_otcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_otcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_otcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zgemm3m_otcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_incopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_incopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_incopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_incopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_incopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_incopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_itcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_itcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_itcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_itcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zgemm3m_itcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zgemm3m_itcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_oncopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_oncopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_oncopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_oncopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_oncopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_oncopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_otcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_otcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_otcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_otcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_otcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xgemm3m_otcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_incopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_incopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_incopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_incopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_incopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_incopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_ncopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_itcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_itcopyb$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_itcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_itcopyr$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xgemm3m_itcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xgemm3m_itcopyi$(TSUFFIX).$(PSUFFIX) : generic/zgemm3m_tcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX -DICOPY -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)csymm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)csymm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)csymm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zsymm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zsymm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zsymm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xsymm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xsymm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xsymm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zsymm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_N).c
|
||||
$(KDIR)chemm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)chemm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(SGEMM_UNROLL_M).c
|
||||
$(KDIR)chemm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(CGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -UDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_N).c
|
||||
$(KDIR)zhemm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)zhemm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(DGEMM_UNROLL_M).c
|
||||
$(KDIR)zhemm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(ZGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_oucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_olcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_oucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_olcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_oucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_N).c
|
||||
$(KDIR)xhemm3m_olcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_N).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -DUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_iucopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_ilcopyb$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_iucopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_ilcopyr$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DREAL_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_iucopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_ucopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)xhemm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(QGEMM_UNROLL_M).c
|
||||
$(KDIR)xhemm3m_ilcopyi$(TSUFFIX).$(PSUFFIX) : generic/zhemm3m_lcopy_$(XGEMM3M_UNROLL_M).c
|
||||
$(CC) $(PFLAGS) $(NO_UNINITIALIZED_WARN) -c -DXDOUBLE -DCOMPLEX -UUSE_ALPHA -DIMAGE_ONLY $< -o $@
|
||||
|
||||
$(KDIR)strsm_iunucopy$(TSUFFIX).$(PSUFFIX) : generic/trsm_uncopy_$(SGEMM_UNROLL_M).c
|
||||
|
||||
46
kernel/arm/KERNEL
Normal file
46
kernel/arm/KERNEL
Normal file
@@ -0,0 +1,46 @@
|
||||
ifndef SNRM2KERNEL
|
||||
SNRM2KERNEL = nrm2.c
|
||||
endif
|
||||
|
||||
ifndef DNRM2KERNEL
|
||||
DNRM2KERNEL = nrm2.c
|
||||
endif
|
||||
|
||||
ifndef CNRM2KERNEL
|
||||
CNRM2KERNEL = znrm2.c
|
||||
endif
|
||||
|
||||
ifndef ZNRM2KERNEL
|
||||
ZNRM2KERNEL = znrm2.c
|
||||
endif
|
||||
|
||||
ifndef SCABS_KERNEL
|
||||
SCABS_KERNEL = ../generic/cabs.c
|
||||
endif
|
||||
|
||||
ifndef DCABS_KERNEL
|
||||
DCABS_KERNEL = ../generic/cabs.c
|
||||
endif
|
||||
|
||||
ifndef QCABS_KERNEL
|
||||
QCABS_KERNEL = ../generic/cabs.c
|
||||
endif
|
||||
|
||||
ifndef LSAME_KERNEL
|
||||
LSAME_KERNEL = ../generic/lsame.c
|
||||
endif
|
||||
|
||||
ifndef SGEMM_BETA
|
||||
SGEMM_BETA = ../generic/gemm_beta.c
|
||||
endif
|
||||
ifndef DGEMM_BETA
|
||||
DGEMM_BETA = ../generic/gemm_beta.c
|
||||
endif
|
||||
ifndef CGEMM_BETA
|
||||
CGEMM_BETA = ../generic/zgemm_beta.c
|
||||
endif
|
||||
ifndef ZGEMM_BETA
|
||||
ZGEMM_BETA = ../generic/zgemm_beta.c
|
||||
endif
|
||||
|
||||
|
||||
142
kernel/arm/KERNEL.ARMV6
Normal file
142
kernel/arm/KERNEL.ARMV6
Normal file
@@ -0,0 +1,142 @@
|
||||
SAMAXKERNEL = iamax_vfp.S
|
||||
DAMAXKERNEL = iamax_vfp.S
|
||||
CAMAXKERNEL = iamax_vfp.S
|
||||
ZAMAXKERNEL = iamax_vfp.S
|
||||
|
||||
SAMINKERNEL = iamax_vfp.S
|
||||
DAMINKERNEL = iamax_vfp.S
|
||||
CAMINKERNEL = iamax_vfp.S
|
||||
ZAMINKERNEL = iamax_vfp.S
|
||||
|
||||
SMAXKERNEL = iamax_vfp.S
|
||||
DMAXKERNEL = iamax_vfp.S
|
||||
|
||||
SMINKERNEL = iamax_vfp.S
|
||||
DMINKERNEL = iamax_vfp.S
|
||||
|
||||
ISAMAXKERNEL = iamax_vfp.S
|
||||
IDAMAXKERNEL = iamax_vfp.S
|
||||
ICAMAXKERNEL = iamax_vfp.S
|
||||
IZAMAXKERNEL = iamax_vfp.S
|
||||
|
||||
ISAMINKERNEL = iamax_vfp.S
|
||||
IDAMINKERNEL = iamax_vfp.S
|
||||
ICAMINKERNEL = iamax_vfp.S
|
||||
IZAMINKERNEL = iamax_vfp.S
|
||||
|
||||
ISMAXKERNEL = iamax_vfp.S
|
||||
IDMAXKERNEL = iamax_vfp.S
|
||||
|
||||
ISMINKERNEL = iamax_vfp.S
|
||||
IDMINKERNEL = iamax_vfp.S
|
||||
|
||||
SASUMKERNEL = asum_vfp.S
|
||||
DASUMKERNEL = asum_vfp.S
|
||||
CASUMKERNEL = asum_vfp.S
|
||||
ZASUMKERNEL = asum_vfp.S
|
||||
|
||||
SAXPYKERNEL = axpy_vfp.S
|
||||
DAXPYKERNEL = axpy_vfp.S
|
||||
CAXPYKERNEL = axpy_vfp.S
|
||||
ZAXPYKERNEL = axpy_vfp.S
|
||||
|
||||
SCOPYKERNEL = scopy_vfp.S
|
||||
DCOPYKERNEL = dcopy_vfp.S
|
||||
CCOPYKERNEL = ccopy_vfp.S
|
||||
ZCOPYKERNEL = zcopy_vfp.S
|
||||
|
||||
SDOTKERNEL = sdot_vfp.S
|
||||
DDOTKERNEL = ddot_vfp.S
|
||||
CDOTKERNEL = cdot_vfp.S
|
||||
ZDOTKERNEL = zdot_vfp.S
|
||||
|
||||
SNRM2KERNEL = nrm2_vfp.S
|
||||
DNRM2KERNEL = nrm2_vfp.S
|
||||
CNRM2KERNEL = nrm2_vfp.S
|
||||
ZNRM2KERNEL = nrm2_vfp.S
|
||||
|
||||
SROTKERNEL = rot_vfp.S
|
||||
DROTKERNEL = rot_vfp.S
|
||||
CROTKERNEL = rot_vfp.S
|
||||
ZROTKERNEL = rot_vfp.S
|
||||
|
||||
SSCALKERNEL = scal_vfp.S
|
||||
DSCALKERNEL = scal_vfp.S
|
||||
CSCALKERNEL = scal_vfp.S
|
||||
ZSCALKERNEL = scal_vfp.S
|
||||
|
||||
SSWAPKERNEL = swap_vfp.S
|
||||
DSWAPKERNEL = swap_vfp.S
|
||||
CSWAPKERNEL = swap_vfp.S
|
||||
ZSWAPKERNEL = swap_vfp.S
|
||||
|
||||
SGEMVNKERNEL = gemv_n_vfp.S
|
||||
DGEMVNKERNEL = gemv_n_vfp.S
|
||||
CGEMVNKERNEL = cgemv_n_vfp.S
|
||||
ZGEMVNKERNEL = zgemv_n_vfp.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t_vfp.S
|
||||
DGEMVTKERNEL = gemv_t_vfp.S
|
||||
CGEMVTKERNEL = cgemv_t_vfp.S
|
||||
ZGEMVTKERNEL = zgemv_t_vfp.S
|
||||
|
||||
STRMMKERNEL = strmm_kernel_4x2_vfp.S
|
||||
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
|
||||
CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
|
||||
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_4x2_vfp.S
|
||||
SGEMMINCOPY = sgemm_ncopy_4_vfp.S
|
||||
SGEMMITCOPY = sgemm_tcopy_4_vfp.S
|
||||
SGEMMINCOPYOBJ = sgemm_incopy.o
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy.o
|
||||
SGEMMONCOPY = sgemm_ncopy_2_vfp.S
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_4x2_vfp.S
|
||||
DGEMMINCOPY = dgemm_ncopy_4_vfp.S
|
||||
DGEMMITCOPY = dgemm_tcopy_4_vfp.S
|
||||
DGEMMINCOPYOBJ = dgemm_incopy.o
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
||||
DGEMMONCOPY = dgemm_ncopy_2_vfp.S
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_2x2_vfp.S
|
||||
CGEMMONCOPY = cgemm_ncopy_2_vfp.S
|
||||
CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_2x2_vfp.S
|
||||
ZGEMMONCOPY = zgemm_ncopy_2_vfp.S
|
||||
ZGEMMOTCOPY = zgemm_tcopy_2_vfp.S
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
|
||||
|
||||
|
||||
141
kernel/arm/KERNEL.ARMV7
Normal file
141
kernel/arm/KERNEL.ARMV7
Normal file
@@ -0,0 +1,141 @@
|
||||
SAMAXKERNEL = iamax_vfp.S
|
||||
DAMAXKERNEL = iamax_vfp.S
|
||||
CAMAXKERNEL = iamax_vfp.S
|
||||
ZAMAXKERNEL = iamax_vfp.S
|
||||
|
||||
SAMINKERNEL = iamax_vfp.S
|
||||
DAMINKERNEL = iamax_vfp.S
|
||||
CAMINKERNEL = iamax_vfp.S
|
||||
ZAMINKERNEL = iamax_vfp.S
|
||||
|
||||
SMAXKERNEL = iamax_vfp.S
|
||||
DMAXKERNEL = iamax_vfp.S
|
||||
|
||||
SMINKERNEL = iamax_vfp.S
|
||||
DMINKERNEL = iamax_vfp.S
|
||||
|
||||
ISAMAXKERNEL = iamax_vfp.S
|
||||
IDAMAXKERNEL = iamax_vfp.S
|
||||
ICAMAXKERNEL = iamax_vfp.S
|
||||
IZAMAXKERNEL = iamax_vfp.S
|
||||
|
||||
ISAMINKERNEL = iamax_vfp.S
|
||||
IDAMINKERNEL = iamax_vfp.S
|
||||
ICAMINKERNEL = iamax_vfp.S
|
||||
IZAMINKERNEL = iamax_vfp.S
|
||||
|
||||
ISMAXKERNEL = iamax_vfp.S
|
||||
IDMAXKERNEL = iamax_vfp.S
|
||||
|
||||
ISMINKERNEL = iamax_vfp.S
|
||||
IDMINKERNEL = iamax_vfp.S
|
||||
|
||||
SSWAPKERNEL = swap_vfp.S
|
||||
DSWAPKERNEL = swap_vfp.S
|
||||
CSWAPKERNEL = swap_vfp.S
|
||||
ZSWAPKERNEL = swap_vfp.S
|
||||
|
||||
SASUMKERNEL = asum_vfp.S
|
||||
DASUMKERNEL = asum_vfp.S
|
||||
CASUMKERNEL = asum_vfp.S
|
||||
ZASUMKERNEL = asum_vfp.S
|
||||
|
||||
SAXPYKERNEL = axpy_vfp.S
|
||||
DAXPYKERNEL = axpy_vfp.S
|
||||
CAXPYKERNEL = axpy_vfp.S
|
||||
ZAXPYKERNEL = axpy_vfp.S
|
||||
|
||||
SCOPYKERNEL = scopy_vfp.S
|
||||
DCOPYKERNEL = dcopy_vfp.S
|
||||
CCOPYKERNEL = ccopy_vfp.S
|
||||
ZCOPYKERNEL = zcopy_vfp.S
|
||||
|
||||
SDOTKERNEL = sdot_vfp.S
|
||||
DDOTKERNEL = ddot_vfp.S
|
||||
CDOTKERNEL = cdot_vfp.S
|
||||
ZDOTKERNEL = zdot_vfp.S
|
||||
|
||||
SNRM2KERNEL = nrm2_vfpv3.S
|
||||
DNRM2KERNEL = nrm2_vfpv3.S
|
||||
CNRM2KERNEL = nrm2_vfpv3.S
|
||||
ZNRM2KERNEL = nrm2_vfpv3.S
|
||||
|
||||
SROTKERNEL = rot_vfp.S
|
||||
DROTKERNEL = rot_vfp.S
|
||||
CROTKERNEL = rot_vfp.S
|
||||
ZROTKERNEL = rot_vfp.S
|
||||
|
||||
SSCALKERNEL = scal_vfp.S
|
||||
DSCALKERNEL = scal_vfp.S
|
||||
CSCALKERNEL = scal_vfp.S
|
||||
ZSCALKERNEL = scal_vfp.S
|
||||
|
||||
SGEMVNKERNEL = gemv_n_vfp.S
|
||||
DGEMVNKERNEL = gemv_n_vfp.S
|
||||
CGEMVNKERNEL = cgemv_n_vfp.S
|
||||
ZGEMVNKERNEL = zgemv_n_vfp.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t_vfp.S
|
||||
DGEMVTKERNEL = gemv_t_vfp.S
|
||||
CGEMVTKERNEL = cgemv_t_vfp.S
|
||||
ZGEMVTKERNEL = zgemv_t_vfp.S
|
||||
|
||||
STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
|
||||
DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
|
||||
CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
|
||||
ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S
|
||||
|
||||
#SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S
|
||||
SGEMMINCOPY =
|
||||
SGEMMITCOPY =
|
||||
SGEMMONCOPY = sgemm_ncopy_4_vfp.S
|
||||
SGEMMOTCOPY = sgemm_tcopy_4_vfp.S
|
||||
SGEMMINCOPYOBJ =
|
||||
SGEMMITCOPYOBJ =
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S
|
||||
DGEMMINCOPY =
|
||||
DGEMMITCOPY =
|
||||
DGEMMONCOPY = dgemm_ncopy_4_vfp.S
|
||||
DGEMMOTCOPY = dgemm_tcopy_4_vfp.S
|
||||
DGEMMINCOPYOBJ =
|
||||
DGEMMITCOPYOBJ =
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S
|
||||
CGEMMONCOPY = cgemm_ncopy_2_vfp.S
|
||||
CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_2x2_vfpv3.S
|
||||
ZGEMMONCOPY = zgemm_ncopy_2_vfp.S
|
||||
ZGEMMOTCOPY = zgemm_tcopy_2_vfp.S
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
|
||||
2
kernel/arm/Makefile
Normal file
2
kernel/arm/Makefile
Normal file
@@ -0,0 +1,2 @@
|
||||
clean ::
|
||||
|
||||
73
kernel/arm/amax.c
Normal file
73
kernel/arm/amax.c
Normal file
@@ -0,0 +1,73 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : NoTest
|
||||
* TEST : NoTest
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT maxf=0.0;
|
||||
|
||||
if (n < 0 || inc_x < 1 ) return(maxf);
|
||||
|
||||
maxf=ABS(x[0]);
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
if( ABS(x[ix]) > ABS(maxf) )
|
||||
{
|
||||
maxf = ABS(x[ix]);
|
||||
}
|
||||
ix += inc_x;
|
||||
i++;
|
||||
}
|
||||
return(maxf);
|
||||
}
|
||||
|
||||
|
||||
73
kernel/arm/amin.c
Normal file
73
kernel/arm/amin.c
Normal file
@@ -0,0 +1,73 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : NoTest
|
||||
* TEST : NoTest
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT minf=0.0;
|
||||
|
||||
if (n < 0 || inc_x < 1 ) return(minf);
|
||||
|
||||
minf=ABS(x[0]);
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
if( ABS(x[ix]) < ABS(minf) )
|
||||
{
|
||||
minf = ABS(x[ix]);
|
||||
}
|
||||
ix += inc_x;
|
||||
i++;
|
||||
}
|
||||
return(minf);
|
||||
}
|
||||
|
||||
|
||||
67
kernel/arm/asum.c
Normal file
67
kernel/arm/asum.c
Normal file
@@ -0,0 +1,67 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
FLOAT sumf = 0.0;
|
||||
if (n < 0 || inc_x < 1 ) return(sumf);
|
||||
|
||||
n *= inc_x;
|
||||
while(i < n)
|
||||
{
|
||||
sumf += ABS(x[i]);
|
||||
i += inc_x;
|
||||
}
|
||||
return(sumf);
|
||||
}
|
||||
|
||||
|
||||
481
kernel/arm/asum_vfp.S
Normal file
481
kernel/arm/asum_vfp.S
Normal file
@@ -0,0 +1,481 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/11/11 Saar
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
|
||||
#define STACKSIZE 256
|
||||
|
||||
#define N r0
|
||||
#define X r1
|
||||
#define INC_X r2
|
||||
|
||||
|
||||
#define I r12
|
||||
|
||||
#define X_PRE 512
|
||||
|
||||
/**************************************************************************************
|
||||
* Macro definitions
|
||||
**************************************************************************************/
|
||||
|
||||
#if !defined(COMPLEX)
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
.macro KERNEL_F4
|
||||
|
||||
pld [ X, #X_PRE ]
|
||||
fldmiad X!, { d4 - d5 }
|
||||
vabs.f64 d4, d4
|
||||
vadd.f64 d0 , d0, d4
|
||||
vabs.f64 d5, d5
|
||||
fldmiad X!, { d6 - d7 }
|
||||
vabs.f64 d6, d6
|
||||
vadd.f64 d1 , d1, d5
|
||||
vabs.f64 d7, d7
|
||||
vadd.f64 d0 , d0, d6
|
||||
vadd.f64 d1 , d1, d7
|
||||
|
||||
.endm
|
||||
|
||||
.macro KERNEL_F1
|
||||
|
||||
fldmiad X!, { d4 }
|
||||
vabs.f64 d4, d4
|
||||
vadd.f64 d0 , d0, d4
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
.macro KERNEL_S4
|
||||
|
||||
fldmiad X, { d4 }
|
||||
vabs.f64 d4, d4
|
||||
vadd.f64 d0 , d0, d4
|
||||
add X, X, INC_X
|
||||
|
||||
fldmiad X, { d4 }
|
||||
vabs.f64 d4, d4
|
||||
vadd.f64 d0 , d0, d4
|
||||
add X, X, INC_X
|
||||
|
||||
fldmiad X, { d4 }
|
||||
vabs.f64 d4, d4
|
||||
vadd.f64 d0 , d0, d4
|
||||
add X, X, INC_X
|
||||
|
||||
fldmiad X, { d4 }
|
||||
vabs.f64 d4, d4
|
||||
vadd.f64 d0 , d0, d4
|
||||
add X, X, INC_X
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
.macro KERNEL_S1
|
||||
|
||||
fldmiad X, { d4 }
|
||||
vabs.f64 d4, d4
|
||||
vadd.f64 d0 , d0, d4
|
||||
add X, X, INC_X
|
||||
|
||||
.endm
|
||||
|
||||
#else
|
||||
|
||||
.macro KERNEL_F4
|
||||
|
||||
fldmias X!, { s4 - s5 }
|
||||
vabs.f32 s4, s4
|
||||
vadd.f32 s0 , s0, s4
|
||||
vabs.f32 s5, s5
|
||||
fldmias X!, { s6 - s7 }
|
||||
vabs.f32 s6, s6
|
||||
vadd.f32 s1 , s1, s5
|
||||
vabs.f32 s7, s7
|
||||
vadd.f32 s0 , s0, s6
|
||||
vadd.f32 s1 , s1, s7
|
||||
|
||||
.endm
|
||||
|
||||
.macro KERNEL_F1
|
||||
|
||||
fldmias X!, { s4 }
|
||||
vabs.f32 s4, s4
|
||||
vadd.f32 s0 , s0, s4
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
.macro KERNEL_S4
|
||||
|
||||
fldmias X, { s4 }
|
||||
vabs.f32 s4, s4
|
||||
vadd.f32 s0 , s0, s4
|
||||
add X, X, INC_X
|
||||
|
||||
fldmias X, { s4 }
|
||||
vabs.f32 s4, s4
|
||||
vadd.f32 s0 , s0, s4
|
||||
add X, X, INC_X
|
||||
|
||||
fldmias X, { s4 }
|
||||
vabs.f32 s4, s4
|
||||
vadd.f32 s0 , s0, s4
|
||||
add X, X, INC_X
|
||||
|
||||
fldmias X, { s4 }
|
||||
vabs.f32 s4, s4
|
||||
vadd.f32 s0 , s0, s4
|
||||
add X, X, INC_X
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
.macro KERNEL_S1
|
||||
|
||||
fldmias X, { s4 }
|
||||
vabs.f32 s4, s4
|
||||
vadd.f32 s0 , s0, s4
|
||||
add X, X, INC_X
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
.macro KERNEL_F4
|
||||
|
||||
pld [ X, #X_PRE ]
|
||||
fldmiad X!, { d4 - d5 }
|
||||
vabs.f64 d4, d4
|
||||
vadd.f64 d0 , d0, d4
|
||||
vabs.f64 d5, d5
|
||||
fldmiad X!, { d6 - d7 }
|
||||
vabs.f64 d6, d6
|
||||
vadd.f64 d1 , d1, d5
|
||||
vabs.f64 d7, d7
|
||||
vadd.f64 d0 , d0, d6
|
||||
vadd.f64 d1 , d1, d7
|
||||
|
||||
pld [ X, #X_PRE ]
|
||||
fldmiad X!, { d4 - d5 }
|
||||
vabs.f64 d4, d4
|
||||
vadd.f64 d0 , d0, d4
|
||||
vabs.f64 d5, d5
|
||||
fldmiad X!, { d6 - d7 }
|
||||
vabs.f64 d6, d6
|
||||
vadd.f64 d1 , d1, d5
|
||||
vabs.f64 d7, d7
|
||||
vadd.f64 d0 , d0, d6
|
||||
vadd.f64 d1 , d1, d7
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
.macro KERNEL_F1
|
||||
|
||||
fldmiad X!, { d4 }
|
||||
vabs.f64 d4, d4
|
||||
vadd.f64 d0 , d0, d4
|
||||
|
||||
fldmiad X!, { d4 }
|
||||
vabs.f64 d4, d4
|
||||
vadd.f64 d0 , d0, d4
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
.macro KERNEL_S4
|
||||
|
||||
fldmiad X, { d4 -d5 }
|
||||
vabs.f64 d4, d4
|
||||
vadd.f64 d0 , d0, d4
|
||||
vabs.f64 d5, d5
|
||||
vadd.f64 d0 , d0, d5
|
||||
add X, X, INC_X
|
||||
|
||||
fldmiad X, { d4 -d5 }
|
||||
vabs.f64 d4, d4
|
||||
vadd.f64 d0 , d0, d4
|
||||
vabs.f64 d5, d5
|
||||
vadd.f64 d0 , d0, d5
|
||||
add X, X, INC_X
|
||||
|
||||
fldmiad X, { d4 -d5 }
|
||||
vabs.f64 d4, d4
|
||||
vadd.f64 d0 , d0, d4
|
||||
vabs.f64 d5, d5
|
||||
vadd.f64 d0 , d0, d5
|
||||
add X, X, INC_X
|
||||
|
||||
fldmiad X, { d4 -d5 }
|
||||
vabs.f64 d4, d4
|
||||
vadd.f64 d0 , d0, d4
|
||||
vabs.f64 d5, d5
|
||||
vadd.f64 d0 , d0, d5
|
||||
add X, X, INC_X
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
.macro KERNEL_S1
|
||||
|
||||
fldmiad X, { d4 -d5 }
|
||||
vabs.f64 d4, d4
|
||||
vadd.f64 d0 , d0, d4
|
||||
vabs.f64 d5, d5
|
||||
vadd.f64 d0 , d0, d5
|
||||
add X, X, INC_X
|
||||
|
||||
.endm
|
||||
|
||||
#else
|
||||
|
||||
.macro KERNEL_F4
|
||||
|
||||
pld [ X, #X_PRE ]
|
||||
fldmias X!, { s4 - s5 }
|
||||
vabs.f32 s4, s4
|
||||
vadd.f32 s0 , s0, s4
|
||||
vabs.f32 s5, s5
|
||||
fldmias X!, { s6 - s7 }
|
||||
vabs.f32 s6, s6
|
||||
vadd.f32 s1 , s1, s5
|
||||
vabs.f32 s7, s7
|
||||
vadd.f32 s0 , s0, s6
|
||||
vadd.f32 s1 , s1, s7
|
||||
|
||||
fldmias X!, { s4 - s5 }
|
||||
vabs.f32 s4, s4
|
||||
vadd.f32 s0 , s0, s4
|
||||
vabs.f32 s5, s5
|
||||
fldmias X!, { s6 - s7 }
|
||||
vabs.f32 s6, s6
|
||||
vadd.f32 s1 , s1, s5
|
||||
vabs.f32 s7, s7
|
||||
vadd.f32 s0 , s0, s6
|
||||
vadd.f32 s1 , s1, s7
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
.macro KERNEL_F1
|
||||
|
||||
fldmias X!, { s4 }
|
||||
vabs.f32 s4, s4
|
||||
vadd.f32 s0 , s0, s4
|
||||
|
||||
fldmias X!, { s4 }
|
||||
vabs.f32 s4, s4
|
||||
vadd.f32 s0 , s0, s4
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
.macro KERNEL_S4
|
||||
|
||||
fldmias X, { s4 -s5 }
|
||||
vabs.f32 s4, s4
|
||||
vadd.f32 s0 , s0, s4
|
||||
vabs.f32 s5, s5
|
||||
vadd.f32 s0 , s0, s5
|
||||
add X, X, INC_X
|
||||
|
||||
fldmias X, { s4 -s5 }
|
||||
vabs.f32 s4, s4
|
||||
vadd.f32 s0 , s0, s4
|
||||
vabs.f32 s5, s5
|
||||
vadd.f32 s0 , s0, s5
|
||||
add X, X, INC_X
|
||||
|
||||
fldmias X, { s4 -s5 }
|
||||
vabs.f32 s4, s4
|
||||
vadd.f32 s0 , s0, s4
|
||||
vabs.f32 s5, s5
|
||||
vadd.f32 s0 , s0, s5
|
||||
add X, X, INC_X
|
||||
|
||||
fldmias X, { s4 -s5 }
|
||||
vabs.f32 s4, s4
|
||||
vadd.f32 s0 , s0, s4
|
||||
vabs.f32 s5, s5
|
||||
vadd.f32 s0 , s0, s5
|
||||
add X, X, INC_X
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
.macro KERNEL_S1
|
||||
|
||||
fldmias X, { s4 -s5 }
|
||||
vabs.f32 s4, s4
|
||||
vadd.f32 s0 , s0, s4
|
||||
vabs.f32 s5, s5
|
||||
vadd.f32 s0 , s0, s5
|
||||
add X, X, INC_X
|
||||
|
||||
.endm
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/**************************************************************************************
|
||||
* End of macro definitions
|
||||
**************************************************************************************/
|
||||
|
||||
PROLOGUE
|
||||
|
||||
.align 5
|
||||
|
||||
#if defined(DOUBLE)
|
||||
vsub.f64 d0 , d0 , d0
|
||||
vsub.f64 d1 , d1 , d1
|
||||
#else
|
||||
vsub.f32 s0 , s0 , s0
|
||||
vsub.f32 s1 , s1 , s1
|
||||
#endif
|
||||
|
||||
cmp N, #0
|
||||
ble asum_kernel_L999
|
||||
|
||||
cmp INC_X, #0
|
||||
beq asum_kernel_L999
|
||||
|
||||
cmp INC_X, #1
|
||||
bne asum_kernel_S_BEGIN
|
||||
|
||||
|
||||
asum_kernel_F_BEGIN:
|
||||
|
||||
asrs I, N, #2 // I = N / 4
|
||||
ble asum_kernel_F1
|
||||
|
||||
.align 5
|
||||
|
||||
asum_kernel_F4:
|
||||
|
||||
#if !defined(DOUBLE) && !defined(COMPLEX)
|
||||
pld [ X, #X_PRE ]
|
||||
#endif
|
||||
KERNEL_F4
|
||||
|
||||
subs I, I, #1
|
||||
ble asum_kernel_F1
|
||||
|
||||
KERNEL_F4
|
||||
|
||||
subs I, I, #1
|
||||
bne asum_kernel_F4
|
||||
|
||||
asum_kernel_F1:
|
||||
|
||||
ands I, N, #3
|
||||
ble asum_kernel_L999
|
||||
|
||||
asum_kernel_F10:
|
||||
|
||||
KERNEL_F1
|
||||
|
||||
subs I, I, #1
|
||||
bne asum_kernel_F10
|
||||
|
||||
b asum_kernel_L999
|
||||
|
||||
asum_kernel_S_BEGIN:
|
||||
|
||||
#if defined(COMPLEX)
|
||||
|
||||
#if defined(DOUBLE)
|
||||
lsl INC_X, INC_X, #4 // INC_X * SIZE * 2
|
||||
#else
|
||||
lsl INC_X, INC_X, #3 // INC_X * SIZE * 2
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if defined(DOUBLE)
|
||||
lsl INC_X, INC_X, #3 // INC_X * SIZE
|
||||
#else
|
||||
lsl INC_X, INC_X, #2 // INC_X * SIZE
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
asrs I, N, #2 // I = N / 4
|
||||
ble asum_kernel_S1
|
||||
|
||||
.align 5
|
||||
|
||||
asum_kernel_S4:
|
||||
|
||||
KERNEL_S4
|
||||
|
||||
subs I, I, #1
|
||||
bne asum_kernel_S4
|
||||
|
||||
asum_kernel_S1:
|
||||
|
||||
ands I, N, #3
|
||||
ble asum_kernel_L999
|
||||
|
||||
asum_kernel_S10:
|
||||
|
||||
KERNEL_S1
|
||||
|
||||
subs I, I, #1
|
||||
bne asum_kernel_S10
|
||||
|
||||
|
||||
asum_kernel_L999:
|
||||
|
||||
|
||||
#if defined(DOUBLE)
|
||||
vadd.f64 d0 , d0, d1 // set return value
|
||||
#else
|
||||
vadd.f32 s0 , s0, s1 // set return value
|
||||
#endif
|
||||
|
||||
bx lr
|
||||
|
||||
EPILOGUE
|
||||
|
||||
64
kernel/arm/axpy.c
Normal file
64
kernel/arm/axpy.c
Normal file
@@ -0,0 +1,64 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix,iy;
|
||||
|
||||
if ( n < 0 ) return(0);
|
||||
if ( da == 0.0 ) return(0);
|
||||
|
||||
ix = 0;
|
||||
iy = 0;
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
|
||||
y[iy] += da * x[ix] ;
|
||||
ix += inc_x ;
|
||||
iy += inc_y ;
|
||||
i++ ;
|
||||
|
||||
}
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
503
kernel/arm/axpy_vfp.S
Normal file
503
kernel/arm/axpy_vfp.S
Normal file
@@ -0,0 +1,503 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/11/14 Saar
|
||||
* BLASTEST : xOK
|
||||
* CTEST : xOK
|
||||
* TEST : xOK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
|
||||
#define STACKSIZE 256
|
||||
|
||||
#define OLD_INC_X [fp, #0 ]
|
||||
#define OLD_Y [fp, #4 ]
|
||||
#define OLD_INC_Y [fp, #8 ]
|
||||
|
||||
|
||||
#define N r0
|
||||
#define Y r1
|
||||
#define INC_X r2
|
||||
#define X r3
|
||||
#define INC_Y r4
|
||||
|
||||
#define I r12
|
||||
|
||||
#define X_PRE 512
|
||||
|
||||
/**************************************************************************************
|
||||
* Macro definitions
|
||||
**************************************************************************************/
|
||||
|
||||
/*****************************************************************************************/
|
||||
|
||||
#if !defined(CONJ)
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define FMAC_R1 fmacd
|
||||
#define FMAC_R2 fnmacd
|
||||
#define FMAC_I1 fmacd
|
||||
#define FMAC_I2 fmacd
|
||||
|
||||
#else
|
||||
|
||||
#define FMAC_R1 fmacs
|
||||
#define FMAC_R2 fnmacs
|
||||
#define FMAC_I1 fmacs
|
||||
#define FMAC_I2 fmacs
|
||||
|
||||
#endif
|
||||
|
||||
#else // CONJ
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define FMAC_R1 fmacd
|
||||
#define FMAC_R2 fmacd
|
||||
#define FMAC_I1 fnmacd
|
||||
#define FMAC_I2 fmacd
|
||||
|
||||
#else
|
||||
|
||||
#define FMAC_R1 fmacs
|
||||
#define FMAC_R2 fmacs
|
||||
#define FMAC_I1 fnmacs
|
||||
#define FMAC_I2 fmacs
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#if !defined(COMPLEX)
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
.macro KERNEL_F4
|
||||
|
||||
pld [ X, #X_PRE ]
|
||||
fldmiad X!, { d4 - d7 }
|
||||
pld [ Y, #X_PRE ]
|
||||
fldmiad Y , { d8 - d11 }
|
||||
fmacd d8 , d0, d4
|
||||
fstmiad Y!, { d8 }
|
||||
fmacd d9 , d0, d5
|
||||
fstmiad Y!, { d9 }
|
||||
fmacd d10, d0, d6
|
||||
fstmiad Y!, { d10 }
|
||||
fmacd d11, d0, d7
|
||||
fstmiad Y!, { d11 }
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
.macro KERNEL_F1
|
||||
|
||||
fldmiad X!, { d4 }
|
||||
fldmiad Y , { d8 }
|
||||
fmacd d8 , d0, d4
|
||||
fstmiad Y!, { d8 }
|
||||
|
||||
.endm
|
||||
|
||||
.macro KERNEL_S1
|
||||
|
||||
fldmiad X , { d4 }
|
||||
fldmiad Y , { d8 }
|
||||
fmacd d8 , d0, d4
|
||||
fstmiad Y , { d8 }
|
||||
add X, X, INC_X
|
||||
add Y, Y, INC_Y
|
||||
|
||||
.endm
|
||||
|
||||
#else
|
||||
|
||||
.macro KERNEL_F4
|
||||
|
||||
fldmias X!, { s4 - s7 }
|
||||
fldmias Y , { s8 - s11 }
|
||||
fmacs s8 , s0, s4
|
||||
fstmias Y!, { s8 }
|
||||
fmacs s9 , s0, s5
|
||||
fstmias Y!, { s9 }
|
||||
fmacs s10, s0, s6
|
||||
fstmias Y!, { s10 }
|
||||
fmacs s11, s0, s7
|
||||
fstmias Y!, { s11 }
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
.macro KERNEL_F1
|
||||
|
||||
fldmias X!, { s4 }
|
||||
fldmias Y , { s8 }
|
||||
fmacs s8 , s0, s4
|
||||
fstmias Y!, { s8 }
|
||||
|
||||
.endm
|
||||
|
||||
.macro KERNEL_S1
|
||||
|
||||
fldmias X , { s4 }
|
||||
fldmias Y , { s8 }
|
||||
fmacs s8 , s0, s4
|
||||
fstmias Y , { s8 }
|
||||
add X, X, INC_X
|
||||
add Y, Y, INC_Y
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
.macro KERNEL_F4
|
||||
|
||||
pld [ X, #X_PRE ]
|
||||
fldmiad X!, { d4 - d7 }
|
||||
pld [ Y, #X_PRE ]
|
||||
fldmiad Y , { d8 - d11 }
|
||||
|
||||
FMAC_R1 d8 , d0, d4
|
||||
FMAC_R2 d8 , d1, d5
|
||||
FMAC_I1 d9 , d0, d5
|
||||
FMAC_I2 d9 , d1, d4
|
||||
fstmiad Y!, { d8 }
|
||||
fstmiad Y!, { d9 }
|
||||
|
||||
FMAC_R1 d10, d0, d6
|
||||
FMAC_R2 d10, d1, d7
|
||||
FMAC_I1 d11, d0, d7
|
||||
FMAC_I2 d11, d1, d6
|
||||
fstmiad Y!, { d10 }
|
||||
fstmiad Y!, { d11 }
|
||||
|
||||
pld [ X, #X_PRE ]
|
||||
fldmiad X!, { d4 - d7 }
|
||||
pld [ Y, #X_PRE ]
|
||||
fldmiad Y , { d8 - d11 }
|
||||
|
||||
FMAC_R1 d8 , d0, d4
|
||||
FMAC_R2 d8 , d1, d5
|
||||
FMAC_I1 d9 , d0, d5
|
||||
FMAC_I2 d9 , d1, d4
|
||||
fstmiad Y!, { d8 }
|
||||
fstmiad Y!, { d9 }
|
||||
|
||||
FMAC_R1 d10, d0, d6
|
||||
FMAC_R2 d10, d1, d7
|
||||
FMAC_I1 d11, d0, d7
|
||||
FMAC_I2 d11, d1, d6
|
||||
fstmiad Y!, { d10 }
|
||||
fstmiad Y!, { d11 }
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
.macro KERNEL_F1
|
||||
|
||||
fldmiad X!, { d4 - d5 }
|
||||
fldmiad Y , { d8 - d9 }
|
||||
|
||||
FMAC_R1 d8 , d0, d4
|
||||
FMAC_R2 d8 , d1, d5
|
||||
FMAC_I1 d9 , d0, d5
|
||||
FMAC_I2 d9 , d1, d4
|
||||
fstmiad Y!, { d8 }
|
||||
fstmiad Y!, { d9 }
|
||||
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
.macro KERNEL_S1
|
||||
|
||||
fldmiad X , { d4 - d5 }
|
||||
fldmiad Y , { d8 - d9 }
|
||||
|
||||
FMAC_R1 d8 , d0, d4
|
||||
FMAC_R2 d8 , d1, d5
|
||||
FMAC_I1 d9 , d0, d5
|
||||
FMAC_I2 d9 , d1, d4
|
||||
fstmiad Y , { d8 - d9 }
|
||||
|
||||
add X, X, INC_X
|
||||
add Y, Y, INC_Y
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
#else
|
||||
|
||||
.macro KERNEL_F4
|
||||
|
||||
pld [ X, #X_PRE ]
|
||||
fldmias X!, { s4 - s7 }
|
||||
pld [ Y, #X_PRE ]
|
||||
fldmias Y , { s8 - s11 }
|
||||
|
||||
FMAC_R1 s8 , s0, s4
|
||||
FMAC_R2 s8 , s1, s5
|
||||
FMAC_I1 s9 , s0, s5
|
||||
FMAC_I2 s9 , s1, s4
|
||||
fstmias Y!, { s8 }
|
||||
fstmias Y!, { s9 }
|
||||
|
||||
FMAC_R1 s10, s0, s6
|
||||
FMAC_R2 s10, s1, s7
|
||||
FMAC_I1 s11, s0, s7
|
||||
FMAC_I2 s11, s1, s6
|
||||
fstmias Y!, { s10 }
|
||||
fstmias Y!, { s11 }
|
||||
|
||||
fldmias X!, { s4 - s7 }
|
||||
fldmias Y , { s8 - s11 }
|
||||
|
||||
FMAC_R1 s8 , s0, s4
|
||||
FMAC_R2 s8 , s1, s5
|
||||
FMAC_I1 s9 , s0, s5
|
||||
FMAC_I2 s9 , s1, s4
|
||||
fstmias Y!, { s8 }
|
||||
fstmias Y!, { s9 }
|
||||
|
||||
FMAC_R1 s10, s0, s6
|
||||
FMAC_R2 s10, s1, s7
|
||||
FMAC_I1 s11, s0, s7
|
||||
FMAC_I2 s11, s1, s6
|
||||
fstmias Y!, { s10 }
|
||||
fstmias Y!, { s11 }
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
.macro KERNEL_F1
|
||||
|
||||
fldmias X!, { s4 - s5 }
|
||||
fldmias Y , { s8 - s9 }
|
||||
|
||||
FMAC_R1 s8 , s0, s4
|
||||
FMAC_R2 s8 , s1, s5
|
||||
FMAC_I1 s9 , s0, s5
|
||||
FMAC_I2 s9 , s1, s4
|
||||
fstmias Y!, { s8 }
|
||||
fstmias Y!, { s9 }
|
||||
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
.macro KERNEL_S1
|
||||
|
||||
fldmias X , { s4 - s5 }
|
||||
fldmias Y , { s8 - s9 }
|
||||
|
||||
FMAC_R1 s8 , s0, s4
|
||||
FMAC_R2 s8 , s1, s5
|
||||
FMAC_I1 s9 , s0, s5
|
||||
FMAC_I2 s9 , s1, s4
|
||||
fstmias Y , { s8 - s9 }
|
||||
|
||||
add X, X, INC_X
|
||||
add Y, Y, INC_Y
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/**************************************************************************************
|
||||
* End of macro definitions
|
||||
**************************************************************************************/
|
||||
|
||||
PROLOGUE
|
||||
|
||||
.align 5
|
||||
push {r4 , fp}
|
||||
add fp, sp, #8
|
||||
sub sp, sp, #STACKSIZE // reserve stack
|
||||
|
||||
ldr INC_X , OLD_INC_X
|
||||
ldr Y, OLD_Y
|
||||
ldr INC_Y , OLD_INC_Y
|
||||
|
||||
sub r12, fp, #128
|
||||
|
||||
#if defined(DOUBLE)
|
||||
vstm r12, { d8 - d15} // store floating point registers
|
||||
#else
|
||||
vstm r12, { s8 - s15} // store floating point registers
|
||||
#endif
|
||||
|
||||
cmp N, #0
|
||||
ble axpy_kernel_L999
|
||||
|
||||
cmp INC_X, #0
|
||||
beq axpy_kernel_L999
|
||||
|
||||
cmp INC_Y, #0
|
||||
beq axpy_kernel_L999
|
||||
|
||||
cmp INC_X, #1
|
||||
bne axpy_kernel_S_BEGIN
|
||||
|
||||
cmp INC_Y, #1
|
||||
bne axpy_kernel_S_BEGIN
|
||||
|
||||
|
||||
axpy_kernel_F_BEGIN:
|
||||
|
||||
|
||||
asrs I, N, #2 // I = N / 4
|
||||
ble axpy_kernel_F1
|
||||
|
||||
.align 5
|
||||
|
||||
axpy_kernel_F4:
|
||||
|
||||
#if !defined(COMPLEX) && !defined(DOUBLE)
|
||||
pld [ X, #X_PRE ]
|
||||
pld [ Y, #X_PRE ]
|
||||
#endif
|
||||
|
||||
KERNEL_F4
|
||||
|
||||
subs I, I, #1
|
||||
ble axpy_kernel_F1
|
||||
|
||||
KERNEL_F4
|
||||
|
||||
subs I, I, #1
|
||||
bne axpy_kernel_F4
|
||||
|
||||
axpy_kernel_F1:
|
||||
|
||||
ands I, N, #3
|
||||
ble axpy_kernel_L999
|
||||
|
||||
axpy_kernel_F10:
|
||||
|
||||
KERNEL_F1
|
||||
|
||||
subs I, I, #1
|
||||
bne axpy_kernel_F10
|
||||
|
||||
b axpy_kernel_L999
|
||||
|
||||
axpy_kernel_S_BEGIN:
|
||||
|
||||
#if defined(COMPLEX)
|
||||
|
||||
#if defined(DOUBLE)
|
||||
lsl INC_X, INC_X, #4 // INC_X * SIZE * 2
|
||||
lsl INC_Y, INC_Y, #4 // INC_Y * SIZE * 2
|
||||
#else
|
||||
lsl INC_X, INC_X, #3 // INC_X * SIZE * 2
|
||||
lsl INC_Y, INC_Y, #3 // INC_Y * SIZE * 2
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if defined(DOUBLE)
|
||||
lsl INC_X, INC_X, #3 // INC_X * SIZE
|
||||
lsl INC_Y, INC_Y, #3 // INC_Y * SIZE
|
||||
#else
|
||||
lsl INC_X, INC_X, #2 // INC_X * SIZE
|
||||
lsl INC_Y, INC_Y, #2 // INC_Y * SIZE
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
asrs I, N, #2 // I = N / 4
|
||||
ble axpy_kernel_S1
|
||||
|
||||
.align 5
|
||||
|
||||
axpy_kernel_S4:
|
||||
|
||||
KERNEL_S1
|
||||
KERNEL_S1
|
||||
KERNEL_S1
|
||||
KERNEL_S1
|
||||
|
||||
subs I, I, #1
|
||||
bne axpy_kernel_S4
|
||||
|
||||
axpy_kernel_S1:
|
||||
|
||||
ands I, N, #3
|
||||
ble axpy_kernel_L999
|
||||
|
||||
axpy_kernel_S10:
|
||||
|
||||
KERNEL_S1
|
||||
|
||||
subs I, I, #1
|
||||
bne axpy_kernel_S10
|
||||
|
||||
|
||||
axpy_kernel_L999:
|
||||
|
||||
sub r3, fp, #128
|
||||
|
||||
#if defined(DOUBLE)
|
||||
vldm r3, { d8 - d15 } // restore floating point registers
|
||||
#else
|
||||
vldm r3, { s8 - s15 } // restore floating point registers
|
||||
#endif
|
||||
|
||||
mov r0, #0 // set return value
|
||||
|
||||
sub sp, fp, #8
|
||||
pop {r4,fp}
|
||||
bx lr
|
||||
|
||||
EPILOGUE
|
||||
|
||||
222
kernel/arm/ccopy_vfp.S
Normal file
222
kernel/arm/ccopy_vfp.S
Normal file
@@ -0,0 +1,222 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/11/07 Saar
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
|
||||
#define STACKSIZE 256
|
||||
|
||||
#define N r0
|
||||
#define X r1
|
||||
#define INC_X r2
|
||||
#define OLD_Y r3
|
||||
|
||||
|
||||
/******************************************************
|
||||
* [fp, #-128] - [fp, #-64] is reserved
|
||||
* for store and restore of floating point
|
||||
* registers
|
||||
*******************************************************/
|
||||
|
||||
#define OLD_INC_Y [fp, #4 ]
|
||||
|
||||
#define I r5
|
||||
#define Y r6
|
||||
#define INC_Y r7
|
||||
|
||||
#define X_PRE 256
|
||||
|
||||
/**************************************************************************************
|
||||
* Macro definitions
|
||||
**************************************************************************************/
|
||||
|
||||
.macro COPY_F4
|
||||
|
||||
pld [ X, #X_PRE ]
|
||||
fldmias X!, { s0 - s7 }
|
||||
fstmias Y!, { s0 - s7 }
|
||||
|
||||
.endm
|
||||
|
||||
.macro COPY_F1
|
||||
|
||||
fldmias X!, { s0 - s1 }
|
||||
fstmias Y!, { s0 - s1 }
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
/*************************************************************************************************************************/
|
||||
|
||||
.macro COPY_S4
|
||||
|
||||
nop
|
||||
fldmias X, { s0 - s1 }
|
||||
fstmias Y, { s0 - s1 }
|
||||
add X, X, INC_X
|
||||
add Y, Y, INC_Y
|
||||
|
||||
fldmias X, { s2 - s3 }
|
||||
fstmias Y, { s2 - s3 }
|
||||
add X, X, INC_X
|
||||
add Y, Y, INC_Y
|
||||
|
||||
fldmias X, { s0 - s1 }
|
||||
fstmias Y, { s0 - s1 }
|
||||
add X, X, INC_X
|
||||
add Y, Y, INC_Y
|
||||
|
||||
fldmias X, { s2 - s3 }
|
||||
fstmias Y, { s2 - s3 }
|
||||
add X, X, INC_X
|
||||
add Y, Y, INC_Y
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
.macro COPY_S1
|
||||
|
||||
fldmias X, { s0 - s1 }
|
||||
fstmias Y, { s0 - s1 }
|
||||
add X, X, INC_X
|
||||
add Y, Y, INC_Y
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
/**************************************************************************************
|
||||
* End of macro definitions
|
||||
**************************************************************************************/
|
||||
|
||||
PROLOGUE
|
||||
|
||||
.align 5
|
||||
|
||||
push {r4 - r9, fp}
|
||||
add fp, sp, #24
|
||||
sub sp, sp, #STACKSIZE // reserve stack
|
||||
|
||||
sub r4, fp, #128
|
||||
vstm r4, { s8 - s15} // store floating point registers
|
||||
|
||||
mov Y, OLD_Y
|
||||
ldr INC_Y, OLD_INC_Y
|
||||
|
||||
cmp N, #0
|
||||
ble ccopy_kernel_L999
|
||||
|
||||
cmp INC_X, #0
|
||||
beq ccopy_kernel_L999
|
||||
|
||||
cmp INC_Y, #0
|
||||
beq ccopy_kernel_L999
|
||||
|
||||
cmp INC_X, #1
|
||||
bne ccopy_kernel_S_BEGIN
|
||||
|
||||
cmp INC_Y, #1
|
||||
bne ccopy_kernel_S_BEGIN
|
||||
|
||||
ccopy_kernel_F_BEGIN:
|
||||
|
||||
asrs I, N, #2 // I = N / 4
|
||||
ble ccopy_kernel_F1
|
||||
|
||||
ccopy_kernel_F4:
|
||||
|
||||
COPY_F4
|
||||
|
||||
subs I, I, #1
|
||||
bne ccopy_kernel_F4
|
||||
|
||||
ccopy_kernel_F1:
|
||||
|
||||
ands I, N, #3
|
||||
ble ccopy_kernel_L999
|
||||
|
||||
ccopy_kernel_F10:
|
||||
|
||||
COPY_F1
|
||||
|
||||
subs I, I, #1
|
||||
bne ccopy_kernel_F10
|
||||
|
||||
b ccopy_kernel_L999
|
||||
|
||||
ccopy_kernel_S_BEGIN:
|
||||
|
||||
lsl INC_X, INC_X, #3 // INC_X * SIZE * 2
|
||||
lsl INC_Y, INC_Y, #3 // INC_Y * SIZE * 2
|
||||
|
||||
asrs I, N, #2 // I = N / 4
|
||||
ble ccopy_kernel_S1
|
||||
|
||||
ccopy_kernel_S4:
|
||||
|
||||
COPY_S4
|
||||
|
||||
subs I, I, #1
|
||||
bne ccopy_kernel_S4
|
||||
|
||||
ccopy_kernel_S1:
|
||||
|
||||
ands I, N, #3
|
||||
ble ccopy_kernel_L999
|
||||
|
||||
ccopy_kernel_S10:
|
||||
|
||||
COPY_S1
|
||||
|
||||
subs I, I, #1
|
||||
bne ccopy_kernel_S10
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
ccopy_kernel_L999:
|
||||
|
||||
sub r3, fp, #128
|
||||
vldm r3, { s8 - s15} // restore floating point registers
|
||||
|
||||
mov r0, #0 // set return value
|
||||
sub sp, fp, #24
|
||||
pop {r4 - r9, fp}
|
||||
bx lr
|
||||
|
||||
EPILOGUE
|
||||
|
||||
284
kernel/arm/cdot_vfp.S
Normal file
284
kernel/arm/cdot_vfp.S
Normal file
@@ -0,0 +1,284 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/11/11 Saar
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
|
||||
#define STACKSIZE 256
|
||||
|
||||
#define N r0
|
||||
#define X r1
|
||||
#define INC_X r2
|
||||
#define OLD_Y r3
|
||||
|
||||
|
||||
/******************************************************
|
||||
* [fp, #-128] - [fp, #-64] is reserved
|
||||
* for store and restore of floating point
|
||||
* registers
|
||||
*******************************************************/
|
||||
|
||||
#define OLD_INC_Y [fp, #4 ]
|
||||
|
||||
#define I r5
|
||||
#define Y r6
|
||||
#define INC_Y r7
|
||||
|
||||
#define X_PRE 512
|
||||
|
||||
/**************************************************************************************
|
||||
* Macro definitions
|
||||
**************************************************************************************/
|
||||
|
||||
.macro KERNEL_F4
|
||||
|
||||
pld [ X, #X_PRE ]
|
||||
pld [ Y, #X_PRE ]
|
||||
|
||||
fldmias X!, { s4 - s5 }
|
||||
fldmias Y!, { s8 - s9 }
|
||||
fmacs s0 , s4, s8
|
||||
fmacs s1 , s4, s9
|
||||
fldmias X!, { s6 - s7 }
|
||||
fmacs s2 , s5, s9
|
||||
fmacs s3 , s5, s8
|
||||
|
||||
fldmias Y!, { s10 - s11 }
|
||||
fmacs s0 , s6, s10
|
||||
fmacs s1 , s6, s11
|
||||
fmacs s2 , s7, s11
|
||||
fmacs s3 , s7, s10
|
||||
|
||||
|
||||
fldmias X!, { s4 - s5 }
|
||||
fldmias Y!, { s8 - s9 }
|
||||
fmacs s0 , s4, s8
|
||||
fmacs s1 , s4, s9
|
||||
fldmias X!, { s6 - s7 }
|
||||
fmacs s2 , s5, s9
|
||||
fmacs s3 , s5, s8
|
||||
|
||||
fldmias Y!, { s10 - s11 }
|
||||
fmacs s0 , s6, s10
|
||||
fmacs s1 , s6, s11
|
||||
fmacs s2 , s7, s11
|
||||
fmacs s3 , s7, s10
|
||||
|
||||
.endm
|
||||
|
||||
.macro KERNEL_F1
|
||||
|
||||
fldmias X!, { s4 - s5 }
|
||||
fldmias Y!, { s8 - s9 }
|
||||
fmacs s0 , s4, s8
|
||||
fmacs s1 , s4, s9
|
||||
fmacs s2 , s5, s9
|
||||
fmacs s3 , s5, s8
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
/*************************************************************************************************************************/
|
||||
|
||||
.macro KERNEL_S4
|
||||
|
||||
nop
|
||||
|
||||
fldmias X, { s4 - s5 }
|
||||
fldmias Y, { s8 - s9 }
|
||||
fmacs s0 , s4, s8
|
||||
fmacs s1 , s4, s9
|
||||
fmacs s2 , s5, s9
|
||||
fmacs s3 , s5, s8
|
||||
add X, X, INC_X
|
||||
add Y, Y, INC_Y
|
||||
|
||||
fldmias X, { s4 - s5 }
|
||||
fldmias Y, { s8 - s9 }
|
||||
fmacs s0 , s4, s8
|
||||
fmacs s1 , s4, s9
|
||||
fmacs s2 , s5, s9
|
||||
fmacs s3 , s5, s8
|
||||
add X, X, INC_X
|
||||
add Y, Y, INC_Y
|
||||
|
||||
fldmias X, { s4 - s5 }
|
||||
fldmias Y, { s8 - s9 }
|
||||
fmacs s0 , s4, s8
|
||||
fmacs s1 , s4, s9
|
||||
fmacs s2 , s5, s9
|
||||
fmacs s3 , s5, s8
|
||||
add X, X, INC_X
|
||||
add Y, Y, INC_Y
|
||||
|
||||
fldmias X, { s4 - s5 }
|
||||
fldmias Y, { s8 - s9 }
|
||||
fmacs s0 , s4, s8
|
||||
fmacs s1 , s4, s9
|
||||
fmacs s2 , s5, s9
|
||||
fmacs s3 , s5, s8
|
||||
add X, X, INC_X
|
||||
add Y, Y, INC_Y
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
.macro KERNEL_S1
|
||||
|
||||
fldmias X, { s4 - s5 }
|
||||
fldmias Y, { s8 - s9 }
|
||||
fmacs s0 , s4, s8
|
||||
fmacs s1 , s4, s9
|
||||
fmacs s2 , s5, s9
|
||||
fmacs s3 , s5, s8
|
||||
add X, X, INC_X
|
||||
add Y, Y, INC_Y
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
/**************************************************************************************
|
||||
* End of macro definitions
|
||||
**************************************************************************************/
|
||||
|
||||
PROLOGUE
|
||||
|
||||
.align 5
|
||||
|
||||
push {r4 - r9, fp}
|
||||
add fp, sp, #24
|
||||
sub sp, sp, #STACKSIZE // reserve stack
|
||||
|
||||
sub r4, fp, #128
|
||||
vstm r4, { s8 - s15} // store floating point registers
|
||||
|
||||
mov Y, OLD_Y
|
||||
ldr INC_Y, OLD_INC_Y
|
||||
|
||||
vsub.f32 s0 , s0 , s0
|
||||
vsub.f32 s1 , s1 , s1
|
||||
vsub.f32 s2 , s2 , s2
|
||||
vsub.f32 s3 , s3 , s3
|
||||
|
||||
cmp N, #0
|
||||
ble cdot_kernel_L999
|
||||
|
||||
cmp INC_X, #0
|
||||
beq cdot_kernel_L999
|
||||
|
||||
cmp INC_Y, #0
|
||||
beq cdot_kernel_L999
|
||||
|
||||
cmp INC_X, #1
|
||||
bne cdot_kernel_S_BEGIN
|
||||
|
||||
cmp INC_Y, #1
|
||||
bne cdot_kernel_S_BEGIN
|
||||
|
||||
cdot_kernel_F_BEGIN:
|
||||
|
||||
asrs I, N, #2 // I = N / 4
|
||||
ble cdot_kernel_F1
|
||||
|
||||
cdot_kernel_F4:
|
||||
|
||||
KERNEL_F4
|
||||
|
||||
subs I, I, #1
|
||||
bne cdot_kernel_F4
|
||||
|
||||
cdot_kernel_F1:
|
||||
|
||||
ands I, N, #3
|
||||
ble cdot_kernel_L999
|
||||
|
||||
cdot_kernel_F10:
|
||||
|
||||
KERNEL_F1
|
||||
|
||||
subs I, I, #1
|
||||
bne cdot_kernel_F10
|
||||
|
||||
b cdot_kernel_L999
|
||||
|
||||
cdot_kernel_S_BEGIN:
|
||||
|
||||
lsl INC_X, INC_X, #3 // INC_X * SIZE * 2
|
||||
lsl INC_Y, INC_Y, #3 // INC_Y * SIZE * 2
|
||||
|
||||
asrs I, N, #2 // I = N / 4
|
||||
ble cdot_kernel_S1
|
||||
|
||||
cdot_kernel_S4:
|
||||
|
||||
KERNEL_S4
|
||||
|
||||
subs I, I, #1
|
||||
bne cdot_kernel_S4
|
||||
|
||||
cdot_kernel_S1:
|
||||
|
||||
ands I, N, #3
|
||||
ble cdot_kernel_L999
|
||||
|
||||
cdot_kernel_S10:
|
||||
|
||||
KERNEL_S1
|
||||
|
||||
subs I, I, #1
|
||||
bne cdot_kernel_S10
|
||||
|
||||
|
||||
|
||||
cdot_kernel_L999:
|
||||
|
||||
sub r3, fp, #128
|
||||
vldm r3, { s8 - s15} // restore floating point registers
|
||||
|
||||
#if !defined(CONJ)
|
||||
vsub.f32 s0 , s0, s2
|
||||
vadd.f32 s1 , s1, s3
|
||||
#else
|
||||
vadd.f32 s0 , s0, s2
|
||||
vsub.f32 s1 , s1, s3
|
||||
#endif
|
||||
|
||||
sub sp, fp, #24
|
||||
pop {r4 - r9, fp}
|
||||
bx lr
|
||||
|
||||
EPILOGUE
|
||||
|
||||
1252
kernel/arm/cgemm_kernel_2x2_vfp.S
Normal file
1252
kernel/arm/cgemm_kernel_2x2_vfp.S
Normal file
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user