From fe8cd5ae7e0958cced30e7086509d286a8442be0 Mon Sep 17 00:00:00 2001 From: Thomas Hisch Date: Mon, 28 Sep 2020 00:42:17 +0200 Subject: [PATCH 01/11] Consolidate usage of backticks for build options There were some build options in the README that were not highlighted. Now all are highlighted. --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 6d44129c2..ca034e747 100644 --- a/README.md +++ b/README.md @@ -174,18 +174,18 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th ### Support for multiple targets in a single library -OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifiying DYNAMIC_ARCH=1 in Makefile.rule, on the gmake command line or as -DDYNAMIC_ARCH=TRUE in cmake. +OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifiying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake. -For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX. For cpu generations not included in this list, the corresponding older model is used. If you also specify DYNAMIC_OLDER=1, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option DYNAMIC_LIST that allows to specify an individual list of targets to include instead of the default. +For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX. For cpu generations not included in this list, the corresponding older model is used. If you also specify `DYNAMIC_OLDER=1`, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option `DYNAMIC_LIST` that allows to specify an individual list of targets to include instead of the default. -DYNAMIC_ARCH is also supported on **x86**, where it translates to Katmai, Coppermine, Northwood, Prescott, Banias, +`DYNAMIC_ARCH` is also supported on **x86**, where it translates to Katmai, Coppermine, Northwood, Prescott, Banias, Core2, Penryn, Dunnington, Nehalem, Athlon, Opteron, Opteron_SSE3, Barcelona, Bobcat, Atom and Nano. On **ARMV8**, it enables support for CortexA53, CortexA57, CortexA72, CortexA73, Falkor, ThunderX, ThunderX2T99, TSV110 as well as generic ARMV8 cpus. For **POWER**, the list encompasses POWER6, POWER8 and POWER9, on **ZARCH** it comprises Z13 and Z14. -The TARGET option can be used in conjunction with DYNAMIC_ARCH=1 to specify which cpu model should be assumed for all the +The `TARGET` option can be used in conjunction with `DYNAMIC_ARCH=1` to specify which cpu model should be assumed for all the common code in the library, usually you will want to set this to the oldest model you expect to encounter. Please note that it is not possible to combine support for different architectures, so no combined 32 and 64 bit or x86_64 and arm64 in the same library. From 2bf70c8e3b72f560ab35320ed12df9ac92f9b46c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 30 Sep 2020 22:43:25 +0200 Subject: [PATCH 02/11] Change ifdef linux to __linux for C11 compatibility --- cpuid_arm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpuid_arm.c b/cpuid_arm.c index 19aa90718..a3b1dfd33 100644 --- a/cpuid_arm.c +++ b/cpuid_arm.c @@ -54,7 +54,7 @@ static char *cpuname_lower[] = { int get_feature(char *search) { -#ifdef linux +#ifdef __linux FILE *infile; char buffer[2048], *p,*t; p = (char *) NULL ; @@ -90,7 +90,7 @@ int get_feature(char *search) int detect(void) { -#ifdef linux +#ifdef __linux FILE *infile; char buffer[512], *p; @@ -289,7 +289,7 @@ void get_libname(void) void get_features(void) { -#ifdef linux +#ifdef __linux FILE *infile; char buffer[2048], *p,*t; p = (char *) NULL ; From be40440ec59e8ac16b7c63d62ab743845073d2ae Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 30 Sep 2020 22:45:18 +0200 Subject: [PATCH 03/11] Change ifdef linux to __linux for C11 compatibility --- cpuid_arm64.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpuid_arm64.c b/cpuid_arm64.c index a0d3e15b9..ae150ef1b 100644 --- a/cpuid_arm64.c +++ b/cpuid_arm64.c @@ -90,7 +90,7 @@ static char *cpuname_lower[] = { int get_feature(char *search) { -#ifdef linux +#ifdef __linux FILE *infile; char buffer[2048], *p,*t; p = (char *) NULL ; @@ -126,7 +126,7 @@ int get_feature(char *search) int detect(void) { -#ifdef linux +#ifdef __linux FILE *infile; char buffer[512], *p, *cpu_part = NULL, *cpu_implementer = NULL; @@ -242,7 +242,7 @@ void get_cpucount(void) { int n=0; -#ifdef linux +#ifdef __linux FILE *infile; char buffer[2048], *p,*t; p = (char *) NULL ; @@ -441,7 +441,7 @@ void get_libname(void) void get_features(void) { -#ifdef linux +#ifdef __linux FILE *infile; char buffer[2048], *p,*t; p = (char *) NULL ; From a7d5d0078dd3d5a0c5d1aff9f3723d6799bd0410 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 30 Sep 2020 22:46:25 +0200 Subject: [PATCH 04/11] Change ifdef linux to __linux for C11 compatibility --- cpuid_mips.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpuid_mips.c b/cpuid_mips.c index 3a2e12393..e6e837f73 100644 --- a/cpuid_mips.c +++ b/cpuid_mips.c @@ -84,7 +84,7 @@ static char *cpuname[] = { int detect(void){ -#ifdef linux +#ifdef __linux FILE *infile; char buffer[512], *p; From 0b2bb5696af3c7abb0b0d5038124eb4a5f883fbc Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 30 Sep 2020 22:47:25 +0200 Subject: [PATCH 05/11] Change ifdef linux to __linux for C11 compatibility --- cpuid_mips64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpuid_mips64.c b/cpuid_mips64.c index 0e32bfc0b..0c19ac1e7 100644 --- a/cpuid_mips64.c +++ b/cpuid_mips64.c @@ -90,7 +90,7 @@ static char *cpuname[] = { int detect(void){ -#ifdef linux +#ifdef __linux FILE *infile; char buffer[512], *p; From e1574cbc83a691f2f0ff898c9976e1f5861d9686 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 30 Sep 2020 22:50:21 +0200 Subject: [PATCH 06/11] Change ifdef linux to __linux for C11 compatibility and add a fallback for unsupported operating systems in detect() --- cpuid_power.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpuid_power.c b/cpuid_power.c index b17493bc8..2526e8d0e 100644 --- a/cpuid_power.c +++ b/cpuid_power.c @@ -104,7 +104,7 @@ char *corename[] = { int detect(void){ -#ifdef linux +#ifdef __linux FILE *infile; char buffer[512], *p; @@ -214,6 +214,8 @@ switch ( id >> 16 ) { return CPUTYPE_UNKNOWN; } #endif + + return CPUTYPE_UNKNOWN; } void get_architecture(void){ From 5464eb13ea362012047d98dd7c6ecd33ca58b27b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 30 Sep 2020 22:59:41 +0200 Subject: [PATCH 07/11] Change ifdef linux to __linux for C11 compatibility --- benchmark/amax.c | 2 +- benchmark/amin.c | 2 +- benchmark/asum.c | 2 +- benchmark/axpby.c | 2 +- benchmark/axpy.c | 2 +- benchmark/copy.c | 2 +- benchmark/dot.c | 2 +- benchmark/geev.c | 2 +- benchmark/gemm.c | 2 +- benchmark/gemm3m.c | 2 +- benchmark/gemv.c | 2 +- benchmark/ger.c | 2 +- benchmark/gesv.c | 2 +- benchmark/getri.c | 2 +- benchmark/hbmv.c | 2 +- benchmark/hemm.c | 2 +- benchmark/hemv.c | 2 +- benchmark/her.c | 2 +- benchmark/her2.c | 2 +- benchmark/her2k.c | 2 +- benchmark/herk.c | 2 +- benchmark/hpmv.c | 2 +- benchmark/iamax.c | 2 +- benchmark/iamin.c | 2 +- benchmark/imax.c | 2 +- benchmark/imin.c | 2 +- benchmark/linpack.c | 2 +- benchmark/max.c | 2 +- benchmark/min.c | 2 +- benchmark/nrm2.c | 2 +- benchmark/rot.c | 2 +- benchmark/rotm.c | 2 +- benchmark/scal.c | 2 +- benchmark/spmv.c | 2 +- benchmark/spr.c | 2 +- benchmark/spr2.c | 2 +- benchmark/swap.c | 2 +- benchmark/symm.c | 2 +- benchmark/symv.c | 2 +- benchmark/syr.c | 2 +- benchmark/syr2.c | 2 +- benchmark/syr2k.c | 2 +- benchmark/syrk.c | 2 +- benchmark/tpmv.c | 2 +- benchmark/tpsv.c | 2 +- benchmark/trmm.c | 2 +- benchmark/trmv.c | 2 +- benchmark/trsm.c | 2 +- benchmark/trsv.c | 2 +- benchmark/zdot-intel.c | 2 +- benchmark/zdot.c | 2 +- 51 files changed, 51 insertions(+), 51 deletions(-) diff --git a/benchmark/amax.c b/benchmark/amax.c index 32f55ce83..19ae95c8b 100644 --- a/benchmark/amax.c +++ b/benchmark/amax.c @@ -146,7 +146,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/amin.c b/benchmark/amin.c index 218f0ea9f..d0cadbd3b 100644 --- a/benchmark/amin.c +++ b/benchmark/amin.c @@ -145,7 +145,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/asum.c b/benchmark/asum.c index e3d16acfd..bcccd9089 100644 --- a/benchmark/asum.c +++ b/benchmark/asum.c @@ -152,7 +152,7 @@ int main(int argc, char *argv[]){ } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/axpby.c b/benchmark/axpby.c index 3b3dd9979..793ee7e40 100644 --- a/benchmark/axpby.c +++ b/benchmark/axpby.c @@ -152,7 +152,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/axpy.c b/benchmark/axpy.c index e40f93c70..760703c1d 100644 --- a/benchmark/axpy.c +++ b/benchmark/axpy.c @@ -151,7 +151,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/copy.c b/benchmark/copy.c index d7f58c94f..eb5148fff 100644 --- a/benchmark/copy.c +++ b/benchmark/copy.c @@ -154,7 +154,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/dot.c b/benchmark/dot.c index 50d05e532..aae3c04b0 100644 --- a/benchmark/dot.c +++ b/benchmark/dot.c @@ -145,7 +145,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/geev.c b/benchmark/geev.c index ef9271220..4fd2c8d6f 100644 --- a/benchmark/geev.c +++ b/benchmark/geev.c @@ -214,7 +214,7 @@ int main(int argc, char *argv[]){ } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/gemm.c b/benchmark/gemm.c index d2235330b..84dd292c5 100644 --- a/benchmark/gemm.c +++ b/benchmark/gemm.c @@ -197,7 +197,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/gemm3m.c b/benchmark/gemm3m.c index f4048c436..98c13e1be 100644 --- a/benchmark/gemm3m.c +++ b/benchmark/gemm3m.c @@ -163,7 +163,7 @@ int main(int argc, char *argv[]){ loops = atoi(p); -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/gemv.c b/benchmark/gemv.c index a9dee67d2..fb1f541d3 100644 --- a/benchmark/gemv.c +++ b/benchmark/gemv.c @@ -181,7 +181,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/ger.c b/benchmark/ger.c index ca7e94e15..d53d328f0 100644 --- a/benchmark/ger.c +++ b/benchmark/ger.c @@ -165,7 +165,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/gesv.c b/benchmark/gesv.c index 80f644e69..057cbd243 100644 --- a/benchmark/gesv.c +++ b/benchmark/gesv.c @@ -165,7 +165,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/getri.c b/benchmark/getri.c index e8b82a758..a07014768 100644 --- a/benchmark/getri.c +++ b/benchmark/getri.c @@ -188,7 +188,7 @@ int main(int argc, char *argv[]){ } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/hbmv.c b/benchmark/hbmv.c index b9dcc03bb..60ba9fb89 100644 --- a/benchmark/hbmv.c +++ b/benchmark/hbmv.c @@ -158,7 +158,7 @@ int main(int argc, char *argv[]){ exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/hemm.c b/benchmark/hemm.c index 2fe0f5c5f..2bc165458 100644 --- a/benchmark/hemm.c +++ b/benchmark/hemm.c @@ -151,7 +151,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/hemv.c b/benchmark/hemv.c index b6ff512ce..98618a04e 100644 --- a/benchmark/hemv.c +++ b/benchmark/hemv.c @@ -152,7 +152,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/her.c b/benchmark/her.c index f4e10b684..010f8120d 100644 --- a/benchmark/her.c +++ b/benchmark/her.c @@ -149,7 +149,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/her2.c b/benchmark/her2.c index e10b7e98e..0f80f3ed9 100644 --- a/benchmark/her2.c +++ b/benchmark/her2.c @@ -151,7 +151,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/her2k.c b/benchmark/her2k.c index a0772feff..021873beb 100644 --- a/benchmark/her2k.c +++ b/benchmark/her2k.c @@ -150,7 +150,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/herk.c b/benchmark/herk.c index eed8ed738..c09d35c1f 100644 --- a/benchmark/herk.c +++ b/benchmark/herk.c @@ -149,7 +149,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/hpmv.c b/benchmark/hpmv.c index 6e6634fcf..b0157094e 100644 --- a/benchmark/hpmv.c +++ b/benchmark/hpmv.c @@ -155,7 +155,7 @@ int main(int argc, char *argv[]){ exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/iamax.c b/benchmark/iamax.c index 736f02b89..c87044ab4 100644 --- a/benchmark/iamax.c +++ b/benchmark/iamax.c @@ -145,7 +145,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/iamin.c b/benchmark/iamin.c index b2c779811..e7c8e59e4 100644 --- a/benchmark/iamin.c +++ b/benchmark/iamin.c @@ -145,7 +145,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/imax.c b/benchmark/imax.c index c7060af84..b56ef64ba 100644 --- a/benchmark/imax.c +++ b/benchmark/imax.c @@ -139,7 +139,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/imin.c b/benchmark/imin.c index f8bdc2537..4a92c8bd0 100644 --- a/benchmark/imin.c +++ b/benchmark/imin.c @@ -139,7 +139,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/linpack.c b/benchmark/linpack.c index e4b20e99d..661a44175 100644 --- a/benchmark/linpack.c +++ b/benchmark/linpack.c @@ -174,7 +174,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/max.c b/benchmark/max.c index 2fa6e5a14..a19a386a2 100644 --- a/benchmark/max.c +++ b/benchmark/max.c @@ -139,7 +139,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/min.c b/benchmark/min.c index 9abed8e80..4df8fb0fd 100644 --- a/benchmark/min.c +++ b/benchmark/min.c @@ -139,7 +139,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/nrm2.c b/benchmark/nrm2.c index d3718f9e0..0f416621a 100644 --- a/benchmark/nrm2.c +++ b/benchmark/nrm2.c @@ -145,7 +145,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/rot.c b/benchmark/rot.c index 8ec8b1d97..69698988d 100644 --- a/benchmark/rot.c +++ b/benchmark/rot.c @@ -156,7 +156,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/rotm.c b/benchmark/rotm.c index 8dea2d08c..17c8d5416 100644 --- a/benchmark/rotm.c +++ b/benchmark/rotm.c @@ -168,7 +168,7 @@ int main(int argc, char *argv[]) exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/scal.c b/benchmark/scal.c index 453c3234d..8bd62c77c 100644 --- a/benchmark/scal.c +++ b/benchmark/scal.c @@ -150,7 +150,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/spmv.c b/benchmark/spmv.c index 2a26c9416..cff504d3b 100644 --- a/benchmark/spmv.c +++ b/benchmark/spmv.c @@ -163,7 +163,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/spr.c b/benchmark/spr.c index c91e587b1..5dcaa4f8b 100755 --- a/benchmark/spr.c +++ b/benchmark/spr.c @@ -149,7 +149,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/spr2.c b/benchmark/spr2.c index e8ee345d7..a5f2791f7 100755 --- a/benchmark/spr2.c +++ b/benchmark/spr2.c @@ -153,7 +153,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/swap.c b/benchmark/swap.c index 368c59cd4..76d545995 100644 --- a/benchmark/swap.c +++ b/benchmark/swap.c @@ -151,7 +151,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/symm.c b/benchmark/symm.c index b979e8d51..bb9849eb5 100644 --- a/benchmark/symm.c +++ b/benchmark/symm.c @@ -162,7 +162,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/symv.c b/benchmark/symv.c index 789c3560f..e4c892b5a 100644 --- a/benchmark/symv.c +++ b/benchmark/symv.c @@ -162,7 +162,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/syr.c b/benchmark/syr.c index 458bc6edb..a9dd293e6 100644 --- a/benchmark/syr.c +++ b/benchmark/syr.c @@ -144,7 +144,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/syr2.c b/benchmark/syr2.c index 0129dd09a..9efbca315 100644 --- a/benchmark/syr2.c +++ b/benchmark/syr2.c @@ -150,7 +150,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/syr2k.c b/benchmark/syr2k.c index b1fcd8a18..a906559eb 100644 --- a/benchmark/syr2k.c +++ b/benchmark/syr2k.c @@ -162,7 +162,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/syrk.c b/benchmark/syrk.c index 95625a6c4..0fbb943f6 100644 --- a/benchmark/syrk.c +++ b/benchmark/syrk.c @@ -159,7 +159,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/tpmv.c b/benchmark/tpmv.c index ee5b97f24..fe9d07534 100644 --- a/benchmark/tpmv.c +++ b/benchmark/tpmv.c @@ -132,7 +132,7 @@ int main(int argc, char *argv[]) fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/tpsv.c b/benchmark/tpsv.c index 46d78fd17..8472ac261 100644 --- a/benchmark/tpsv.c +++ b/benchmark/tpsv.c @@ -132,7 +132,7 @@ int main(int argc, char *argv[]) fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/trmm.c b/benchmark/trmm.c index e095b85ee..23af122b4 100644 --- a/benchmark/trmm.c +++ b/benchmark/trmm.c @@ -162,7 +162,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/trmv.c b/benchmark/trmv.c index f5a5fe31a..46641b3e4 100644 --- a/benchmark/trmv.c +++ b/benchmark/trmv.c @@ -132,7 +132,7 @@ int main(int argc, char *argv[]) fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/trsm.c b/benchmark/trsm.c index 6ce1d532c..17676946a 100644 --- a/benchmark/trsm.c +++ b/benchmark/trsm.c @@ -172,7 +172,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/trsv.c b/benchmark/trsv.c index c60890de4..1734e2adb 100644 --- a/benchmark/trsv.c +++ b/benchmark/trsv.c @@ -159,7 +159,7 @@ int main(int argc, char *argv[]){ uplo,diag,loops); -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/zdot-intel.c b/benchmark/zdot-intel.c index bb2c40f38..ba1515365 100644 --- a/benchmark/zdot-intel.c +++ b/benchmark/zdot-intel.c @@ -146,7 +146,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/zdot.c b/benchmark/zdot.c index 136135c9c..fa624e859 100644 --- a/benchmark/zdot.c +++ b/benchmark/zdot.c @@ -145,7 +145,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif From dee7c49938ef34c18deb3175f6e67ae9a2240f5f Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 1 Oct 2020 10:43:16 +0200 Subject: [PATCH 08/11] Fix TABs and trailing space --- driver/others/memory.c | 352 ++++++++++++++++++++--------------------- 1 file changed, 176 insertions(+), 176 deletions(-) diff --git a/driver/others/memory.c b/driver/others/memory.c index 9b6c226a1..5c9c388ce 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -80,7 +80,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #undef COMPILE_TLS #endif -#if defined(__GLIBC_PREREQ) +#if defined(__GLIBC_PREREQ) #if !__GLIBC_PREREQ(2,20) #undef COMPILE_TLS #endif @@ -161,7 +161,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(OS_WINDOWS) && (defined(__MINGW32__) || defined(__MINGW64__)) #include #undef printf -#define printf _cprintf +#define printf _cprintf #endif #ifdef OS_LINUX @@ -190,14 +190,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CONSTRUCTOR __cdecl #define DESTRUCTOR __cdecl #elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC) -#define CONSTRUCTOR __attribute__ ((constructor)) -#define DESTRUCTOR __attribute__ ((destructor)) +#define CONSTRUCTOR __attribute__ ((constructor)) +#define DESTRUCTOR __attribute__ ((destructor)) #elif __GNUC__ && INIT_PRIORITY && ((GCC_VERSION >= 40300) || (CLANG_VERSION >= 20900)) -#define CONSTRUCTOR __attribute__ ((constructor(101))) -#define DESTRUCTOR __attribute__ ((destructor(101))) +#define CONSTRUCTOR __attribute__ ((constructor(101))) +#define DESTRUCTOR __attribute__ ((destructor(101))) #else -#define CONSTRUCTOR __attribute__ ((constructor)) -#define DESTRUCTOR __attribute__ ((destructor)) +#define CONSTRUCTOR __attribute__ ((constructor)) +#define DESTRUCTOR __attribute__ ((destructor)) #endif #ifdef DYNAMIC_ARCH @@ -272,7 +272,7 @@ int get_num_procs(void) { return nums; } ret = CPU_COUNT_S(size,cpusetp); - if (ret > 0 && ret < nums) nums = ret; + if (ret > 0 && ret < nums) nums = ret; CPU_FREE(cpusetp); return nums; } else { @@ -281,7 +281,7 @@ int get_num_procs(void) { return nums; } ret = CPU_COUNT(&cpuset); - if (ret > 0 && ret < nums) nums = ret; + if (ret > 0 && ret < nums) nums = ret; return nums; } #endif @@ -628,12 +628,12 @@ static void *alloc_mmap(void *address){ if (address){ map_address = mmap(address, - allocation_block_size, - MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0); + allocation_block_size, + MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0); } else { map_address = mmap(address, - allocation_block_size, - MMAP_ACCESS, MMAP_POLICY, -1, 0); + allocation_block_size, + MMAP_ACCESS, MMAP_POLICY, -1, 0); } STORE_RELEASE_FUNC(map_address, alloc_mmap_free); @@ -648,7 +648,7 @@ static void *alloc_mmap(void *address){ #else #define BENCH_ITERATION 4 -#define SCALING 2 +#define SCALING 2 static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) { @@ -711,60 +711,60 @@ static void *alloc_mmap(void *address){ #endif map_address = mmap(NULL, allocation_block_size * SCALING, - MMAP_ACCESS, MMAP_POLICY, -1, 0); + MMAP_ACCESS, MMAP_POLICY, -1, 0); if (map_address != (void *)-1) { #ifdef OS_LINUX #ifdef DEBUG - int ret=0; - ret=my_mbind(map_address, allocation_block_size * SCALING, MPOL_PREFERRED, NULL, 0, 0); - if(ret==-1){ - int errsv=errno; - perror("OpenBLAS alloc_mmap:"); - printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); - } + int ret=0; + ret=my_mbind(map_address, allocation_block_size * SCALING, MPOL_PREFERRED, NULL, 0, 0); + if(ret==-1){ + int errsv=errno; + perror("OpenBLAS alloc_mmap:"); + printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); + } #else - my_mbind(map_address, allocation_block_size * SCALING, MPOL_PREFERRED, NULL, 0, 0); + my_mbind(map_address, allocation_block_size * SCALING, MPOL_PREFERRED, NULL, 0, 0); #endif #endif - allocsize = DGEMM_P * DGEMM_Q * sizeof(double); + allocsize = DGEMM_P * DGEMM_Q * sizeof(double); - start = (BLASULONG)map_address; - current = (SCALING - 1) * allocation_block_size; - original = current; + start = (BLASULONG)map_address; + current = (SCALING - 1) * allocation_block_size; + original = current; - while(current > 0 && current <= original) { - *(BLASLONG *)start = (BLASLONG)start + PAGESIZE; - start += PAGESIZE; - current -= PAGESIZE; - } + while(current > 0 && current <= original) { + *(BLASLONG *)start = (BLASLONG)start + PAGESIZE; + start += PAGESIZE; + current -= PAGESIZE; + } - *(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address; + *(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address; - start = (BLASULONG)map_address; + start = (BLASULONG)map_address; - best = (BLASULONG)-1; - best_address = map_address; + best = (BLASULONG)-1; + best_address = map_address; - while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) * allocation_block_size)) { + while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) * allocation_block_size)) { - current = run_bench(start, allocsize); + current = run_bench(start, allocsize); - if (best > current) { - best = current; - best_address = (void *)start; - } + if (best > current) { + best = current; + best_address = (void *)start; + } - start += PAGESIZE; + start += PAGESIZE; - } + } if ((BLASULONG)best_address > (BLASULONG)map_address) - munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address); + munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address); munmap((void *)((BLASULONG)best_address + allocation_block_size), (SCALING - 1) * allocation_block_size + (BLASULONG)map_address - (BLASULONG)best_address); @@ -854,9 +854,9 @@ static void *alloc_windows(void *address){ void *map_address; map_address = VirtualAlloc(address, - allocation_block_size, - MEM_RESERVE | MEM_COMMIT, - PAGE_READWRITE); + allocation_block_size, + MEM_RESERVE | MEM_COMMIT, + PAGE_READWRITE); if (map_address == (void *)NULL) map_address = (void *)-1; @@ -897,9 +897,9 @@ static void *alloc_devicedirver(void *address){ } map_address = mmap(address, allocation_block_size, - PROT_READ | PROT_WRITE, - MAP_FILE | MAP_SHARED, - fd, 0); + PROT_READ | PROT_WRITE, + MAP_FILE | MAP_SHARED, + fd, 0); STORE_RELEASE_FUNC_WITH_ATTR(map_address, alloc_devicedirver_free, fd); @@ -974,12 +974,12 @@ static void *alloc_hugetlb(void *address){ shmid = shmget(IPC_PRIVATE, allocation_block_size, #ifdef OS_LINUX - SHM_HUGETLB | + SHM_HUGETLB | #endif #ifdef OS_AIX - SHM_LGPAGE | SHM_PIN | + SHM_LGPAGE | SHM_PIN | #endif - IPC_CREAT | SHM_R | SHM_W); + IPC_CREAT | SHM_R | SHM_W); if (shmid != -1) { map_address = (void *)shmat(shmid, address, SHM_RND); @@ -1026,9 +1026,9 @@ static void *alloc_hugetlb(void *address){ } map_address = (void *)VirtualAlloc(address, - allocation_block_size, - MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT, - PAGE_READWRITE); + allocation_block_size, + MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT, + PAGE_READWRITE); tp.Privileges[0].Attributes = 0; AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL); @@ -1078,9 +1078,9 @@ static void *alloc_hugetlbfile(void *address){ unlink(filename); map_address = mmap(address, allocation_block_size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - fd, 0); + PROT_READ | PROT_WRITE, + MAP_SHARED, + fd, 0); STORE_RELEASE_FUNC_WITH_ATTR(map_address, alloc_hugetlbfile_free, fd); @@ -1107,7 +1107,7 @@ static volatile int memory_initialized = 0; /* 1 : Level 2 functions */ /* 2 : Thread */ - static void blas_memory_cleanup(void* ptr){ +static void blas_memory_cleanup(void* ptr){ if (ptr) { struct alloc_t ** table = (struct alloc_t **)ptr; int pos; @@ -1243,27 +1243,27 @@ UNLOCK_COMMAND(&alloc_lock); while ((func != NULL) && (map_address == (void *) -1)) { - map_address = (*func)((void *)base_address); + map_address = (*func)((void *)base_address); #ifdef ALLOC_DEVICEDRIVER - if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) { - fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation failed.\n"); - } + if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) { + fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation failed.\n"); + } #endif #ifdef ALLOC_HUGETLBFILE - if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) { + if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) { #ifndef OS_WINDOWS - fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation failed.\n"); + fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation failed.\n"); #endif - } + } #endif #if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) - if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1; + if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1; #endif - func ++; + func ++; } #ifdef DEBUG @@ -1377,7 +1377,7 @@ static BLASULONG init_lock = 0UL; #endif static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, - void *sa, void *sb, BLASLONG pos) { + void *sa, void *sb, BLASLONG pos) { #if !defined(ARCH_POWER) && !defined(ARCH_SPARC) @@ -1507,11 +1507,11 @@ void CONSTRUCTOR gotoblas_init(void) { struct rlimit curlimit; if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 ) { - if ( curlimit.rlim_cur != curlimit.rlim_max ) - { - curlimit.rlim_cur = curlimit.rlim_max; - setrlimit(RLIMIT_STACK, &curlimit); - } + if ( curlimit.rlim_cur != curlimit.rlim_max ) + { + curlimit.rlim_cur = curlimit.rlim_max; + setrlimit(RLIMIT_STACK, &curlimit); + } } #endif @@ -1545,7 +1545,7 @@ void DESTRUCTOR gotoblas_quit(void) { TlsFree(local_storage_key); #else pthread_key_delete(local_storage_key); -#endif +#endif #endif #ifdef PROFILE @@ -1605,8 +1605,8 @@ BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReser */ static int on_process_term(void) { - gotoblas_quit(); - return 0; + gotoblas_quit(); + return 0; } #ifdef _WIN64 #pragma comment(linker, "/INCLUDE:_tls_used") @@ -1705,7 +1705,7 @@ void gotoblas_dummy_for_PGI(void) { #if defined(OS_WINDOWS) && (defined(__MINGW32__) || defined(__MINGW64__)) #include #undef printf -#define printf _cprintf +#define printf _cprintf #endif #ifdef OS_LINUX @@ -1734,14 +1734,14 @@ void gotoblas_dummy_for_PGI(void) { #define CONSTRUCTOR __cdecl #define DESTRUCTOR __cdecl #elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC) -#define CONSTRUCTOR __attribute__ ((constructor)) -#define DESTRUCTOR __attribute__ ((destructor)) +#define CONSTRUCTOR __attribute__ ((constructor)) +#define DESTRUCTOR __attribute__ ((destructor)) #elif __GNUC__ && INIT_PRIORITY && ((GCC_VERSION >= 40300) || (CLANG_VERSION >= 20900)) -#define CONSTRUCTOR __attribute__ ((constructor(101))) -#define DESTRUCTOR __attribute__ ((destructor(101))) +#define CONSTRUCTOR __attribute__ ((constructor(101))) +#define DESTRUCTOR __attribute__ ((destructor(101))) #else -#define CONSTRUCTOR __attribute__ ((constructor)) -#define DESTRUCTOR __attribute__ ((destructor)) +#define CONSTRUCTOR __attribute__ ((constructor)) +#define DESTRUCTOR __attribute__ ((destructor)) #endif #ifdef DYNAMIC_ARCH @@ -1817,7 +1817,7 @@ int get_num_procs(void) { return nums; } ret = CPU_COUNT_S(size,cpusetp); - if (ret > 0 && ret < nums) nums = ret; + if (ret > 0 && ret < nums) nums = ret; CPU_FREE(cpusetp); return nums; } else { @@ -1826,7 +1826,7 @@ int get_num_procs(void) { return nums; } ret = CPU_COUNT(&cpuset); - if (ret > 0 && ret < nums) nums = ret; + if (ret > 0 && ret < nums) nums = ret; return nums; } #endif @@ -2083,26 +2083,26 @@ static void *alloc_mmap(void *address){ if (address){ map_address = mmap(address, - BUFFER_SIZE, - MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0); + BUFFER_SIZE, + MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0); } else { map_address = mmap(address, - BUFFER_SIZE, - MMAP_ACCESS, MMAP_POLICY, -1, 0); + BUFFER_SIZE, + MMAP_ACCESS, MMAP_POLICY, -1, 0); } if (map_address != (void *)-1) { #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); -#endif +#endif release_info[release_pos].address = map_address; release_info[release_pos].func = alloc_mmap_free; release_pos ++; #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); -#endif +#endif } else { -#ifdef DEBUG +#ifdef DEBUG int errsv=errno; perror("OpenBLAS : mmap failed:"); printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); @@ -2119,7 +2119,7 @@ static void *alloc_mmap(void *address){ #else #define BENCH_ITERATION 4 -#define SCALING 2 +#define SCALING 2 static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) { @@ -2182,59 +2182,59 @@ static void *alloc_mmap(void *address){ #endif map_address = mmap(NULL, BUFFER_SIZE * SCALING, - MMAP_ACCESS, MMAP_POLICY, -1, 0); + MMAP_ACCESS, MMAP_POLICY, -1, 0); if (map_address != (void *)-1) { #ifdef OS_LINUX #ifdef DEBUG - int ret=0; - ret=my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0); - if(ret==-1){ - int errsv=errno; - perror("OpenBLAS alloc_mmap:"); - printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); - } + int ret=0; + ret=my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0); + if(ret==-1){ + int errsv=errno; + perror("OpenBLAS alloc_mmap:"); + printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); + } #else - my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0); + my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0); #endif #endif - allocsize = DGEMM_P * DGEMM_Q * sizeof(double); + allocsize = DGEMM_P * DGEMM_Q * sizeof(double); - start = (BLASULONG)map_address; - current = (SCALING - 1) * BUFFER_SIZE; + start = (BLASULONG)map_address; + current = (SCALING - 1) * BUFFER_SIZE; - while(current > 0) { - *(BLASLONG *)start = (BLASLONG)start + PAGESIZE; - start += PAGESIZE; - current -= PAGESIZE; - } + while(current > 0) { + *(BLASLONG *)start = (BLASLONG)start + PAGESIZE; + start += PAGESIZE; + current -= PAGESIZE; + } - *(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address; + *(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address; - start = (BLASULONG)map_address; + start = (BLASULONG)map_address; - best = (BLASULONG)-1; - best_address = map_address; + best = (BLASULONG)-1; + best_address = map_address; - while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) * BUFFER_SIZE)) { + while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) * BUFFER_SIZE)) { - current = run_bench(start, allocsize); + current = run_bench(start, allocsize); - if (best > current) { - best = current; - best_address = (void *)start; - } + if (best > current) { + best = current; + best_address = (void *)start; + } - start += PAGESIZE; + start += PAGESIZE; - } + } if ((BLASULONG)best_address > (BLASULONG)map_address) - munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address); + munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address); munmap((void *)((BLASULONG)best_address + BUFFER_SIZE), (SCALING - 1) * BUFFER_SIZE + (BLASULONG)map_address - (BLASULONG)best_address); @@ -2342,9 +2342,9 @@ static void *alloc_windows(void *address){ void *map_address; map_address = VirtualAlloc(address, - BUFFER_SIZE, - MEM_RESERVE | MEM_COMMIT, - PAGE_READWRITE); + BUFFER_SIZE, + MEM_RESERVE | MEM_COMMIT, + PAGE_READWRITE); if (map_address == (void *)NULL) map_address = (void *)-1; @@ -2388,9 +2388,9 @@ static void *alloc_devicedirver(void *address){ } map_address = mmap(address, BUFFER_SIZE, - PROT_READ | PROT_WRITE, - MAP_FILE | MAP_SHARED, - fd, 0); + PROT_READ | PROT_WRITE, + MAP_FILE | MAP_SHARED, + fd, 0); if (map_address != (void *)-1) { release_info[release_pos].address = map_address; @@ -2471,12 +2471,12 @@ static void *alloc_hugetlb(void *address){ shmid = shmget(IPC_PRIVATE, BUFFER_SIZE, #ifdef OS_LINUX - SHM_HUGETLB | + SHM_HUGETLB | #endif #ifdef OS_AIX - SHM_LGPAGE | SHM_PIN | + SHM_LGPAGE | SHM_PIN | #endif - IPC_CREAT | SHM_R | SHM_W); + IPC_CREAT | SHM_R | SHM_W); if (shmid != -1) { map_address = (void *)shmat(shmid, address, SHM_RND); @@ -2511,7 +2511,7 @@ static void *alloc_hugetlb(void *address){ tp.PrivilegeCount = 1; tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - + if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) { CloseHandle(hToken); return (void*)-1; @@ -2523,9 +2523,9 @@ static void *alloc_hugetlb(void *address){ } map_address = (void *)VirtualAlloc(address, - BUFFER_SIZE, - MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT, - PAGE_READWRITE); + BUFFER_SIZE, + MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT, + PAGE_READWRITE); tp.Privileges[0].Attributes = 0; AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL); @@ -2578,9 +2578,9 @@ static void *alloc_hugetlbfile(void *address){ unlink(filename); map_address = mmap(address, BUFFER_SIZE, - PROT_READ | PROT_WRITE, - MAP_SHARED, - fd, 0); + PROT_READ | PROT_WRITE, + MAP_SHARED, + fd, 0); if (map_address != (void *)-1) { release_info[release_pos].address = map_address; @@ -2717,7 +2717,7 @@ void *blas_memory_alloc(int procpos){ if (!memory[position].used && (memory[position].pos == mypos)) { #if defined(SMP) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); -#else +#else blas_lock(&memory[position].lock); #endif if (!memory[position].used) goto allocation; @@ -2725,7 +2725,7 @@ void *blas_memory_alloc(int procpos){ UNLOCK_COMMAND(&alloc_lock); #else blas_unlock(&memory[position].lock); -#endif +#endif } position ++; @@ -2741,22 +2741,22 @@ void *blas_memory_alloc(int procpos){ LOCK_COMMAND(&alloc_lock); #endif do { - RMB; -#if defined(USE_OPENMP) - if (!memory[position].used) { + RMB; +#if defined(USE_OPENMP) + if (!memory[position].used) { blas_lock(&memory[position].lock); #endif if (!memory[position].used) goto allocation; - + #if defined(USE_OPENMP) - blas_unlock(&memory[position].lock); + blas_unlock(&memory[position].lock); } #endif position ++; } while (position < NUM_BUFFERS); #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) - UNLOCK_COMMAND(&alloc_lock); + UNLOCK_COMMAND(&alloc_lock); #endif goto error; @@ -2770,7 +2770,7 @@ void *blas_memory_alloc(int procpos){ #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); #else - blas_unlock(&memory[position].lock); + blas_unlock(&memory[position].lock); #endif if (!memory[position].addr) { do { @@ -2784,27 +2784,27 @@ void *blas_memory_alloc(int procpos){ while ((func != NULL) && (map_address == (void *) -1)) { - map_address = (*func)((void *)base_address); + map_address = (*func)((void *)base_address); #ifdef ALLOC_DEVICEDRIVER - if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) { - fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation was failed.\n"); - } + if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) { + fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation was failed.\n"); + } #endif #ifdef ALLOC_HUGETLBFILE - if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) { + if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) { #ifndef OS_WINDOWS - fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation was failed.\n"); + fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation was failed.\n"); #endif - } + } #endif #if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) - if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1; + if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1; #endif - func ++; + func ++; } #ifdef DEBUG @@ -2818,7 +2818,7 @@ void *blas_memory_alloc(int procpos){ #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); -#endif +#endif memory[position].addr = map_address; #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); @@ -2856,7 +2856,7 @@ void *blas_memory_alloc(int procpos){ #ifdef DEBUG printf("Mapped : %p %3d\n\n", - (void *)memory[position].addr, position); + (void *)memory[position].addr, position); #endif return (void *)memory[position].addr; @@ -2972,7 +2972,7 @@ static BLASULONG init_lock = 0UL; #endif static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, - void *sa, void *sb, BLASLONG pos) { + void *sa, void *sb, BLASLONG pos) { #if !defined(ARCH_POWER) && !defined(ARCH_SPARC) @@ -3099,15 +3099,15 @@ void CONSTRUCTOR gotoblas_init(void) { //#if defined(OS_LINUX) #if 0 - struct rlimit curlimit; - if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 ) - { - if ( curlimit.rlim_cur != curlimit.rlim_max ) - { - curlimit.rlim_cur = curlimit.rlim_max; - setrlimit(RLIMIT_STACK, &curlimit); - } - } + struct rlimit curlimit; + if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 ) + { + if ( curlimit.rlim_cur != curlimit.rlim_max ) + { + curlimit.rlim_cur = curlimit.rlim_max; + setrlimit(RLIMIT_STACK, &curlimit); + } + } #endif #ifdef SMP @@ -3189,8 +3189,8 @@ BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReser */ static int on_process_term(void) { - gotoblas_quit(); - return 0; + gotoblas_quit(); + return 0; } #ifdef _WIN64 #pragma comment(linker, "/INCLUDE:_tls_used") @@ -3237,7 +3237,7 @@ void gotoblas_dummy_for_PGI(void) { asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text"); asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text"); #endif -#endif +#endif } #endif From 3c05f54df8de5df17507e80697d651e147e0bf69 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 1 Oct 2020 10:48:45 +0200 Subject: [PATCH 09/11] Avoid out of bounds access on invalid memory free --- driver/others/memory.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/driver/others/memory.c b/driver/others/memory.c index 5c9c388ce..91cfefbd7 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -2882,9 +2882,10 @@ void blas_memory_free(void *free_area){ while ((position < NUM_BUFFERS) && (memory[position].addr != free_area)) position++; - if (memory[position].addr != free_area) goto error; + if (position >= NUM_BUFFERS) goto error; #ifdef DEBUG + if (memory[position].addr != free_area) goto error; printf(" Position : %d\n", position); #endif From 3094fc6c83c7a623f9a7e7846eb711a8a99ddfff Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 1 Oct 2020 15:41:42 +0200 Subject: [PATCH 10/11] Lazyly reinit threads after a fork in OMP mode This initializes the per-thread memory buffers which get cleared/released on a fork via pthread_at_fork. Not doing so leads to each thread calling blas_memory_alloc on almost every execution which slows down the code significantly as the threads race for the memory allocation using locks to serialize that. --- driver/others/blas_server_omp.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index d126955e4..da0a5674a 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -48,6 +48,21 @@ #else +#ifndef likely +#ifdef __GNUC__ +#define likely(x) __builtin_expect(!!(x), 1) +#else +#define likely(x) (x) +#endif +#endif +#ifndef unlikely +#ifdef __GNUC__ +#define unlikely(x) __builtin_expect(!!(x), 0) +#else +#define unlikely(x) (x) +#endif +#endif + #ifndef OMP_SCHED #define OMP_SCHED static #endif @@ -350,6 +365,9 @@ static void exec_threads(blas_queue_t *queue, int buf_index){ int exec_blas(BLASLONG num, blas_queue_t *queue){ + // Handle lazy re-init of the thread-pool after a POSIX fork + if (unlikely(blas_server_avail == 0)) blas_thread_init(); + BLASLONG i, buf_index; if ((num <= 0) || (queue == NULL)) return 0; From d2333e784224ba19f01659210d2aaab04b43d45c Mon Sep 17 00:00:00 2001 From: User User-User Date: Sat, 3 Oct 2020 18:00:34 +0300 Subject: [PATCH 11/11] aarch64 fix std=c18 compilation --- common.h | 2 +- driver/others/dynamic_arm64.c | 2 +- kernel/arm64/daxpy_thunderx.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/common.h b/common.h index adc162536..ac12dd6d8 100644 --- a/common.h +++ b/common.h @@ -352,7 +352,7 @@ typedef int blasint; #endif #if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5) -#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n"); +#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop; \n"); #endif #ifdef BULLDOZER diff --git a/driver/others/dynamic_arm64.c b/driver/others/dynamic_arm64.c index 157b03365..be22b247c 100644 --- a/driver/others/dynamic_arm64.c +++ b/driver/others/dynamic_arm64.c @@ -68,7 +68,7 @@ extern void openblas_warning(int verbose, const char * msg); #endif #define get_cpu_ftr(id, var) ({ \ - asm("mrs %0, "#id : "=r" (var)); \ + __asm__("mrs %0, "#id : "=r" (var)); \ }) static char *corename[] = { diff --git a/kernel/arm64/daxpy_thunderx.c b/kernel/arm64/daxpy_thunderx.c index 37aae9391..f44f9d4e5 100644 --- a/kernel/arm64/daxpy_thunderx.c +++ b/kernel/arm64/daxpy_thunderx.c @@ -62,7 +62,7 @@ static void daxpy_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) y5 = a * x[5] + y[5]; y6 = a * x[6] + y[6]; y7 = a * x[7] + y[7]; - asm("":"+w"(y0),"+w"(y1),"+w"(y2),"+w"(y3),"+w"(y4),"+w"(y5),"+w"(y6),"+w"(y7)); + __asm__("":"+w"(y0),"+w"(y1),"+w"(y2),"+w"(y3),"+w"(y4),"+w"(y5),"+w"(y6),"+w"(y7)); y[0] = y0; y[1] = y1; y[2] = y2; @@ -74,7 +74,7 @@ static void daxpy_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) xx = (x + 4*128/sizeof(*x)); yy = (y + 4*128/sizeof(*y)); - asm("":"+r"(yy)::"memory"); + __asm__("":"+r"(yy)::"memory"); prefetch(xx); prefetch(yy);