diff --git a/README.md b/README.md index 6d44129c2..ca034e747 100644 --- a/README.md +++ b/README.md @@ -174,18 +174,18 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th ### Support for multiple targets in a single library -OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifiying DYNAMIC_ARCH=1 in Makefile.rule, on the gmake command line or as -DDYNAMIC_ARCH=TRUE in cmake. +OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake. -For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX. For cpu generations not included in this list, the corresponding older model is used. If you also specify DYNAMIC_OLDER=1, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option DYNAMIC_LIST that allows to specify an individual list of targets to include instead of the default. +For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX. For cpu generations not included in this list, the corresponding older model is used. If you also specify `DYNAMIC_OLDER=1`, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option `DYNAMIC_LIST` that allows specifying an individual list of targets to include instead of the default. 
-DYNAMIC_ARCH is also supported on **x86**, where it translates to Katmai, Coppermine, Northwood, Prescott, Banias, +`DYNAMIC_ARCH` is also supported on **x86**, where it translates to Katmai, Coppermine, Northwood, Prescott, Banias, Core2, Penryn, Dunnington, Nehalem, Athlon, Opteron, Opteron_SSE3, Barcelona, Bobcat, Atom and Nano. On **ARMV8**, it enables support for CortexA53, CortexA57, CortexA72, CortexA73, Falkor, ThunderX, ThunderX2T99, TSV110 as well as generic ARMV8 cpus. For **POWER**, the list encompasses POWER6, POWER8 and POWER9, on **ZARCH** it comprises Z13 and Z14. -The TARGET option can be used in conjunction with DYNAMIC_ARCH=1 to specify which cpu model should be assumed for all the +The `TARGET` option can be used in conjunction with `DYNAMIC_ARCH=1` to specify which cpu model should be assumed for all the common code in the library, usually you will want to set this to the oldest model you expect to encounter. Please note that it is not possible to combine support for different architectures, so no combined 32 and 64 bit or x86_64 and arm64 in the same library. 
diff --git a/benchmark/amax.c b/benchmark/amax.c index 32f55ce83..19ae95c8b 100644 --- a/benchmark/amax.c +++ b/benchmark/amax.c @@ -146,7 +146,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/amin.c b/benchmark/amin.c index 218f0ea9f..d0cadbd3b 100644 --- a/benchmark/amin.c +++ b/benchmark/amin.c @@ -145,7 +145,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/asum.c b/benchmark/asum.c index e3d16acfd..bcccd9089 100644 --- a/benchmark/asum.c +++ b/benchmark/asum.c @@ -152,7 +152,7 @@ int main(int argc, char *argv[]){ } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/axpby.c b/benchmark/axpby.c index 3b3dd9979..793ee7e40 100644 --- a/benchmark/axpby.c +++ b/benchmark/axpby.c @@ -152,7 +152,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/axpy.c b/benchmark/axpy.c index e40f93c70..760703c1d 100644 --- a/benchmark/axpy.c +++ b/benchmark/axpy.c @@ -151,7 +151,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/copy.c b/benchmark/copy.c index d7f58c94f..eb5148fff 100644 --- a/benchmark/copy.c +++ b/benchmark/copy.c @@ -154,7 +154,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/dot.c b/benchmark/dot.c index 50d05e532..aae3c04b0 100644 --- a/benchmark/dot.c +++ b/benchmark/dot.c @@ -145,7 +145,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/geev.c 
b/benchmark/geev.c index ef9271220..4fd2c8d6f 100644 --- a/benchmark/geev.c +++ b/benchmark/geev.c @@ -214,7 +214,7 @@ int main(int argc, char *argv[]){ } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/gemm.c b/benchmark/gemm.c index d2235330b..84dd292c5 100644 --- a/benchmark/gemm.c +++ b/benchmark/gemm.c @@ -197,7 +197,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/gemm3m.c b/benchmark/gemm3m.c index f4048c436..98c13e1be 100644 --- a/benchmark/gemm3m.c +++ b/benchmark/gemm3m.c @@ -163,7 +163,7 @@ int main(int argc, char *argv[]){ loops = atoi(p); -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/gemv.c b/benchmark/gemv.c index a9dee67d2..fb1f541d3 100644 --- a/benchmark/gemv.c +++ b/benchmark/gemv.c @@ -181,7 +181,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/ger.c b/benchmark/ger.c index ca7e94e15..d53d328f0 100644 --- a/benchmark/ger.c +++ b/benchmark/ger.c @@ -165,7 +165,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/gesv.c b/benchmark/gesv.c index 80f644e69..057cbd243 100644 --- a/benchmark/gesv.c +++ b/benchmark/gesv.c @@ -165,7 +165,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/getri.c b/benchmark/getri.c index e8b82a758..a07014768 100644 --- a/benchmark/getri.c +++ b/benchmark/getri.c @@ -188,7 +188,7 @@ int main(int argc, char *argv[]){ } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/hbmv.c b/benchmark/hbmv.c index b9dcc03bb..60ba9fb89 100644 --- a/benchmark/hbmv.c +++ b/benchmark/hbmv.c @@ -158,7 
+158,7 @@ int main(int argc, char *argv[]){ exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/hemm.c b/benchmark/hemm.c index 2fe0f5c5f..2bc165458 100644 --- a/benchmark/hemm.c +++ b/benchmark/hemm.c @@ -151,7 +151,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/hemv.c b/benchmark/hemv.c index b6ff512ce..98618a04e 100644 --- a/benchmark/hemv.c +++ b/benchmark/hemv.c @@ -152,7 +152,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/her.c b/benchmark/her.c index f4e10b684..010f8120d 100644 --- a/benchmark/her.c +++ b/benchmark/her.c @@ -149,7 +149,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/her2.c b/benchmark/her2.c index e10b7e98e..0f80f3ed9 100644 --- a/benchmark/her2.c +++ b/benchmark/her2.c @@ -151,7 +151,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/her2k.c b/benchmark/her2k.c index a0772feff..021873beb 100644 --- a/benchmark/her2k.c +++ b/benchmark/her2k.c @@ -150,7 +150,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/herk.c b/benchmark/herk.c index eed8ed738..c09d35c1f 100644 --- a/benchmark/herk.c +++ b/benchmark/herk.c @@ -149,7 +149,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/hpmv.c b/benchmark/hpmv.c index 6e6634fcf..b0157094e 100644 --- a/benchmark/hpmv.c +++ b/benchmark/hpmv.c @@ -155,7 +155,7 @@ int main(int argc, char *argv[]){ exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/iamax.c b/benchmark/iamax.c index 736f02b89..c87044ab4 100644 --- a/benchmark/iamax.c 
+++ b/benchmark/iamax.c @@ -145,7 +145,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/iamin.c b/benchmark/iamin.c index b2c779811..e7c8e59e4 100644 --- a/benchmark/iamin.c +++ b/benchmark/iamin.c @@ -145,7 +145,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/imax.c b/benchmark/imax.c index c7060af84..b56ef64ba 100644 --- a/benchmark/imax.c +++ b/benchmark/imax.c @@ -139,7 +139,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/imin.c b/benchmark/imin.c index f8bdc2537..4a92c8bd0 100644 --- a/benchmark/imin.c +++ b/benchmark/imin.c @@ -139,7 +139,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/linpack.c b/benchmark/linpack.c index e4b20e99d..661a44175 100644 --- a/benchmark/linpack.c +++ b/benchmark/linpack.c @@ -174,7 +174,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/max.c b/benchmark/max.c index 2fa6e5a14..a19a386a2 100644 --- a/benchmark/max.c +++ b/benchmark/max.c @@ -139,7 +139,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/min.c b/benchmark/min.c index 9abed8e80..4df8fb0fd 100644 --- a/benchmark/min.c +++ b/benchmark/min.c @@ -139,7 +139,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/nrm2.c b/benchmark/nrm2.c index d3718f9e0..0f416621a 100644 --- 
a/benchmark/nrm2.c +++ b/benchmark/nrm2.c @@ -145,7 +145,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/rot.c b/benchmark/rot.c index 8ec8b1d97..69698988d 100644 --- a/benchmark/rot.c +++ b/benchmark/rot.c @@ -156,7 +156,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/rotm.c b/benchmark/rotm.c index 8dea2d08c..17c8d5416 100644 --- a/benchmark/rotm.c +++ b/benchmark/rotm.c @@ -168,7 +168,7 @@ int main(int argc, char *argv[]) exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/scal.c b/benchmark/scal.c index 453c3234d..8bd62c77c 100644 --- a/benchmark/scal.c +++ b/benchmark/scal.c @@ -150,7 +150,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/spmv.c b/benchmark/spmv.c index 2a26c9416..cff504d3b 100644 --- a/benchmark/spmv.c +++ b/benchmark/spmv.c @@ -163,7 +163,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/spr.c b/benchmark/spr.c index c91e587b1..5dcaa4f8b 100755 --- a/benchmark/spr.c +++ b/benchmark/spr.c @@ -149,7 +149,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/spr2.c b/benchmark/spr2.c index e8ee345d7..a5f2791f7 100755 --- a/benchmark/spr2.c +++ b/benchmark/spr2.c @@ -153,7 +153,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/swap.c b/benchmark/swap.c index 368c59cd4..76d545995 100644 --- a/benchmark/swap.c +++ b/benchmark/swap.c 
@@ -151,7 +151,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/symm.c b/benchmark/symm.c index b979e8d51..bb9849eb5 100644 --- a/benchmark/symm.c +++ b/benchmark/symm.c @@ -162,7 +162,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/symv.c b/benchmark/symv.c index 789c3560f..e4c892b5a 100644 --- a/benchmark/symv.c +++ b/benchmark/symv.c @@ -162,7 +162,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/syr.c b/benchmark/syr.c index 458bc6edb..a9dd293e6 100644 --- a/benchmark/syr.c +++ b/benchmark/syr.c @@ -144,7 +144,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/syr2.c b/benchmark/syr2.c index 0129dd09a..9efbca315 100644 --- a/benchmark/syr2.c +++ b/benchmark/syr2.c @@ -150,7 +150,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/syr2k.c b/benchmark/syr2k.c index b1fcd8a18..a906559eb 100644 --- a/benchmark/syr2k.c +++ b/benchmark/syr2k.c @@ -162,7 +162,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/syrk.c b/benchmark/syrk.c index 95625a6c4..0fbb943f6 100644 --- a/benchmark/syrk.c +++ b/benchmark/syrk.c @@ -159,7 +159,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/tpmv.c b/benchmark/tpmv.c index ee5b97f24..fe9d07534 100644 --- a/benchmark/tpmv.c +++ b/benchmark/tpmv.c @@ -132,7 +132,7 @@ int main(int argc, char *argv[]) fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/tpsv.c 
b/benchmark/tpsv.c index 46d78fd17..8472ac261 100644 --- a/benchmark/tpsv.c +++ b/benchmark/tpsv.c @@ -132,7 +132,7 @@ int main(int argc, char *argv[]) fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/trmm.c b/benchmark/trmm.c index e095b85ee..23af122b4 100644 --- a/benchmark/trmm.c +++ b/benchmark/trmm.c @@ -162,7 +162,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/trmv.c b/benchmark/trmv.c index f5a5fe31a..46641b3e4 100644 --- a/benchmark/trmv.c +++ b/benchmark/trmv.c @@ -132,7 +132,7 @@ int main(int argc, char *argv[]) fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/trsm.c b/benchmark/trsm.c index 6ce1d532c..17676946a 100644 --- a/benchmark/trsm.c +++ b/benchmark/trsm.c @@ -172,7 +172,7 @@ int main(int argc, char *argv[]){ -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/trsv.c b/benchmark/trsv.c index c60890de4..1734e2adb 100644 --- a/benchmark/trsv.c +++ b/benchmark/trsv.c @@ -159,7 +159,7 @@ int main(int argc, char *argv[]){ uplo,diag,loops); -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/zdot-intel.c b/benchmark/zdot-intel.c index bb2c40f38..ba1515365 100644 --- a/benchmark/zdot-intel.c +++ b/benchmark/zdot-intel.c @@ -146,7 +146,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/benchmark/zdot.c b/benchmark/zdot.c index 136135c9c..fa624e859 100644 --- a/benchmark/zdot.c +++ b/benchmark/zdot.c @@ -145,7 +145,7 @@ int main(int argc, char *argv[]){ fprintf(stderr,"Out of Memory!!\n");exit(1); } -#ifdef linux +#ifdef __linux srandom(getpid()); #endif diff --git a/common.h b/common.h index adc162536..ac12dd6d8 100644 --- a/common.h +++ b/common.h @@ -352,7 +352,7 @@ typedef int 
blasint; #endif #if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5) -#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n"); +#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop; \n"); #endif #ifdef BULLDOZER diff --git a/cpuid_arm.c b/cpuid_arm.c index 19aa90718..a3b1dfd33 100644 --- a/cpuid_arm.c +++ b/cpuid_arm.c @@ -54,7 +54,7 @@ static char *cpuname_lower[] = { int get_feature(char *search) { -#ifdef linux +#ifdef __linux FILE *infile; char buffer[2048], *p,*t; p = (char *) NULL ; @@ -90,7 +90,7 @@ int get_feature(char *search) int detect(void) { -#ifdef linux +#ifdef __linux FILE *infile; char buffer[512], *p; @@ -289,7 +289,7 @@ void get_libname(void) void get_features(void) { -#ifdef linux +#ifdef __linux FILE *infile; char buffer[2048], *p,*t; p = (char *) NULL ; diff --git a/cpuid_arm64.c b/cpuid_arm64.c index a0d3e15b9..ae150ef1b 100644 --- a/cpuid_arm64.c +++ b/cpuid_arm64.c @@ -90,7 +90,7 @@ static char *cpuname_lower[] = { int get_feature(char *search) { -#ifdef linux +#ifdef __linux FILE *infile; char buffer[2048], *p,*t; p = (char *) NULL ; @@ -126,7 +126,7 @@ int get_feature(char *search) int detect(void) { -#ifdef linux +#ifdef __linux FILE *infile; char buffer[512], *p, *cpu_part = NULL, *cpu_implementer = NULL; @@ -242,7 +242,7 @@ void get_cpucount(void) { int n=0; -#ifdef linux +#ifdef __linux FILE *infile; char buffer[2048], *p,*t; p = (char *) NULL ; @@ -441,7 +441,7 @@ void get_libname(void) void get_features(void) { -#ifdef linux +#ifdef __linux FILE *infile; char buffer[2048], *p,*t; p = (char *) NULL ; diff --git a/cpuid_mips.c b/cpuid_mips.c index 3a2e12393..e6e837f73 100644 --- a/cpuid_mips.c +++ b/cpuid_mips.c @@ -84,7 +84,7 @@ static char *cpuname[] = { int detect(void){ -#ifdef linux +#ifdef __linux FILE *infile; char buffer[512], *p; diff --git a/cpuid_mips64.c b/cpuid_mips64.c index 0e32bfc0b..0c19ac1e7 100644 --- a/cpuid_mips64.c +++ b/cpuid_mips64.c @@ -90,7 +90,7 @@ static 
char *cpuname[] = { int detect(void){ -#ifdef linux +#ifdef __linux FILE *infile; char buffer[512], *p; diff --git a/cpuid_power.c b/cpuid_power.c index b17493bc8..2526e8d0e 100644 --- a/cpuid_power.c +++ b/cpuid_power.c @@ -104,7 +104,7 @@ char *corename[] = { int detect(void){ -#ifdef linux +#ifdef __linux FILE *infile; char buffer[512], *p; @@ -214,6 +214,8 @@ switch ( id >> 16 ) { return CPUTYPE_UNKNOWN; } #endif + + return CPUTYPE_UNKNOWN; } void get_architecture(void){ diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index d126955e4..da0a5674a 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -48,6 +48,21 @@ #else +#ifndef likely +#ifdef __GNUC__ +#define likely(x) __builtin_expect(!!(x), 1) +#else +#define likely(x) (x) +#endif +#endif +#ifndef unlikely +#ifdef __GNUC__ +#define unlikely(x) __builtin_expect(!!(x), 0) +#else +#define unlikely(x) (x) +#endif +#endif + #ifndef OMP_SCHED #define OMP_SCHED static #endif @@ -350,6 +365,9 @@ static void exec_threads(blas_queue_t *queue, int buf_index){ int exec_blas(BLASLONG num, blas_queue_t *queue){ + // Handle lazy re-init of the thread-pool after a POSIX fork + if (unlikely(blas_server_avail == 0)) blas_thread_init(); + BLASLONG i, buf_index; if ((num <= 0) || (queue == NULL)) return 0; diff --git a/driver/others/dynamic_arm64.c b/driver/others/dynamic_arm64.c index 157b03365..be22b247c 100644 --- a/driver/others/dynamic_arm64.c +++ b/driver/others/dynamic_arm64.c @@ -68,7 +68,7 @@ extern void openblas_warning(int verbose, const char * msg); #endif #define get_cpu_ftr(id, var) ({ \ - asm("mrs %0, "#id : "=r" (var)); \ + __asm__("mrs %0, "#id : "=r" (var)); \ }) static char *corename[] = { diff --git a/driver/others/memory.c b/driver/others/memory.c index 9b6c226a1..91cfefbd7 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -80,7 +80,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#undef COMPILE_TLS #endif -#if defined(__GLIBC_PREREQ) +#if defined(__GLIBC_PREREQ) #if !__GLIBC_PREREQ(2,20) #undef COMPILE_TLS #endif @@ -161,7 +161,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(OS_WINDOWS) && (defined(__MINGW32__) || defined(__MINGW64__)) #include #undef printf -#define printf _cprintf +#define printf _cprintf #endif #ifdef OS_LINUX @@ -190,14 +190,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CONSTRUCTOR __cdecl #define DESTRUCTOR __cdecl #elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC) -#define CONSTRUCTOR __attribute__ ((constructor)) -#define DESTRUCTOR __attribute__ ((destructor)) +#define CONSTRUCTOR __attribute__ ((constructor)) +#define DESTRUCTOR __attribute__ ((destructor)) #elif __GNUC__ && INIT_PRIORITY && ((GCC_VERSION >= 40300) || (CLANG_VERSION >= 20900)) -#define CONSTRUCTOR __attribute__ ((constructor(101))) -#define DESTRUCTOR __attribute__ ((destructor(101))) +#define CONSTRUCTOR __attribute__ ((constructor(101))) +#define DESTRUCTOR __attribute__ ((destructor(101))) #else -#define CONSTRUCTOR __attribute__ ((constructor)) -#define DESTRUCTOR __attribute__ ((destructor)) +#define CONSTRUCTOR __attribute__ ((constructor)) +#define DESTRUCTOR __attribute__ ((destructor)) #endif #ifdef DYNAMIC_ARCH @@ -272,7 +272,7 @@ int get_num_procs(void) { return nums; } ret = CPU_COUNT_S(size,cpusetp); - if (ret > 0 && ret < nums) nums = ret; + if (ret > 0 && ret < nums) nums = ret; CPU_FREE(cpusetp); return nums; } else { @@ -281,7 +281,7 @@ int get_num_procs(void) { return nums; } ret = CPU_COUNT(&cpuset); - if (ret > 0 && ret < nums) nums = ret; + if (ret > 0 && ret < nums) nums = ret; return nums; } #endif @@ -628,12 +628,12 @@ static void *alloc_mmap(void *address){ if (address){ map_address = mmap(address, - allocation_block_size, - MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0); + allocation_block_size, + MMAP_ACCESS, MMAP_POLICY | 
MAP_FIXED, -1, 0); } else { map_address = mmap(address, - allocation_block_size, - MMAP_ACCESS, MMAP_POLICY, -1, 0); + allocation_block_size, + MMAP_ACCESS, MMAP_POLICY, -1, 0); } STORE_RELEASE_FUNC(map_address, alloc_mmap_free); @@ -648,7 +648,7 @@ static void *alloc_mmap(void *address){ #else #define BENCH_ITERATION 4 -#define SCALING 2 +#define SCALING 2 static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) { @@ -711,60 +711,60 @@ static void *alloc_mmap(void *address){ #endif map_address = mmap(NULL, allocation_block_size * SCALING, - MMAP_ACCESS, MMAP_POLICY, -1, 0); + MMAP_ACCESS, MMAP_POLICY, -1, 0); if (map_address != (void *)-1) { #ifdef OS_LINUX #ifdef DEBUG - int ret=0; - ret=my_mbind(map_address, allocation_block_size * SCALING, MPOL_PREFERRED, NULL, 0, 0); - if(ret==-1){ - int errsv=errno; - perror("OpenBLAS alloc_mmap:"); - printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); - } + int ret=0; + ret=my_mbind(map_address, allocation_block_size * SCALING, MPOL_PREFERRED, NULL, 0, 0); + if(ret==-1){ + int errsv=errno; + perror("OpenBLAS alloc_mmap:"); + printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); + } #else - my_mbind(map_address, allocation_block_size * SCALING, MPOL_PREFERRED, NULL, 0, 0); + my_mbind(map_address, allocation_block_size * SCALING, MPOL_PREFERRED, NULL, 0, 0); #endif #endif - allocsize = DGEMM_P * DGEMM_Q * sizeof(double); + allocsize = DGEMM_P * DGEMM_Q * sizeof(double); - start = (BLASULONG)map_address; - current = (SCALING - 1) * allocation_block_size; - original = current; + start = (BLASULONG)map_address; + current = (SCALING - 1) * allocation_block_size; + original = current; - while(current > 0 && current <= original) { - *(BLASLONG *)start = (BLASLONG)start + PAGESIZE; - start += PAGESIZE; - current -= PAGESIZE; - } + while(current > 0 && current <= original) { + *(BLASLONG *)start = (BLASLONG)start + PAGESIZE; + start += PAGESIZE; + current -= PAGESIZE; + } - *(BLASLONG *)(start - 
PAGESIZE) = (BLASULONG)map_address; + *(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address; - start = (BLASULONG)map_address; + start = (BLASULONG)map_address; - best = (BLASULONG)-1; - best_address = map_address; + best = (BLASULONG)-1; + best_address = map_address; - while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) * allocation_block_size)) { + while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) * allocation_block_size)) { - current = run_bench(start, allocsize); + current = run_bench(start, allocsize); - if (best > current) { - best = current; - best_address = (void *)start; - } + if (best > current) { + best = current; + best_address = (void *)start; + } - start += PAGESIZE; + start += PAGESIZE; - } + } if ((BLASULONG)best_address > (BLASULONG)map_address) - munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address); + munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address); munmap((void *)((BLASULONG)best_address + allocation_block_size), (SCALING - 1) * allocation_block_size + (BLASULONG)map_address - (BLASULONG)best_address); @@ -854,9 +854,9 @@ static void *alloc_windows(void *address){ void *map_address; map_address = VirtualAlloc(address, - allocation_block_size, - MEM_RESERVE | MEM_COMMIT, - PAGE_READWRITE); + allocation_block_size, + MEM_RESERVE | MEM_COMMIT, + PAGE_READWRITE); if (map_address == (void *)NULL) map_address = (void *)-1; @@ -897,9 +897,9 @@ static void *alloc_devicedirver(void *address){ } map_address = mmap(address, allocation_block_size, - PROT_READ | PROT_WRITE, - MAP_FILE | MAP_SHARED, - fd, 0); + PROT_READ | PROT_WRITE, + MAP_FILE | MAP_SHARED, + fd, 0); STORE_RELEASE_FUNC_WITH_ATTR(map_address, alloc_devicedirver_free, fd); @@ -974,12 +974,12 @@ static void *alloc_hugetlb(void *address){ shmid = shmget(IPC_PRIVATE, allocation_block_size, #ifdef OS_LINUX - SHM_HUGETLB | + SHM_HUGETLB | #endif #ifdef OS_AIX - SHM_LGPAGE | SHM_PIN | + SHM_LGPAGE | SHM_PIN | #endif - 
IPC_CREAT | SHM_R | SHM_W); + IPC_CREAT | SHM_R | SHM_W); if (shmid != -1) { map_address = (void *)shmat(shmid, address, SHM_RND); @@ -1026,9 +1026,9 @@ static void *alloc_hugetlb(void *address){ } map_address = (void *)VirtualAlloc(address, - allocation_block_size, - MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT, - PAGE_READWRITE); + allocation_block_size, + MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT, + PAGE_READWRITE); tp.Privileges[0].Attributes = 0; AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL); @@ -1078,9 +1078,9 @@ static void *alloc_hugetlbfile(void *address){ unlink(filename); map_address = mmap(address, allocation_block_size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - fd, 0); + PROT_READ | PROT_WRITE, + MAP_SHARED, + fd, 0); STORE_RELEASE_FUNC_WITH_ATTR(map_address, alloc_hugetlbfile_free, fd); @@ -1107,7 +1107,7 @@ static volatile int memory_initialized = 0; /* 1 : Level 2 functions */ /* 2 : Thread */ - static void blas_memory_cleanup(void* ptr){ +static void blas_memory_cleanup(void* ptr){ if (ptr) { struct alloc_t ** table = (struct alloc_t **)ptr; int pos; @@ -1243,27 +1243,27 @@ UNLOCK_COMMAND(&alloc_lock); while ((func != NULL) && (map_address == (void *) -1)) { - map_address = (*func)((void *)base_address); + map_address = (*func)((void *)base_address); #ifdef ALLOC_DEVICEDRIVER - if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) { - fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation failed.\n"); - } + if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) { + fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation failed.\n"); + } #endif #ifdef ALLOC_HUGETLBFILE - if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) { + if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) { #ifndef OS_WINDOWS - fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation failed.\n"); + fprintf(stderr, "OpenBLAS Warning ... 
HugeTLB(File) allocation failed.\n"); #endif - } + } #endif #if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) - if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1; + if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1; #endif - func ++; + func ++; } #ifdef DEBUG @@ -1377,7 +1377,7 @@ static BLASULONG init_lock = 0UL; #endif static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, - void *sa, void *sb, BLASLONG pos) { + void *sa, void *sb, BLASLONG pos) { #if !defined(ARCH_POWER) && !defined(ARCH_SPARC) @@ -1507,11 +1507,11 @@ void CONSTRUCTOR gotoblas_init(void) { struct rlimit curlimit; if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 ) { - if ( curlimit.rlim_cur != curlimit.rlim_max ) - { - curlimit.rlim_cur = curlimit.rlim_max; - setrlimit(RLIMIT_STACK, &curlimit); - } + if ( curlimit.rlim_cur != curlimit.rlim_max ) + { + curlimit.rlim_cur = curlimit.rlim_max; + setrlimit(RLIMIT_STACK, &curlimit); + } } #endif @@ -1545,7 +1545,7 @@ void DESTRUCTOR gotoblas_quit(void) { TlsFree(local_storage_key); #else pthread_key_delete(local_storage_key); -#endif +#endif #endif #ifdef PROFILE @@ -1605,8 +1605,8 @@ BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReser */ static int on_process_term(void) { - gotoblas_quit(); - return 0; + gotoblas_quit(); + return 0; } #ifdef _WIN64 #pragma comment(linker, "/INCLUDE:_tls_used") @@ -1705,7 +1705,7 @@ void gotoblas_dummy_for_PGI(void) { #if defined(OS_WINDOWS) && (defined(__MINGW32__) || defined(__MINGW64__)) #include #undef printf -#define printf _cprintf +#define printf _cprintf #endif #ifdef OS_LINUX @@ -1734,14 +1734,14 @@ void gotoblas_dummy_for_PGI(void) { #define CONSTRUCTOR __cdecl #define DESTRUCTOR __cdecl #elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC) -#define CONSTRUCTOR __attribute__ ((constructor)) -#define DESTRUCTOR __attribute__ 
((destructor)) +#define CONSTRUCTOR __attribute__ ((constructor)) +#define DESTRUCTOR __attribute__ ((destructor)) #elif __GNUC__ && INIT_PRIORITY && ((GCC_VERSION >= 40300) || (CLANG_VERSION >= 20900)) -#define CONSTRUCTOR __attribute__ ((constructor(101))) -#define DESTRUCTOR __attribute__ ((destructor(101))) +#define CONSTRUCTOR __attribute__ ((constructor(101))) +#define DESTRUCTOR __attribute__ ((destructor(101))) #else -#define CONSTRUCTOR __attribute__ ((constructor)) -#define DESTRUCTOR __attribute__ ((destructor)) +#define CONSTRUCTOR __attribute__ ((constructor)) +#define DESTRUCTOR __attribute__ ((destructor)) #endif #ifdef DYNAMIC_ARCH @@ -1817,7 +1817,7 @@ int get_num_procs(void) { return nums; } ret = CPU_COUNT_S(size,cpusetp); - if (ret > 0 && ret < nums) nums = ret; + if (ret > 0 && ret < nums) nums = ret; CPU_FREE(cpusetp); return nums; } else { @@ -1826,7 +1826,7 @@ int get_num_procs(void) { return nums; } ret = CPU_COUNT(&cpuset); - if (ret > 0 && ret < nums) nums = ret; + if (ret > 0 && ret < nums) nums = ret; return nums; } #endif @@ -2083,26 +2083,26 @@ static void *alloc_mmap(void *address){ if (address){ map_address = mmap(address, - BUFFER_SIZE, - MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0); + BUFFER_SIZE, + MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0); } else { map_address = mmap(address, - BUFFER_SIZE, - MMAP_ACCESS, MMAP_POLICY, -1, 0); + BUFFER_SIZE, + MMAP_ACCESS, MMAP_POLICY, -1, 0); } if (map_address != (void *)-1) { #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); -#endif +#endif release_info[release_pos].address = map_address; release_info[release_pos].func = alloc_mmap_free; release_pos ++; #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); -#endif +#endif } else { -#ifdef DEBUG +#ifdef DEBUG int errsv=errno; perror("OpenBLAS : mmap failed:"); printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); @@ -2119,7 +2119,7 @@ static void 
*alloc_mmap(void *address){ #else #define BENCH_ITERATION 4 -#define SCALING 2 +#define SCALING 2 static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) { @@ -2182,59 +2182,59 @@ static void *alloc_mmap(void *address){ #endif map_address = mmap(NULL, BUFFER_SIZE * SCALING, - MMAP_ACCESS, MMAP_POLICY, -1, 0); + MMAP_ACCESS, MMAP_POLICY, -1, 0); if (map_address != (void *)-1) { #ifdef OS_LINUX #ifdef DEBUG - int ret=0; - ret=my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0); - if(ret==-1){ - int errsv=errno; - perror("OpenBLAS alloc_mmap:"); - printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); - } + int ret=0; + ret=my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0); + if(ret==-1){ + int errsv=errno; + perror("OpenBLAS alloc_mmap:"); + printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); + } #else - my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0); + my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0); #endif #endif - allocsize = DGEMM_P * DGEMM_Q * sizeof(double); + allocsize = DGEMM_P * DGEMM_Q * sizeof(double); - start = (BLASULONG)map_address; - current = (SCALING - 1) * BUFFER_SIZE; + start = (BLASULONG)map_address; + current = (SCALING - 1) * BUFFER_SIZE; - while(current > 0) { - *(BLASLONG *)start = (BLASLONG)start + PAGESIZE; - start += PAGESIZE; - current -= PAGESIZE; - } + while(current > 0) { + *(BLASLONG *)start = (BLASLONG)start + PAGESIZE; + start += PAGESIZE; + current -= PAGESIZE; + } - *(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address; + *(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address; - start = (BLASULONG)map_address; + start = (BLASULONG)map_address; - best = (BLASULONG)-1; - best_address = map_address; + best = (BLASULONG)-1; + best_address = map_address; - while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) * BUFFER_SIZE)) { + while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) 
* BUFFER_SIZE)) { - current = run_bench(start, allocsize); + current = run_bench(start, allocsize); - if (best > current) { - best = current; - best_address = (void *)start; - } + if (best > current) { + best = current; + best_address = (void *)start; + } - start += PAGESIZE; + start += PAGESIZE; - } + } if ((BLASULONG)best_address > (BLASULONG)map_address) - munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address); + munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address); munmap((void *)((BLASULONG)best_address + BUFFER_SIZE), (SCALING - 1) * BUFFER_SIZE + (BLASULONG)map_address - (BLASULONG)best_address); @@ -2342,9 +2342,9 @@ static void *alloc_windows(void *address){ void *map_address; map_address = VirtualAlloc(address, - BUFFER_SIZE, - MEM_RESERVE | MEM_COMMIT, - PAGE_READWRITE); + BUFFER_SIZE, + MEM_RESERVE | MEM_COMMIT, + PAGE_READWRITE); if (map_address == (void *)NULL) map_address = (void *)-1; @@ -2388,9 +2388,9 @@ static void *alloc_devicedirver(void *address){ } map_address = mmap(address, BUFFER_SIZE, - PROT_READ | PROT_WRITE, - MAP_FILE | MAP_SHARED, - fd, 0); + PROT_READ | PROT_WRITE, + MAP_FILE | MAP_SHARED, + fd, 0); if (map_address != (void *)-1) { release_info[release_pos].address = map_address; @@ -2471,12 +2471,12 @@ static void *alloc_hugetlb(void *address){ shmid = shmget(IPC_PRIVATE, BUFFER_SIZE, #ifdef OS_LINUX - SHM_HUGETLB | + SHM_HUGETLB | #endif #ifdef OS_AIX - SHM_LGPAGE | SHM_PIN | + SHM_LGPAGE | SHM_PIN | #endif - IPC_CREAT | SHM_R | SHM_W); + IPC_CREAT | SHM_R | SHM_W); if (shmid != -1) { map_address = (void *)shmat(shmid, address, SHM_RND); @@ -2511,7 +2511,7 @@ static void *alloc_hugetlb(void *address){ tp.PrivilegeCount = 1; tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - + if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) { CloseHandle(hToken); return (void*)-1; @@ -2523,9 +2523,9 @@ static void *alloc_hugetlb(void *address){ } map_address = (void 
*)VirtualAlloc(address, - BUFFER_SIZE, - MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT, - PAGE_READWRITE); + BUFFER_SIZE, + MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT, + PAGE_READWRITE); tp.Privileges[0].Attributes = 0; AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL); @@ -2578,9 +2578,9 @@ static void *alloc_hugetlbfile(void *address){ unlink(filename); map_address = mmap(address, BUFFER_SIZE, - PROT_READ | PROT_WRITE, - MAP_SHARED, - fd, 0); + PROT_READ | PROT_WRITE, + MAP_SHARED, + fd, 0); if (map_address != (void *)-1) { release_info[release_pos].address = map_address; @@ -2717,7 +2717,7 @@ void *blas_memory_alloc(int procpos){ if (!memory[position].used && (memory[position].pos == mypos)) { #if defined(SMP) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); -#else +#else blas_lock(&memory[position].lock); #endif if (!memory[position].used) goto allocation; @@ -2725,7 +2725,7 @@ void *blas_memory_alloc(int procpos){ UNLOCK_COMMAND(&alloc_lock); #else blas_unlock(&memory[position].lock); -#endif +#endif } position ++; @@ -2741,22 +2741,22 @@ void *blas_memory_alloc(int procpos){ LOCK_COMMAND(&alloc_lock); #endif do { - RMB; -#if defined(USE_OPENMP) - if (!memory[position].used) { + RMB; +#if defined(USE_OPENMP) + if (!memory[position].used) { blas_lock(&memory[position].lock); #endif if (!memory[position].used) goto allocation; - + #if defined(USE_OPENMP) - blas_unlock(&memory[position].lock); + blas_unlock(&memory[position].lock); } #endif position ++; } while (position < NUM_BUFFERS); #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) - UNLOCK_COMMAND(&alloc_lock); + UNLOCK_COMMAND(&alloc_lock); #endif goto error; @@ -2770,7 +2770,7 @@ void *blas_memory_alloc(int procpos){ #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); #else - blas_unlock(&memory[position].lock); + blas_unlock(&memory[position].lock); #endif if (!memory[position].addr) { do { @@ -2784,27 +2784,27 @@ void 
*blas_memory_alloc(int procpos){ while ((func != NULL) && (map_address == (void *) -1)) { - map_address = (*func)((void *)base_address); + map_address = (*func)((void *)base_address); #ifdef ALLOC_DEVICEDRIVER - if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) { - fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation was failed.\n"); - } + if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) { + fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation was failed.\n"); + } #endif #ifdef ALLOC_HUGETLBFILE - if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) { + if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) { #ifndef OS_WINDOWS - fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation was failed.\n"); + fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation was failed.\n"); #endif - } + } #endif #if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) - if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1; + if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1; #endif - func ++; + func ++; } #ifdef DEBUG @@ -2818,7 +2818,7 @@ void *blas_memory_alloc(int procpos){ #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); -#endif +#endif memory[position].addr = map_address; #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); @@ -2856,7 +2856,7 @@ void *blas_memory_alloc(int procpos){ #ifdef DEBUG printf("Mapped : %p %3d\n\n", - (void *)memory[position].addr, position); + (void *)memory[position].addr, position); #endif return (void *)memory[position].addr; @@ -2882,9 +2882,10 @@ void blas_memory_free(void *free_area){ while ((position < NUM_BUFFERS) && (memory[position].addr != free_area)) position++; - if (memory[position].addr != free_area) goto error; + if (position >= 
NUM_BUFFERS) goto error; #ifdef DEBUG + if (memory[position].addr != free_area) goto error; printf(" Position : %d\n", position); #endif @@ -2972,7 +2973,7 @@ static BLASULONG init_lock = 0UL; #endif static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, - void *sa, void *sb, BLASLONG pos) { + void *sa, void *sb, BLASLONG pos) { #if !defined(ARCH_POWER) && !defined(ARCH_SPARC) @@ -3099,15 +3100,15 @@ void CONSTRUCTOR gotoblas_init(void) { //#if defined(OS_LINUX) #if 0 - struct rlimit curlimit; - if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 ) - { - if ( curlimit.rlim_cur != curlimit.rlim_max ) - { - curlimit.rlim_cur = curlimit.rlim_max; - setrlimit(RLIMIT_STACK, &curlimit); - } - } + struct rlimit curlimit; + if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 ) + { + if ( curlimit.rlim_cur != curlimit.rlim_max ) + { + curlimit.rlim_cur = curlimit.rlim_max; + setrlimit(RLIMIT_STACK, &curlimit); + } + } #endif #ifdef SMP @@ -3189,8 +3190,8 @@ BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReser */ static int on_process_term(void) { - gotoblas_quit(); - return 0; + gotoblas_quit(); + return 0; } #ifdef _WIN64 #pragma comment(linker, "/INCLUDE:_tls_used") @@ -3237,7 +3238,7 @@ void gotoblas_dummy_for_PGI(void) { asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text"); asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text"); #endif -#endif +#endif } #endif diff --git a/kernel/arm64/daxpy_thunderx.c b/kernel/arm64/daxpy_thunderx.c index 37aae9391..f44f9d4e5 100644 --- a/kernel/arm64/daxpy_thunderx.c +++ b/kernel/arm64/daxpy_thunderx.c @@ -62,7 +62,7 @@ static void daxpy_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) y5 = a * x[5] + y[5]; y6 = a * x[6] + y[6]; y7 = a * x[7] + y[7]; - asm("":"+w"(y0),"+w"(y1),"+w"(y2),"+w"(y3),"+w"(y4),"+w"(y5),"+w"(y6),"+w"(y7)); + __asm__("":"+w"(y0),"+w"(y1),"+w"(y2),"+w"(y3),"+w"(y4),"+w"(y5),"+w"(y6),"+w"(y7)); y[0] = y0; y[1] = y1; y[2] = y2; @@ 
-74,7 +74,7 @@ static void daxpy_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) xx = (x + 4*128/sizeof(*x)); yy = (y + 4*128/sizeof(*y)); - asm("":"+r"(yy)::"memory"); + __asm__("":"+r"(yy)::"memory"); prefetch(xx); prefetch(yy);