diff --git a/Makefile.system b/Makefile.system index 4f17c25b9..3312a0be3 100644 --- a/Makefile.system +++ b/Makefile.system @@ -617,6 +617,7 @@ DYNAMIC_CORE += POWER8 ifneq ($(C_COMPILER), GCC) DYNAMIC_CORE += POWER9 DYNAMIC_CORE += POWER10 +override LDFLAGS += -Wl,-no-power10-stubs endif ifeq ($(C_COMPILER), GCC) ifeq ($(GCCVERSIONGT5), 1) @@ -626,9 +627,11 @@ $(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.) endif ifeq ($(GCCVERSIONGTEQ11), 1) DYNAMIC_CORE += POWER10 -else ifeq ($(GCCVERSIONEQ10), 1) +override LDFLAGS += -Wl,-no-power10-stubs +else ifeq ($(GCCVERSIONGTEQ10), 1) ifeq ($(GCCMINORVERSIONGTEQ2), 1) DYNAMIC_CORE += POWER10 +override LDFLAGS += -Wl,-no-power10-stubs endif else $(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.) diff --git a/README.md b/README.md index 6dc3c7b42..4e5e3e956 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,10 @@ Examples: ```sh make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A ``` + or same with the newer mips-crosscompiler put out by Loongson that defaults to the 32bit ABI: + ```sh + make HOSTCC=gcc CC='/opt/mips-loongson-gcc7.3-linux-gnu/2019.06-29/bin/mips-linux-gnu-gcc -mabi=64' FC='/opt/mips-loongson-gcc7.3-linux-gnu/2019.06-29/bin/mips-linux-gnu-gfortran -mabi=64' TARGET=LOONGSON3A + ``` * On an x86 box, compile this library for a loongson3a CPU with loongcc (based on Open64) compiler: ```sh diff --git a/c_check b/c_check index dd700b8b4..314c2b157 100644 --- a/c_check +++ b/c_check @@ -249,6 +249,28 @@ if (($architecture eq "x86") || ($architecture eq "x86_64")) { } } +$c11_atomics = 0; +if ($data =~ /HAVE_C11/) { + eval "use File::Temp qw(tempfile)"; + if ($@){ + warn "could not load PERL module File::Temp, so could not check compiler compatibility with C11"; + $c11_atomics = 0; + } else { + ($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 ); + print $tmpf "#include \nint main(void){}\n"; + $args = " -c -o $tmpf.o $tmpf"; + my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null"); + system(@cmd) == 0; + if ($? != 0) { + $c11_atomics = 0; + } else { + $c11_atomics = 1; + } + unlink("$tmpf.o"); + } +} + + $data = `$compiler_name $flags -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`; $data =~ /globl\s([_\.]*)(.*)/; @@ -352,6 +374,8 @@ print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32; print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64; print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne ""; print CONFFILE "#define HAVE_MSA\t1\n" if $have_msa eq 1; +print CONFFILE "#define HAVE_C11\t1\n" if $c11_atomics eq 1; + if ($os eq "LINUX") { diff --git a/cmake/f_check.cmake b/cmake/f_check.cmake index f877fc3e1..0f5d0e15d 100644 --- a/cmake/f_check.cmake +++ b/cmake/f_check.cmake @@ -21,7 +21,15 @@ # NEED2UNDERSCORES if (NOT NO_LAPACK) - enable_language(Fortran) + include(CheckLanguage) + check_language(Fortran) + if(CMAKE_Fortran_COMPILER) + enable_language(Fortran) + else() + message(STATUS "No Fortran compiler found, can build only BLAS but not LAPACK") + set (NOFORTRAN 1) + set (NO_LAPACK 1) + endif() else() include(CMakeForceCompiler) CMAKE_FORCE_Fortran_COMPILER(gfortran GNU) diff --git a/common.h b/common.h index 00b34a3f7..d6637abe4 100644 --- a/common.h +++ b/common.h @@ -681,7 +681,7 @@ __declspec(dllimport) int __cdecl omp_in_parallel(void); __declspec(dllimport) int __cdecl omp_get_num_procs(void); #endif -#if (__STDC_VERSION__ >= 201112L) +#ifdef HAVE_C11 #if defined(C_GCC) && ( __GNUC__ < 7) // workaround for GCC bug 65467 #ifndef _Atomic diff --git a/ctest.c b/ctest.c index 5e869b901..cd84ab1bb 100644 --- a/ctest.c +++ b/ctest.c @@ -153,3 +153,6 @@ ARCH_ARM ARCH_ARM64 #endif +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) +HAVE_C11 +#endif diff --git a/driver/level3/level3_gemm3m_thread.c b/driver/level3/level3_gemm3m_thread.c index 9216daaed..39824fc5a 100644 --- a/driver/level3/level3_gemm3m_thread.c +++ b/driver/level3/level3_gemm3m_thread.c @@ -91,7 +91,7 @@ #endif typedef struct { -#if __STDC_VERSION__ >= 201112L +#ifdef HAVE_C11 _Atomic #else volatile diff --git a/driver/level3/level3_syrk_threaded.c b/driver/level3/level3_syrk_threaded.c index 574f825b0..a041abac3 100644 --- a/driver/level3/level3_syrk_threaded.c +++ b/driver/level3/level3_syrk_threaded.c @@ -67,7 +67,7 @@ #endif typedef struct { -#if __STDC_VERSION__ >= 201112L +#ifdef HAVE_C11 _Atomic #else volatile diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c index 04b614a6e..756e51b5d 100644 --- a/driver/others/blas_server.c +++ b/driver/others/blas_server.c @@ -141,7 +141,7 @@ typedef struct { } thread_status_t; -#if (__STDC_VERSION__ >= 201112L) +#ifdef HAVE_C11 #define atomic_load_queue(p) __atomic_load_n(p, __ATOMIC_RELAXED) #define atomic_store_queue(p, v) __atomic_store_n(p, v, __ATOMIC_RELAXED) #else diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index 4255852c8..b4eb27c25 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -55,7 +55,7 @@ int blas_server_avail = 0; static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER]; -#if __STDC_VERSION__ >= 201112L +#ifdef HAVE_C11 static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER]; #else static _Bool blas_buffer_inuse[MAX_PARALLEL_NUMBER]; @@ -320,7 +320,7 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){ while(true) { for(i=0; i < MAX_PARALLEL_NUMBER; i++) { -#if __STDC_VERSION__ >= 201112L +#ifdef HAVE_C11 _Bool inuse = false; if(atomic_compare_exchange_weak(&blas_buffer_inuse[i], &inuse, true)) { #else @@ -345,7 +345,7 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){ exec_threads(&queue[i], buf_index); } -#if __STDC_VERSION__ >= 201112L +#ifdef HAVE_C11 atomic_store(&blas_buffer_inuse[buf_index], false); #else blas_buffer_inuse[buf_index] = false; diff --git a/driver/others/memory.c b/driver/others/memory.c index a5595aed4..9b6c226a1 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -1095,7 +1095,7 @@ static BLASULONG base_address = 0UL; static BLASULONG base_address = BASE_ADDRESS; #endif -#if __STDC_VERSION__ >= 201112L +#ifdef HAVE_C11 static _Atomic int memory_initialized = 0; #else static volatile int memory_initialized = 0; diff --git a/lapack/getrf/getrf_parallel.c b/lapack/getrf/getrf_parallel.c index c602822a8..fc410b0e7 100644 --- a/lapack/getrf/getrf_parallel.c +++ b/lapack/getrf/getrf_parallel.c @@ -68,7 +68,7 @@ double sqrt(double); #define GETRF_FACTOR 1.00 -#if (__STDC_VERSION__ >= 201112L) +#ifdef HAVE_C11 #define atomic_load_long(p) __atomic_load_n(p, __ATOMIC_RELAXED) #define atomic_store_long(p, v) __atomic_store_n(p, v, __ATOMIC_RELAXED) #else diff --git a/lapack/getrf/potrf_parallel.c b/lapack/getrf/potrf_parallel.c index 312509685..008fcb8cc 100644 --- a/lapack/getrf/potrf_parallel.c +++ b/lapack/getrf/potrf_parallel.c @@ -101,7 +101,7 @@ static FLOAT dm1 = -1.; #endif typedef struct { -#if __STDC_VERSION__ >= 201112L +#ifdef HAVE_C11 _Atomic #else volatile