diff --git a/Makefile.system b/Makefile.system index 8843d0ad3..4cb4dc954 100644 --- a/Makefile.system +++ b/Makefile.system @@ -769,6 +769,9 @@ else FCOMMON_OPT += -m32 endif endif +ifneq ($(NO_LAPACKE), 1) +FCOMMON_OPT += -fno-second-underscore +endif endif endif diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 5a7434551..f3ae84fe0 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -73,7 +73,7 @@ if (DYNAMIC_ARCH) endif () if (NOT NO_AVX512) set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX) - string(REGEX REPLACE "-march=native" "" CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) + string(REGEX REPLACE "-march=native" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") endif () if (DYNAMIC_LIST) set(DYNAMIC_CORE PRESCOTT ${DYNAMIC_LIST}) diff --git a/common_arm64.h b/common_arm64.h index f27ca8c63..5951e1ee5 100644 --- a/common_arm64.h +++ b/common_arm64.h @@ -78,7 +78,18 @@ static void __inline blas_lock(volatile BLASULONG *address){ #define BLAS_LOCK_DEFINED +#if !defined(OS_DARWIN) && !defined (OS_ANDROID) +static __inline BLASULONG rpcc(void){ + BLASULONG ret = 0; + + __asm__ __volatile__ ("isb; mrs %0,cntvct_el0":"=r"(ret)); + return ret; +} + +#define RPCC_DEFINED +#define RPCC64BIT +#endif static inline int blas_quickdivide(blasint x, blasint y){ return x / y; diff --git a/common_thread.h b/common_thread.h index bd964445e..6ec40e096 100644 --- a/common_thread.h +++ b/common_thread.h @@ -194,10 +194,6 @@ int trsm_thread(int mode, BLASLONG m, BLASLONG n, int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG); -int beta_thread(int mode, BLASLONG m, BLASLONG n, - double alpha_r, double alpha_i, - void *c, BLASLONG ldc, int (*fuction)()); - int getrf_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *offsetA, BLASLONG lda, void *offsetB, BLASLONG jb, diff --git a/cpuid_x86.c b/cpuid_x86.c index 92c8e1b67..9e1c8e752 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -1197,7 +1197,11 @@ int get_cpuname(void){ case 3: case 5: case 6: +#if defined(__x86_64__) || defined(__amd64__) + return CPUTYPE_CORE2; +#else return CPUTYPE_PENTIUM2; +#endif case 7: case 8: case 10: @@ -1379,6 +1383,8 @@ int get_cpuname(void){ break; case 7: // family 6 exmodel 7 switch (model) { + case 10: // Goldmont Plus + return CPUTYPE_NEHALEM; case 14: // Ice Lake if(support_avx512()) return CPUTYPE_SKYLAKEX; diff --git a/driver/others/memory.c b/driver/others/memory.c index 534d6d9fc..55dce72b8 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -129,7 +129,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #endif -#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) +#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) #include #include #endif @@ -192,7 +192,7 @@ void goto_set_num_threads(int num_threads) {}; #else -#if defined(OS_LINUX) || defined(OS_SUNOS) || defined(OS_NETBSD) +#if defined(OS_LINUX) || defined(OS_SUNOS) #ifndef NO_AFFINITY int get_num_procs(void); #else @@ -312,7 +312,7 @@ int get_num_procs(void) { #endif -#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) +#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) int get_num_procs(void) { @@ -404,7 +404,7 @@ extern int openblas_goto_num_threads_env(); extern int openblas_omp_num_threads_env(); int blas_get_cpu_number(void){ -#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) +#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) int max_num; #endif int blas_goto_num = 0; @@ -412,7 +412,7 @@ int blas_get_cpu_number(void){ if (blas_num_threads) return blas_num_threads; -#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) +#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) max_num = get_num_procs(); #endif @@ -436,7 +436,7 @@ int blas_get_cpu_number(void){ else if (blas_omp_num > 0) blas_num_threads = blas_omp_num; else blas_num_threads = MAX_CPU_NUMBER; -#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) +#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) if (blas_num_threads > max_num) blas_num_threads = max_num; #endif @@ -1673,7 +1673,7 @@ void gotoblas_dummy_for_PGI(void) { #include #endif -#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) +#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) #include #include #endif @@ -1736,7 +1736,7 @@ void goto_set_num_threads(int num_threads) {}; #else -#if defined(OS_LINUX) || defined(OS_SUNOS) || defined(OS_NETBSD) +#if defined(OS_LINUX) || defined(OS_SUNOS) #ifndef NO_AFFINITY int get_num_procs(void); #else @@ -1855,7 +1855,7 @@ int get_num_procs(void) { #endif -#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) +#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) int get_num_procs(void) { @@ -1945,7 +1945,7 @@ extern int openblas_goto_num_threads_env(); extern int openblas_omp_num_threads_env(); int blas_get_cpu_number(void){ -#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) +#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) int max_num; #endif int blas_goto_num = 0; @@ -1953,7 +1953,7 @@ int blas_get_cpu_number(void){ if (blas_num_threads) return blas_num_threads; -#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) +#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) max_num = get_num_procs(); #endif @@ -1977,7 +1977,7 @@ int blas_get_cpu_number(void){ else if (blas_omp_num > 0) blas_num_threads = blas_omp_num; else blas_num_threads = MAX_CPU_NUMBER; -#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) +#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) if (blas_num_threads > max_num) blas_num_threads = max_num; #endif diff --git a/exports/gensymbol b/exports/gensymbol index 21a1b703d..37ba0b191 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -618,19 +618,6 @@ # functions added for lapack-3.7.0 slarfy, - slasyf_rk, - ssyconvf_rook, - ssytf2_rk, - ssytrf_rk, - ssytrs_3, - ssytri_3, - ssytri_3x, - ssycon_3, - ssysv_rk, - slasyf_aa, - ssysv_aa, - ssytrf_aa, - ssytrs_aa, strevc3, sgelqt, sgelqt3, @@ -647,33 +634,8 @@ stplqt, stplqt2, stpmlqt, - ssytrd_2stage, - ssytrd_sy2sb, - ssytrd_sb2st, - ssb2st_kernels, - ssyevd_2stage, - ssyev_2stage, - ssyevx_2stage, - ssyevr_2stage, - ssbev_2stage, - ssbevx_2stage, - ssbevd_2stage, - ssygv_2stage, dlarfy, - dlasyf_rk, dsyconvf, - dsyconvf_rook, - dsytf2_rk, - dsytrf_rk, - dsytrs_3, - dsytri_3, - dsytri_3x, - dsycon_3, - dsysv_rk, - dlasyf_aa, - dsysv_aa, - dsytrf_aa, - dsytrs_aa, dtrevc3, dgelqt, dgelqt3, @@ -690,45 +652,8 @@ dtplqt, dtplqt2, dtpmlqt, - dsytrd_2stage, - dsytrd_sy2sb, - dsytrd_sb2st, - dsb2st_kernels, - dsyevd_2stage, - dsyev_2stage, - dsyevx_2stage, - dsyevr_2stage, - dsbev_2stage, - dsbevx_2stage, - dsbevd_2stage, - dsygv_2stage, - chetf2_rk, - chetrf_rk, - chetri_3, - chetri_3x, - chetrs_3, - checon_3, - chesv_rk, - chesv_aa, - chetrf_aa, - chetrs_aa, - clahef_aa, - clahef_rk, clarfy, - clasyf_rk, - clasyf_aa, csyconvf, - csyconvf_rook, - csytf2_rk, - csytrf_rk, - csytrf_aa, - csytrs_3, - csytrs_aa, - csytri_3, - csytri_3x, - csycon_3, - csysv_rk, - csysv_aa, ctrevc3, cgelqt, cgelqt3, @@ -745,45 +670,8 @@ ctplqt, ctplqt2, ctpmlqt, - chetrd_2stage, - chetrd_he2hb, - chetrd_hb2st, - chb2st_kernels, - cheevd_2stage, - cheev_2stage, - cheevx_2stage, - cheevr_2stage, - chbev_2stage, - chbevx_2stage, - chbevd_2stage, - chegv_2stage, - zhetf2_rk, - zhetrf_rk, - zhetri_3, - zhetri_3x, - zhetrs_3, - zhecon_3, - zhesv_rk, - zhesv_aa, - zhetrf_aa, - zhetrs_aa, - zlahef_aa, - zlahef_rk, zlarfy, - zlasyf_rk, - zlasyf_aa, zsyconvf, - zsyconvf_rook, - zsytrs_aa, - zsytf2_rk, - zsytrf_rk, - zsytrf_aa, - zsytrs_3, - zsytri_3, - zsytri_3x, - zsycon_3, - zsysv_rk, - zsysv_aa, ztrevc3, ztplqt, ztplqt2, @@ -800,43 +688,13 @@ zlaswlq, zlamswlq, zgemlq, - zhetrd_2stage, - zhetrd_he2hb, - zhetrd_hb2st, - zhb2st_kernels, - zheevd_2stage, - zheev_2stage, - zheevx_2stage, - zheevr_2stage, - zhbev_2stage, - zhbevx_2stage, - zhbevd_2stage, - zhegv_2stage, sladiv1, dladiv1, iparam2stage, # functions added for lapack-3.8.0 - ilaenv2stage, - ssysv_aa_2stage, - ssytrf_aa_2stage, - ssytrs_aa_2stage, - chesv_aa_2stage, - chetrf_aa_2stage, - chetrs_aa_2stage, - csysv_aa_2stage, - csytrf_aa_2stage, - csytrs_aa_2stage, - dsysv_aa_2stage, - dsytrf_aa_2stage, - dsytrs_aa_2stage, - zhesv_aa_2stage, - zhetrf_aa_2stage, - zhetrs_aa_2stage, - zsysv_aa_2stage, - zsytrf_aa_2stage, - zsytrs_aa_2stage + ilaenv2stage ); @lapack_extendedprecision_objs = ( @@ -3509,6 +3367,59 @@ zlahef_rook, zlasyf_rook, zsytf2_rook, zsytrf_rook, zsytrs_rook, zsytri_rook, zsycon_rook, zsysv_rook, +# 3.7.0 + slasyf_rk, ssyconvf_rook, ssytf2_rk, + ssytrf_rk, ssytrs_3, ssytri_3, + ssytri_3x, ssycon_3, ssysv_rk, + slasyf_aa, ssysv_aa, ssytrf_aa, + ssytrs_aa, ssytrd_2stage, ssytrd_sy2sb, + ssytrd_sb2st, ssb2st_kernels, ssyevd_2stage, + ssyev_2stage, ssyevx_2stage, ssyevr_2stage, + ssbev_2stage, ssbevx_2stage, ssbevd_2stage, + ssygv_2stage, dlasyf_rk, dsyconvf_rook, + dsytf2_rk, dsytrf_rk, dsytrs_3, + dsytri_3, dsytri_3x, dsycon_3, + dsysv_rk, dlasyf_aa, dsysv_aa, + dsytrf_aa, dsytrs_aa, dsytrd_2stage, + dsytrd_sy2sb, dsytrd_sb2st, dsb2st_kernels, + dsyevd_2stage, dsyev_2stage, dsyevx_2stage, + dsyevr_2stage, dsbev_2stage, dsbevx_2stage, + dsbevd_2stage, dsygv_2stage, chetf2_rk, + chetrf_rk, chetri_3, chetri_3x, + chetrs_3, checon_3, chesv_rk, + chesv_aa, chetrf_aa, chetrs_aa, + clahef_aa, clahef_rk, clasyf_rk, + clasyf_aa, csytf2_rk, csytrf_rk, + csytrf_aa, csytrs_3, csytrs_aa, + csytri_3, csytri_3x, csycon_3, + csysv_rk, csysv_aa, csyconvf_rook, + chetrd_2stage, chetrd_he2hb, chetrd_hb2st, + chb2st_kernels, cheevd_2stage, cheev_2stage, + cheevx_2stage, cheevr_2stage, chbev_2stage, + chbevx_2stage, chbevd_2stage, chegv_2stage, + zhetf2_rk, zhetrf_rk, zhetri_3, + zhetri_3x, zhetrs_3, zhecon_3, + zhesv_rk, zhesv_aa, zhetrf_aa, + zhetrs_aa, zlahef_aa, zlahef_rk, + zlasyf_rk, zlasyf_aa, zsyconvf_rook, + zsytrs_aa, zsytf2_rk, zsytrf_rk, + zsytrf_aa, zsytrs_3, zsytri_3, + zsytri_3x, zsycon_3, zsysv_rk, + zsysv_aa, zhetrd_2stage, zhetrd_he2hb, + zhetrd_hb2st, zhb2st_kernels, zheevd_2stage, + zheev_2stage, zheevx_2stage, zheevr_2stage, + zhbev_2stage, zhbevx_2stage, zhbevd_2stage, + zhegv_2stage, +# 3.8.0 + ssysv_aa_2stage, ssytrf_aa_2stage, + ssytrs_aa_2stage, chesv_aa_2stage, + chetrf_aa_2stage, chetrs_aa_2stage, + csysv_aa_2stage, csytrf_aa_2stage, + csytrs_aa_2stage, dsysv_aa_2stage, + dsytrf_aa_2stage, dsytrs_aa_2stage, + zhesv_aa_2stage, zhetrf_aa_2stage, + zhetrs_aa_2stage, zsysv_aa_2stage, + zsytrf_aa_2stage, zsytrs_aa_2stage ); diff --git a/f_check b/f_check index b05db85bd..993ad9a35 100644 --- a/f_check +++ b/f_check @@ -19,7 +19,7 @@ $nofortran = 0; $compiler = join(" ", @ARGV); $compiler_bin = shift(@ARGV); - + # f77 is too ambiguous $compiler = "" if $compiler eq "f77"; @@ -130,6 +130,11 @@ if ($compiler eq "") { if ($data =~ / zho_ge__/) { $need2bu = 1; } + if ($vendor =~ /G95/) { + if ($ENV{NO_LAPACKE} != 1) { + $need2bu = ""; + } + } } if ($vendor eq "") { @@ -277,6 +282,8 @@ $linker_a = ""; if ($link ne "") { $link =~ s/\-Y\sP\,/\-Y/g; + + $link =~ s/\-R\s*/\-rpath\@/g; $link =~ s/\-rpath\s+/\-rpath\@/g; diff --git a/getarch.c b/getarch.c index 4d960356c..1f590390a 100644 --- a/getarch.c +++ b/getarch.c @@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef OS_WINDOWS #include #endif -#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__APPLE__) +#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || defined(__APPLE__) #include #include #endif @@ -1201,7 +1201,7 @@ static int get_num_cores(void) { #ifdef OS_WINDOWS SYSTEM_INFO sysinfo; -#elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__APPLE__) +#elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || defined(__APPLE__) int m[2], count; size_t len; #endif @@ -1215,7 +1215,7 @@ static int get_num_cores(void) { GetSystemInfo(&sysinfo); return sysinfo.dwNumberOfProcessors; -#elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__APPLE__) +#elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || defined(__APPLE__) m[0] = CTL_HW; m[1] = HW_NCPU; len = sizeof(int);