Merge pull request #13 from xianyi/develop

resync with upstream
This commit is contained in:
Martin Kroeker 2019-11-03 22:33:31 +01:00 committed by GitHub
commit ccc28c6d60
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 98 additions and 164 deletions

View File

@ -769,6 +769,9 @@ else
FCOMMON_OPT += -m32
endif
endif
# Append -fno-second-underscore to FCOMMON_OPT unless NO_LAPACKE is set to 1.
ifneq ($(NO_LAPACKE), 1)
FCOMMON_OPT += -fno-second-underscore
endif
endif
endif

View File

@ -73,7 +73,7 @@ if (DYNAMIC_ARCH)
endif ()
if (NOT NO_AVX512)
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX)
string(REGEX REPLACE "-march=native" "" CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
string(REGEX REPLACE "-march=native" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
endif ()
if (DYNAMIC_LIST)
set(DYNAMIC_CORE PRESCOTT ${DYNAMIC_LIST})

View File

@ -78,7 +78,18 @@ static void __inline blas_lock(volatile BLASULONG *address){
#define BLAS_LOCK_DEFINED
#if !defined(OS_DARWIN) && !defined (OS_ANDROID)
/*
 * rpcc - return the current value of the cntvct_el0 counter register.
 *
 * The asm sequence executes `isb` and then reads cntvct_el0 with `mrs`
 * into the result variable, which is returned unchanged.
 * NOTE(review): the isb presumably prevents the counter read from being
 * executed ahead of earlier instructions -- confirm against the Arm ARM.
 */
static __inline BLASULONG rpcc(void){
  BLASULONG ticks = 0;

  __asm__ __volatile__ ("isb; mrs %0,cntvct_el0" : "=r" (ticks));

  return ticks;
}
#define RPCC_DEFINED
#define RPCC64BIT
#endif
static inline int blas_quickdivide(blasint x, blasint y){
return x / y;

View File

@ -194,10 +194,6 @@ int trsm_thread(int mode, BLASLONG m, BLASLONG n,
int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
/*
 * Declaration of beta_thread.
 *
 * mode             : integer mode selector (its encoding is not shown here).
 * m, n             : BLASLONG sizes.
 * alpha_r, alpha_i : two double scalars
 *                    (NOTE(review): presumably real/imaginary parts -- confirm).
 * c, ldc           : untyped buffer pointer and a BLASLONG
 *                    (NOTE(review): presumably the leading dimension -- confirm).
 * function         : pointer to a function returning int, declared with an
 *                    unspecified argument list.
 *
 * Returns int; the meaning of the value is defined by the implementation.
 */
int beta_thread(int mode, BLASLONG m, BLASLONG n,
		double alpha_r, double alpha_i,
		void *c, BLASLONG ldc, int (*function)());
int getrf_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k,
void *offsetA, BLASLONG lda,
void *offsetB, BLASLONG jb,

View File

@ -1197,7 +1197,11 @@ int get_cpuname(void){
case 3:
case 5:
case 6:
#if defined(__x86_64__) || defined(__amd64__)
return CPUTYPE_CORE2;
#else
return CPUTYPE_PENTIUM2;
#endif
case 7:
case 8:
case 10:
@ -1379,6 +1383,8 @@ int get_cpuname(void){
break;
case 7: // family 6 exmodel 7
switch (model) {
case 10: // Goldmont Plus
return CPUTYPE_NEHALEM;
case 14: // Ice Lake
if(support_avx512())
return CPUTYPE_SKYLAKEX;

View File

@ -129,7 +129,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#endif
#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN)
#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN)
#include <sys/sysctl.h>
#include <sys/resource.h>
#endif
@ -192,7 +192,7 @@ void goto_set_num_threads(int num_threads) {};
#else
#if defined(OS_LINUX) || defined(OS_SUNOS) || defined(OS_NETBSD)
#if defined(OS_LINUX) || defined(OS_SUNOS)
#ifndef NO_AFFINITY
int get_num_procs(void);
#else
@ -312,7 +312,7 @@ int get_num_procs(void) {
#endif
#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY)
#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY)
int get_num_procs(void) {
@ -404,7 +404,7 @@ extern int openblas_goto_num_threads_env();
extern int openblas_omp_num_threads_env();
int blas_get_cpu_number(void){
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
int max_num;
#endif
int blas_goto_num = 0;
@ -412,7 +412,7 @@ int blas_get_cpu_number(void){
if (blas_num_threads) return blas_num_threads;
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
max_num = get_num_procs();
#endif
@ -436,7 +436,7 @@ int blas_get_cpu_number(void){
else if (blas_omp_num > 0) blas_num_threads = blas_omp_num;
else blas_num_threads = MAX_CPU_NUMBER;
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
if (blas_num_threads > max_num) blas_num_threads = max_num;
#endif
@ -1673,7 +1673,7 @@ void gotoblas_dummy_for_PGI(void) {
#include <sys/resource.h>
#endif
#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN)
#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN)
#include <sys/sysctl.h>
#include <sys/resource.h>
#endif
@ -1736,7 +1736,7 @@ void goto_set_num_threads(int num_threads) {};
#else
#if defined(OS_LINUX) || defined(OS_SUNOS) || defined(OS_NETBSD)
#if defined(OS_LINUX) || defined(OS_SUNOS)
#ifndef NO_AFFINITY
int get_num_procs(void);
#else
@ -1855,7 +1855,7 @@ int get_num_procs(void) {
#endif
#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY)
#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY)
int get_num_procs(void) {
@ -1945,7 +1945,7 @@ extern int openblas_goto_num_threads_env();
extern int openblas_omp_num_threads_env();
int blas_get_cpu_number(void){
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
int max_num;
#endif
int blas_goto_num = 0;
@ -1953,7 +1953,7 @@ int blas_get_cpu_number(void){
if (blas_num_threads) return blas_num_threads;
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
max_num = get_num_procs();
#endif
@ -1977,7 +1977,7 @@ int blas_get_cpu_number(void){
else if (blas_omp_num > 0) blas_num_threads = blas_omp_num;
else blas_num_threads = MAX_CPU_NUMBER;
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
if (blas_num_threads > max_num) blas_num_threads = max_num;
#endif

View File

@ -618,19 +618,6 @@
# functions added for lapack-3.7.0
slarfy,
slasyf_rk,
ssyconvf_rook,
ssytf2_rk,
ssytrf_rk,
ssytrs_3,
ssytri_3,
ssytri_3x,
ssycon_3,
ssysv_rk,
slasyf_aa,
ssysv_aa,
ssytrf_aa,
ssytrs_aa,
strevc3,
sgelqt,
sgelqt3,
@ -647,33 +634,8 @@
stplqt,
stplqt2,
stpmlqt,
ssytrd_2stage,
ssytrd_sy2sb,
ssytrd_sb2st,
ssb2st_kernels,
ssyevd_2stage,
ssyev_2stage,
ssyevx_2stage,
ssyevr_2stage,
ssbev_2stage,
ssbevx_2stage,
ssbevd_2stage,
ssygv_2stage,
dlarfy,
dlasyf_rk,
dsyconvf,
dsyconvf_rook,
dsytf2_rk,
dsytrf_rk,
dsytrs_3,
dsytri_3,
dsytri_3x,
dsycon_3,
dsysv_rk,
dlasyf_aa,
dsysv_aa,
dsytrf_aa,
dsytrs_aa,
dtrevc3,
dgelqt,
dgelqt3,
@ -690,45 +652,8 @@
dtplqt,
dtplqt2,
dtpmlqt,
dsytrd_2stage,
dsytrd_sy2sb,
dsytrd_sb2st,
dsb2st_kernels,
dsyevd_2stage,
dsyev_2stage,
dsyevx_2stage,
dsyevr_2stage,
dsbev_2stage,
dsbevx_2stage,
dsbevd_2stage,
dsygv_2stage,
chetf2_rk,
chetrf_rk,
chetri_3,
chetri_3x,
chetrs_3,
checon_3,
chesv_rk,
chesv_aa,
chetrf_aa,
chetrs_aa,
clahef_aa,
clahef_rk,
clarfy,
clasyf_rk,
clasyf_aa,
csyconvf,
csyconvf_rook,
csytf2_rk,
csytrf_rk,
csytrf_aa,
csytrs_3,
csytrs_aa,
csytri_3,
csytri_3x,
csycon_3,
csysv_rk,
csysv_aa,
ctrevc3,
cgelqt,
cgelqt3,
@ -745,45 +670,8 @@
ctplqt,
ctplqt2,
ctpmlqt,
chetrd_2stage,
chetrd_he2hb,
chetrd_hb2st,
chb2st_kernels,
cheevd_2stage,
cheev_2stage,
cheevx_2stage,
cheevr_2stage,
chbev_2stage,
chbevx_2stage,
chbevd_2stage,
chegv_2stage,
zhetf2_rk,
zhetrf_rk,
zhetri_3,
zhetri_3x,
zhetrs_3,
zhecon_3,
zhesv_rk,
zhesv_aa,
zhetrf_aa,
zhetrs_aa,
zlahef_aa,
zlahef_rk,
zlarfy,
zlasyf_rk,
zlasyf_aa,
zsyconvf,
zsyconvf_rook,
zsytrs_aa,
zsytf2_rk,
zsytrf_rk,
zsytrf_aa,
zsytrs_3,
zsytri_3,
zsytri_3x,
zsycon_3,
zsysv_rk,
zsysv_aa,
ztrevc3,
ztplqt,
ztplqt2,
@ -800,43 +688,13 @@
zlaswlq,
zlamswlq,
zgemlq,
zhetrd_2stage,
zhetrd_he2hb,
zhetrd_hb2st,
zhb2st_kernels,
zheevd_2stage,
zheev_2stage,
zheevx_2stage,
zheevr_2stage,
zhbev_2stage,
zhbevx_2stage,
zhbevd_2stage,
zhegv_2stage,
sladiv1,
dladiv1,
iparam2stage,
# functions added for lapack-3.8.0
ilaenv2stage,
ssysv_aa_2stage,
ssytrf_aa_2stage,
ssytrs_aa_2stage,
chesv_aa_2stage,
chetrf_aa_2stage,
chetrs_aa_2stage,
csysv_aa_2stage,
csytrf_aa_2stage,
csytrs_aa_2stage,
dsysv_aa_2stage,
dsytrf_aa_2stage,
dsytrs_aa_2stage,
zhesv_aa_2stage,
zhetrf_aa_2stage,
zhetrs_aa_2stage,
zsysv_aa_2stage,
zsytrf_aa_2stage,
zsytrs_aa_2stage
ilaenv2stage
);
@lapack_extendedprecision_objs = (
@ -3509,6 +3367,59 @@
zlahef_rook, zlasyf_rook,
zsytf2_rook, zsytrf_rook, zsytrs_rook,
zsytri_rook, zsycon_rook, zsysv_rook,
# 3.7.0
slasyf_rk, ssyconvf_rook, ssytf2_rk,
ssytrf_rk, ssytrs_3, ssytri_3,
ssytri_3x, ssycon_3, ssysv_rk,
slasyf_aa, ssysv_aa, ssytrf_aa,
ssytrs_aa, ssytrd_2stage, ssytrd_sy2sb,
ssytrd_sb2st, ssb2st_kernels, ssyevd_2stage,
ssyev_2stage, ssyevx_2stage, ssyevr_2stage,
ssbev_2stage, ssbevx_2stage, ssbevd_2stage,
ssygv_2stage, dlasyf_rk, dsyconvf_rook,
dsytf2_rk, dsytrf_rk, dsytrs_3,
dsytri_3, dsytri_3x, dsycon_3,
dsysv_rk, dlasyf_aa, dsysv_aa,
dsytrf_aa, dsytrs_aa, dsytrd_2stage,
dsytrd_sy2sb, dsytrd_sb2st, dsb2st_kernels,
dsyevd_2stage, dsyev_2stage, dsyevx_2stage,
dsyevr_2stage, dsbev_2stage, dsbevx_2stage,
dsbevd_2stage, dsygv_2stage, chetf2_rk,
chetrf_rk, chetri_3, chetri_3x,
chetrs_3, checon_3, chesv_rk,
chesv_aa, chetrf_aa, chetrs_aa,
clahef_aa, clahef_rk, clasyf_rk,
clasyf_aa, csytf2_rk, csytrf_rk,
csytrf_aa, csytrs_3, csytrs_aa,
csytri_3, csytri_3x, csycon_3,
csysv_rk, csysv_aa, csyconvf_rook,
chetrd_2stage, chetrd_he2hb, chetrd_hb2st,
chb2st_kernels, cheevd_2stage, cheev_2stage,
cheevx_2stage, cheevr_2stage, chbev_2stage,
chbevx_2stage, chbevd_2stage, chegv_2stage,
zhetf2_rk, zhetrf_rk, zhetri_3,
zhetri_3x, zhetrs_3, zhecon_3,
zhesv_rk, zhesv_aa, zhetrf_aa,
zhetrs_aa, zlahef_aa, zlahef_rk,
zlasyf_rk, zlasyf_aa, zsyconvf_rook,
zsytrs_aa, zsytf2_rk, zsytrf_rk,
zsytrf_aa, zsytrs_3, zsytri_3,
zsytri_3x, zsycon_3, zsysv_rk,
zsysv_aa, zhetrd_2stage, zhetrd_he2hb,
zhetrd_hb2st, zhb2st_kernels, zheevd_2stage,
zheev_2stage, zheevx_2stage, zheevr_2stage,
zhbev_2stage, zhbevx_2stage, zhbevd_2stage,
zhegv_2stage,
# 3.8.0
ssysv_aa_2stage, ssytrf_aa_2stage,
ssytrs_aa_2stage, chesv_aa_2stage,
chetrf_aa_2stage, chetrs_aa_2stage,
csysv_aa_2stage, csytrf_aa_2stage,
csytrs_aa_2stage, dsysv_aa_2stage,
dsytrf_aa_2stage, dsytrs_aa_2stage,
zhesv_aa_2stage, zhetrf_aa_2stage,
zhetrs_aa_2stage, zsysv_aa_2stage,
zsytrf_aa_2stage, zsytrs_aa_2stage
);

View File

@ -130,6 +130,11 @@ if ($compiler eq "") {
if ($data =~ / zho_ge__/) {
$need2bu = 1;
}
# When the detected vendor matches G95, clear $need2bu unless the
# NO_LAPACKE environment variable compares numerically equal to 1.
# NOTE(review): presumably pairs with a build flag that suppresses the
# second trailing underscore when LAPACKE is built -- confirm against the
# Makefiles.
if ($vendor =~ /G95/) {
# Numeric comparison: an unset or non-numeric NO_LAPACKE is treated as 0 here.
if ($ENV{NO_LAPACKE} != 1) {
# Empty string is false in Perl, so this turns the flag off.
$need2bu = "";
}
}
}
if ($vendor eq "") {
@ -278,6 +283,8 @@ if ($link ne "") {
$link =~ s/\-Y\sP\,/\-Y/g;
$link =~ s/\-R\s*/\-rpath\@/g;
$link =~ s/\-rpath\s+/\-rpath\@/g;
$link =~ s/\-rpath-link\s+/\-rpath-link\@/g;

View File

@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef OS_WINDOWS
#include <windows.h>
#endif
#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__APPLE__)
#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || defined(__APPLE__)
#include <sys/types.h>
#include <sys/sysctl.h>
#endif
@ -1201,7 +1201,7 @@ static int get_num_cores(void) {
#ifdef OS_WINDOWS
SYSTEM_INFO sysinfo;
#elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__APPLE__)
#elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || defined(__APPLE__)
int m[2], count;
size_t len;
#endif
@ -1215,7 +1215,7 @@ static int get_num_cores(void) {
GetSystemInfo(&sysinfo);
return sysinfo.dwNumberOfProcessors;
#elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__APPLE__)
#elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || defined(__APPLE__)
m[0] = CTL_HW;
m[1] = HW_NCPU;
len = sizeof(int);