commit
680f744abf
|
@ -90,7 +90,7 @@ ifeq ($(F_COMPILER), GFORTRAN)
|
||||||
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
|
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||||
GCCVERSIONGTEQ5 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 5)
|
GCCVERSIONGTEQ5 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 5)
|
||||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7)
|
GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||||
GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCVERSIONMINORGTEQ7)
|
GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
|
||||||
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
|
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
|
||||||
FCOMMON_OPT += -mavx2
|
FCOMMON_OPT += -mavx2
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -844,8 +844,8 @@ Lmcount$lazy_ptr:
|
||||||
#define BUFFER_SIZE ( 2 << 20)
|
#define BUFFER_SIZE ( 2 << 20)
|
||||||
#elif defined(PPC440FP2)
|
#elif defined(PPC440FP2)
|
||||||
#define BUFFER_SIZE ( 16 << 20)
|
#define BUFFER_SIZE ( 16 << 20)
|
||||||
#elif defined(POWER8) || defined(POWER9) || defined(POWER10)
|
#elif defined(POWER6) || defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||||
#define BUFFER_SIZE ( 64 << 20)
|
#define BUFFER_SIZE ( 64 << 22)
|
||||||
#else
|
#else
|
||||||
#define BUFFER_SIZE ( 16 << 20)
|
#define BUFFER_SIZE ( 16 << 20)
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -22,20 +22,25 @@ ifeq ($(C_COMPILER), CLANG)
|
||||||
override CFLAGS += -fno-integrated-as
|
override CFLAGS += -fno-integrated-as
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
AVX2OPT =
|
AVX2OPT =
|
||||||
ifeq ($(C_COMPILER), GCC)
|
ifeq ($(C_COMPILER), GCC)
|
||||||
# AVX2 support was added in 4.7.0
|
# AVX2 support was added in 4.7.0
|
||||||
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
|
GCCVERSIONGTEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 5)
|
||||||
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
|
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||||
|
GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
|
||||||
|
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
|
||||||
AVX2OPT = -mavx2
|
AVX2OPT = -mavx2
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
ifeq ($(C_COMPILER), CLANG)
|
ifeq ($(C_COMPILER), CLANG)
|
||||||
# Any clang posing as gcc 4.2 should be new enough (3.4 or later)
|
# Any clang posing as gcc 4.2 should be new enough (3.4 or later)
|
||||||
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||||
|
GCCVERSIONGTEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 5)
|
||||||
GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 2)
|
GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 2)
|
||||||
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ2), 11)
|
GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
|
||||||
|
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
|
||||||
AVX2OPT = -mavx2
|
AVX2OPT = -mavx2
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -513,7 +513,7 @@ static void zgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT al
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static __attribute__((always_inline)) void copy_x(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_src) {
|
static __attribute__((always_inline)) inline void copy_x(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_src) {
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
for (i = 0; i < n; i++) {
|
for (i = 0; i < n; i++) {
|
||||||
*dest = *src;
|
*dest = *src;
|
||||||
|
|
|
@ -71,7 +71,7 @@ lapack_int LAPACKE_zgesvdq( int matrix_layout, char joba, char jobp,
|
||||||
goto exit_level_0;
|
goto exit_level_0;
|
||||||
}
|
}
|
||||||
liwork = iwork_query;
|
liwork = iwork_query;
|
||||||
lcwork = LAPACK_C2INT(cwork_query);
|
lcwork = LAPACK_Z2INT(cwork_query);
|
||||||
lrwork = (lapack_int)rwork_query;
|
lrwork = (lapack_int)rwork_query;
|
||||||
/* Allocate memory for work arrays */
|
/* Allocate memory for work arrays */
|
||||||
iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
|
iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
|
||||||
|
|
Loading…
Reference in New Issue