Compiler version-check and kernel fixes, one hunk per file:
  * Makefile.x86_64: GCCVERSIONCHECK now references GCCMINORVERSIONGTEQ7,
    the variable actually assigned just above it.
  * common_power.h: POWER6 joins the POWER8/9/10 BUFFER_SIZE case, which
    becomes (64 << 22).
  * kernel/Makefile: -mavx2 is selected from a three-digit check string
    (major >= 5, major >= 4, minor flag) matched against 011, 110, 111.
    The clang branch composes that string from GCCMINORVERSIONGTEQ2, the
    minor-version flag that branch itself assigns; GCCMINORVERSIONGTEQ7 is
    only assigned inside the GCC branch and would expand to nothing here,
    so the check could never match.
  * kernel/power/zgemv_t_4.c: copy_x gains the inline keyword next to
    always_inline.
  * lapacke_zgesvdq.c: lcwork is derived with LAPACK_Z2INT instead of
    LAPACK_C2INT.
NOTE(review): line breaks and indentation were reconstructed from the hunk
line counts; confirm context whitespace against the target tree, or apply
with whitespace tolerance.

diff --git a/Makefile.x86_64 b/Makefile.x86_64
index 117347c01..a849f0b01 100644
--- a/Makefile.x86_64
+++ b/Makefile.x86_64
@@ -90,7 +90,7 @@ ifeq ($(F_COMPILER), GFORTRAN)
 GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
 GCCVERSIONGTEQ5 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 5)
 GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7)
-GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCVERSIONMINORGTEQ7)
+GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
 ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
 FCOMMON_OPT += -mavx2
 endif
diff --git a/common_power.h b/common_power.h
index e0685f760..a61e4e28a 100644
--- a/common_power.h
+++ b/common_power.h
@@ -844,8 +844,8 @@ Lmcount$lazy_ptr:
 #define BUFFER_SIZE ( 2 << 20)
 #elif defined(PPC440FP2)
 #define BUFFER_SIZE ( 16 << 20)
-#elif defined(POWER8) || defined(POWER9) || defined(POWER10)
-#define BUFFER_SIZE ( 64 << 20)
+#elif defined(POWER6) || defined(POWER8) || defined(POWER9) || defined(POWER10)
+#define BUFFER_SIZE ( 64 << 22)
 #else
 #define BUFFER_SIZE ( 16 << 20)
 #endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 43318d475..e52781c6d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -22,20 +22,25 @@ ifeq ($(C_COMPILER), CLANG)
 override CFLAGS += -fno-integrated-as
 endif
 endif
+
 AVX2OPT =
 ifeq ($(C_COMPILER), GCC)
 # AVX2 support was added in 4.7.0
- GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
- GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
- ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
+GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
+GCCVERSIONGTEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 5)
+GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
+GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
+ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
 AVX2OPT = -mavx2
 endif
 endif
 ifeq ($(C_COMPILER), CLANG)
 # Any clang posing as gcc 4.2 should be new enough (3.4 or later)
 GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
+ GCCVERSIONGTEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 5)
 GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 2)
- ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ2), 11)
+ GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ2)
+ ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
 AVX2OPT = -mavx2
 endif
 endif
diff --git a/kernel/power/zgemv_t_4.c b/kernel/power/zgemv_t_4.c
index 4ed27d96b..956d75ffc 100644
--- a/kernel/power/zgemv_t_4.c
+++ b/kernel/power/zgemv_t_4.c
@@ -513,7 +513,7 @@ static void zgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT al
 #endif
 
 
-static __attribute__((always_inline)) void copy_x(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_src) {
+static __attribute__((always_inline)) inline void copy_x(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_src) {
     BLASLONG i;
     for (i = 0; i < n; i++) {
         *dest = *src;
diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c b/lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c
index f58a5c4e9..4928b1bc0 100644
--- a/lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c
+++ b/lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c
@@ -71,7 +71,7 @@ lapack_int LAPACKE_zgesvdq( int matrix_layout, char joba, char jobp,
         goto exit_level_0;
     }
     liwork = iwork_query;
-    lcwork = LAPACK_C2INT(cwork_query);
+    lcwork = LAPACK_Z2INT(cwork_query);
     lrwork = (lapack_int)rwork_query;
     /* Allocate memory for work arrays */
     iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );