From f95031204ee88a8976bc377e524abd09a8b5cac3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 22 Oct 2020 16:19:26 +0200 Subject: [PATCH 1/6] Fix macro used in argument conversion (LAPACK PR 458) --- lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c b/lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c index f58a5c4e9..4928b1bc0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c @@ -71,7 +71,7 @@ lapack_int LAPACKE_zgesvdq( int matrix_layout, char joba, char jobp, goto exit_level_0; } liwork = iwork_query; - lcwork = LAPACK_C2INT(cwork_query); + lcwork = LAPACK_Z2INT(cwork_query); lrwork = (lapack_int)rwork_query; /* Allocate memory for work arrays */ iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); From 2e48d560bad5400f9a33a643f504a6eb707621f9 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 22 Oct 2020 16:23:29 +0200 Subject: [PATCH 2/6] Fix compiler version check --- kernel/Makefile | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/Makefile b/kernel/Makefile index 43318d475..e52781c6d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -22,20 +22,25 @@ ifeq ($(C_COMPILER), CLANG) override CFLAGS += -fno-integrated-as endif endif + AVX2OPT = ifeq ($(C_COMPILER), GCC) # AVX2 support was added in 4.7.0 - GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4) - GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7) - ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11) +GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4) +GCCVERSIONGTEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 5) +GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7) +GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7) +ifeq ($(GCCVERSIONCHECK), 
$(filter $(GCCVERSIONCHECK), 011 110 111)) AVX2OPT = -mavx2 endif endif ifeq ($(C_COMPILER), CLANG) # Any clang posing as gcc 4.2 should be new enough (3.4 or later) GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4) + GCCVERSIONGTEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 5) GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 2) - ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ2), 11) + GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ2) + ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111)) AVX2OPT = -mavx2 endif endif From ee90f30384875b82f84ea8f5c9284d64af247054 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 22 Oct 2020 18:47:07 +0200 Subject: [PATCH 3/6] Increase BUFFERSIZE for POWER8-10 and use same value for POWER6 to fix overflow warning for PWR8 ZGEMM and PWR9 C/ZGEMM and avoid size mismatches in DYNAMIC_ARCH --- common_power.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common_power.h b/common_power.h index e0685f760..0f1001cb6 100644 --- a/common_power.h +++ b/common_power.h @@ -844,8 +844,8 @@ Lmcount$lazy_ptr: #define BUFFER_SIZE ( 2 << 20) #elif defined(PPC440FP2) #define BUFFER_SIZE ( 16 << 20) -#elif defined(POWER8) || defined(POWER9) || defined(POWER10) -#define BUFFER_SIZE ( 64 << 20) +#elif defined(POWER6) || defined(POWER8) || defined(POWER9) || defined(POWER10) +#define BUFFER_SIZE ( 32 << 22) #else #define BUFFER_SIZE ( 16 << 20) #endif From 34c3c407efaaf9770f75f0b9bf8846d91ea3283b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 22 Oct 2020 22:14:26 +0200 Subject: [PATCH 4/6] label always_inline function as inline to silence a gcc warning --- kernel/power/zgemv_t_4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/zgemv_t_4.c b/kernel/power/zgemv_t_4.c index 4ed27d96b..956d75ffc 100644 --- a/kernel/power/zgemv_t_4.c +++ b/kernel/power/zgemv_t_4.c @@ -513,7 +513,7 @@ 
static void zgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT al #endif -static __attribute__((always_inline)) void copy_x(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_src) { +static __attribute__((always_inline)) inline void copy_x(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_src) { BLASLONG i; for (i = 0; i < n; i++) { *dest = *src; From 1d4c96fa0c3506d7bfee45463b17ee2dbb3db3d5 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 23 Oct 2020 00:12:06 +0200 Subject: [PATCH 5/6] Increase BUFFERSIZE further --- common_power.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common_power.h b/common_power.h index 0f1001cb6..a61e4e28a 100644 --- a/common_power.h +++ b/common_power.h @@ -845,7 +845,7 @@ Lmcount$lazy_ptr: #elif defined(PPC440FP2) #define BUFFER_SIZE ( 16 << 20) #elif defined(POWER6) || defined(POWER8) || defined(POWER9) || defined(POWER10) -#define BUFFER_SIZE ( 32 << 22) +#define BUFFER_SIZE ( 64 << 22) #else #define BUFFER_SIZE ( 16 << 20) #endif From b23cb0523174bbd8bff06ca37be947140c9bfd9f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 23 Oct 2020 00:18:29 +0200 Subject: [PATCH 6/6] Fix twisted spelling that broke the gfortran version test again --- Makefile.x86_64 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.x86_64 b/Makefile.x86_64 index 117347c01..a849f0b01 100644 --- a/Makefile.x86_64 +++ b/Makefile.x86_64 @@ -90,7 +90,7 @@ ifeq ($(F_COMPILER), GFORTRAN) GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4) GCCVERSIONGTEQ5 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 5) GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7) -GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCVERSIONMINORGTEQ7) +GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7) ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111)) FCOMMON_OPT += -mavx2 endif