From ee90f30384875b82f84ea8f5c9284d64af247054 Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Thu, 22 Oct 2020 18:47:07 +0200
Subject: [PATCH 1/3] Increase BUFFERSIZE for POWER8-10 and use same value for POWER6 to fix overflow warning for PWR8 ZGEMM and PWR9 C/ZGEMM and avoid size mismatches in DYNAMIC_ARCH

---
 common_power.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/common_power.h b/common_power.h
index e0685f760..0f1001cb6 100644
--- a/common_power.h
+++ b/common_power.h
@@ -844,8 +844,8 @@ Lmcount$lazy_ptr:
 #define BUFFER_SIZE ( 2 << 20)
 #elif defined(PPC440FP2)
 #define BUFFER_SIZE ( 16 << 20)
-#elif defined(POWER8) || defined(POWER9) || defined(POWER10)
-#define BUFFER_SIZE ( 64 << 20)
+#elif defined(POWER6) || defined(POWER8) || defined(POWER9) || defined(POWER10)
+#define BUFFER_SIZE ( 32 << 22)
 #else
 #define BUFFER_SIZE ( 16 << 20)
 #endif

From 34c3c407efaaf9770f75f0b9bf8846d91ea3283b Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Thu, 22 Oct 2020 22:14:26 +0200
Subject: [PATCH 2/3] label always_inline function as inline to silence a gcc warning

---
 kernel/power/zgemv_t_4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/power/zgemv_t_4.c b/kernel/power/zgemv_t_4.c
index 4ed27d96b..956d75ffc 100644
--- a/kernel/power/zgemv_t_4.c
+++ b/kernel/power/zgemv_t_4.c
@@ -513,7 +513,7 @@ static void zgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT al
 #endif
 
 
-static __attribute__((always_inline)) void copy_x(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_src) {
+static __attribute__((always_inline)) inline void copy_x(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_src) {
 BLASLONG i;
 for (i = 0; i < n; i++) {
 *dest = *src;

From 1d4c96fa0c3506d7bfee45463b17ee2dbb3db3d5 Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Fri, 23 Oct 2020 00:12:06 +0200
Subject: [PATCH 3/3] Increase BUFFERSIZE further

---
 common_power.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common_power.h b/common_power.h
index 0f1001cb6..a61e4e28a 100644
--- a/common_power.h
+++ b/common_power.h
@@ -845,7 +845,7 @@ Lmcount$lazy_ptr:
 #elif defined(PPC440FP2)
 #define BUFFER_SIZE ( 16 << 20)
 #elif defined(POWER6) || defined(POWER8) || defined(POWER9) || defined(POWER10)
-#define BUFFER_SIZE ( 32 << 22)
+#define BUFFER_SIZE ( 64 << 22)
 #else
 #define BUFFER_SIZE ( 16 << 20)
 #endif