From 3fd6ccdf7610014c11f4f5e82c3f9ce16a0945ce Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 18 Mar 2021 07:50:19 +0100 Subject: [PATCH 1/7] Include just the definition of BLASLONG rather than all of common.h --- getarch_2nd.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/getarch_2nd.c b/getarch_2nd.c index 53ecccf30..dd1f83089 100644 --- a/getarch_2nd.c +++ b/getarch_2nd.c @@ -4,7 +4,15 @@ #else #include "config_kernel.h" #endif -#include "common.h" +#if (defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__) || defined(_WIN32) || defined(_WIN64)) && defined(__64BIT__) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#include "param.h" int main(int argc, char **argv) { From 7b294a99fde37f9657cdd7a261318e97e36fe351 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 18 Mar 2021 21:28:19 +0100 Subject: [PATCH 2/7] Move common.h back to the top of the file so that SKYLAKEX (from config.h) is defined in time --- kernel/x86_64/sgemm_direct_skylakex.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/x86_64/sgemm_direct_skylakex.c b/kernel/x86_64/sgemm_direct_skylakex.c index cc2ac5553..2588289d1 100644 --- a/kernel/x86_64/sgemm_direct_skylakex.c +++ b/kernel/x86_64/sgemm_direct_skylakex.c @@ -1,10 +1,10 @@ /* the direct sgemm code written by Arjan van der Ven */ - +#include "common.h" #if defined(SKYLAKEX) || defined (COOPERLAKE) #include <immintrin.h> -#include "common.h" + /* * "Direct sgemm" code. 
This code operates directly on the inputs and outputs @@ -472,7 +472,7 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG s } } #else -#include "common.h" + void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG strideA, float * __restrict B, BLASLONG strideB , float * __restrict R, BLASLONG strideR) {} #endif From 0f5e86a0d99d1432bf1f5919992f395147d4f72c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 18 Mar 2021 21:53:50 +0100 Subject: [PATCH 3/7] Remove premature entry for DOMATCOPY_RT --- kernel/x86_64/KERNEL | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/x86_64/KERNEL b/kernel/x86_64/KERNEL index 5da79cc3f..bea7036c2 100644 --- a/kernel/x86_64/KERNEL +++ b/kernel/x86_64/KERNEL @@ -491,4 +491,3 @@ SSUMKERNEL = ../arm/sum.c DSUMKERNEL = ../arm/sum.c SOMATCOPY_RT = omatcopy_rt.c -DOMATCOPY_RT = omatcopy_rt.c From d3555d2e505acf3b1a4f059d0a78177d8eb56a18 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 19 Mar 2021 11:44:31 +0100 Subject: [PATCH 4/7] Add workaround for LAPACK test failures with the NVIDIA HPC compiler --- kernel/power/KERNEL.POWER8 | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/power/KERNEL.POWER8 b/kernel/power/KERNEL.POWER8 index c2f4cd204..2b8e65948 100644 --- a/kernel/power/KERNEL.POWER8 +++ b/kernel/power/KERNEL.POWER8 @@ -242,8 +242,13 @@ ZROTKERNEL = zrot.c # SSCALKERNEL = sscal.c DSCALKERNEL = dscal.c +ifeq ($(C_COMPILER), PGI) +CSCALKERNEL = ../arm/zscal.c +ZSCALKERNEL = ../arm/zscal.c +else CSCALKERNEL = zscal.c ZSCALKERNEL = zscal.c +endif # SSWAPKERNEL = sswap.c DSWAPKERNEL = dswap.c From ef85c2247419647212de39228433e292a8a18625 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 19 Mar 2021 11:46:25 +0100 Subject: [PATCH 5/7] Add workaround for LAPACK test failures with the NVIDIA HPC compiler --- kernel/power/KERNEL.POWER9 | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/power/KERNEL.POWER9 b/kernel/power/KERNEL.POWER9 
index 2bd2516de..b6b102b3e 100644 --- a/kernel/power/KERNEL.POWER9 +++ b/kernel/power/KERNEL.POWER9 @@ -166,8 +166,13 @@ ZROTKERNEL = zrot.c # SSCALKERNEL = sscal.c DSCALKERNEL = dscal.c +ifeq ($(C_COMPILER), PGI) +CSCALKERNEL = ../arm/zscal.c +ZSCALKERNEL = ../arm/zscal.c +else CSCALKERNEL = zscal.c ZSCALKERNEL = zscal.c +endif # SSWAPKERNEL = sswap.c DSWAPKERNEL = dswap.c From 86c5a0013fdbd87832cdd5f0a3446aac0aa43804 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 19 Mar 2021 11:47:58 +0100 Subject: [PATCH 6/7] Add workaround for LAPACK testsuite failures with the NVIDIA HPC compiler --- kernel/power/KERNEL.POWER10 | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/power/KERNEL.POWER10 b/kernel/power/KERNEL.POWER10 index 1cf7b0b7c..594b1a35a 100644 --- a/kernel/power/KERNEL.POWER10 +++ b/kernel/power/KERNEL.POWER10 @@ -169,8 +169,13 @@ ZROTKERNEL = zrot.c # SSCALKERNEL = sscal.c DSCALKERNEL = dscal.c +ifeq ($(C_COMPILER), PGI) +CSCALKERNEL = ../arm/zscal.c +ZSCALKERNEL = ../arm/zscal.c +else CSCALKERNEL = zscal.c ZSCALKERNEL = zscal.c +endif # SSWAPKERNEL = sswap.c DSWAPKERNEL = dswap.c From 198adea9611e5b384166fea9ce607fb1cbcad792 Mon Sep 17 00:00:00 2001 From: Gordon Fossum Date: Fri, 19 Mar 2021 10:05:23 -0400 Subject: [PATCH 7/7] Changed default P/Q values for CGEMM and ZGEMM (Power10 only) --- param.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/param.h b/param.h index c41f75ec9..a35ce69bd 100644 --- a/param.h +++ b/param.h @@ -2466,13 +2466,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SGEMM_DEFAULT_P 512 #define DGEMM_DEFAULT_P 384 -#define CGEMM_DEFAULT_P 512 +#define CGEMM_DEFAULT_P 512 #define ZGEMM_DEFAULT_P 256 #define SGEMM_DEFAULT_Q 512 #define DGEMM_DEFAULT_Q 512 -#define CGEMM_DEFAULT_Q 1026 -#define ZGEMM_DEFAULT_Q 1026 +#define CGEMM_DEFAULT_Q 384 +#define ZGEMM_DEFAULT_Q 384 #define SGEMM_DEFAULT_R 4096 #define DGEMM_DEFAULT_R 4096