From 114eb159a4b0d83a76ab837952516e7fadc21a30 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 19 Dec 2020 22:15:58 +0100 Subject: [PATCH 1/4] Disable FMA intrinsics in the srot kernel when the compiler is PGI/NVIDIA --- kernel/x86_64/srot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/srot.c b/kernel/x86_64/srot.c index 3de586cb8..3264d251a 100644 --- a/kernel/x86_64/srot.c +++ b/kernel/x86_64/srot.c @@ -13,7 +13,7 @@ static void srot_kernel(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s) { BLASLONG i = 0; -#if V_SIMD && (defined(HAVE_FMA3) || V_SIMD > 128) +#if V_SIMD && !defined(C_PGI) && (defined(HAVE_FMA3) || V_SIMD > 128) const int vstep = v_nlanes_f32; const int unrollx4 = n & (-vstep * 4); const int unrollx = n & -vstep; From 6f4698ee1fda9b569ed51c214dc51aed4774b21a Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 21 Dec 2020 07:41:18 +0100 Subject: [PATCH 2/4] Temporarily revert to the old nrm2 kernel --- kernel/arm64/KERNEL.NEOVERSEN1 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/arm64/KERNEL.NEOVERSEN1 b/kernel/arm64/KERNEL.NEOVERSEN1 index ea010db42..074d72153 100644 --- a/kernel/arm64/KERNEL.NEOVERSEN1 +++ b/kernel/arm64/KERNEL.NEOVERSEN1 @@ -91,10 +91,10 @@ IDAMAXKERNEL = iamax_thunderx2t99.c ICAMAXKERNEL = izamax_thunderx2t99.c IZAMAXKERNEL = izamax_thunderx2t99.c -SNRM2KERNEL = scnrm2_thunderx2t99.c -DNRM2KERNEL = dznrm2_thunderx2t99.c -CNRM2KERNEL = scnrm2_thunderx2t99.c -ZNRM2KERNEL = dznrm2_thunderx2t99.c +SNRM2KERNEL = nrm2.S +DNRM2KERNEL = nrm2.S +CNRM2KERNEL = znrm2.S +ZNRM2KERNEL = znrm2.S DDOTKERNEL = dot_thunderx2t99.c SDOTKERNEL = dot_thunderx2t99.c From 2768bc1764fe61fcebb6a0e5f906811f7460ed07 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 21 Dec 2020 07:42:51 +0100 Subject: [PATCH 3/4] Temporarily revert to the old nrm2 kernels --- kernel/arm64/KERNEL.THUNDERX2T99 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/kernel/arm64/KERNEL.THUNDERX2T99 b/kernel/arm64/KERNEL.THUNDERX2T99 index a20d0d4a6..8333f60e6 100644 --- a/kernel/arm64/KERNEL.THUNDERX2T99 +++ b/kernel/arm64/KERNEL.THUNDERX2T99 @@ -153,12 +153,12 @@ IDAMAXKERNEL = iamax_thunderx2t99.c ICAMAXKERNEL = izamax_thunderx2t99.c IZAMAXKERNEL = izamax_thunderx2t99.c -SNRM2KERNEL = scnrm2_thunderx2t99.c -CNRM2KERNEL = scnrm2_thunderx2t99.c +SNRM2KERNEL = nrm2.S +CNRM2KERNEL = znrm2.S #DNRM2KERNEL = dznrm2_thunderx2t99_fast.c #ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c -DNRM2KERNEL = dznrm2_thunderx2t99.c -ZNRM2KERNEL = dznrm2_thunderx2t99.c +DNRM2KERNEL = nrm2.S +ZNRM2KERNEL = znrm2.S DDOTKERNEL = dot_thunderx2t99.c From 8631e2976a01d074b207db0c58618c01c9998d35 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 21 Dec 2020 07:45:13 +0100 Subject: [PATCH 4/4] Temporarily revert to the old nrm2 kernels --- kernel/arm64/KERNEL.THUNDERX3T110 | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/kernel/arm64/KERNEL.THUNDERX3T110 b/kernel/arm64/KERNEL.THUNDERX3T110 index a20d0d4a6..4cdd8769f 100644 --- a/kernel/arm64/KERNEL.THUNDERX3T110 +++ b/kernel/arm64/KERNEL.THUNDERX3T110 @@ -153,13 +153,16 @@ IDAMAXKERNEL = iamax_thunderx2t99.c ICAMAXKERNEL = izamax_thunderx2t99.c IZAMAXKERNEL = izamax_thunderx2t99.c -SNRM2KERNEL = scnrm2_thunderx2t99.c -CNRM2KERNEL = scnrm2_thunderx2t99.c -#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c -#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c -DNRM2KERNEL = dznrm2_thunderx2t99.c -ZNRM2KERNEL = dznrm2_thunderx2t99.c - +#SNRM2KERNEL = scnrm2_thunderx2t99.c +#CNRM2KERNEL = scnrm2_thunderx2t99.c +##DNRM2KERNEL = dznrm2_thunderx2t99_fast.c +##ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c +#DNRM2KERNEL = dznrm2_thunderx2t99.c +#ZNRM2KERNEL = dznrm2_thunderx2t99.c +SNRM2KERNEL = nrm2.S +DNRM2KERNEL = nrm2.S +CNRM2KERNEL = znrm2.S +ZNRM2KERNEL = znrm2.S DDOTKERNEL = dot_thunderx2t99.c SDOTKERNEL = dot_thunderx2t99.c