Merge pull request #3048 from martin-frbg/issue2998

Temporarily revert to the old NRM2 kernels for ThunderX2/3 and NeoverseN1
This commit is contained in:
Martin Kroeker 2020-12-21 13:30:08 +01:00 committed by GitHub
commit 3559c5d7a2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 19 additions and 16 deletions

View File

@@ -91,10 +91,10 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
ICAMAXKERNEL = izamax_thunderx2t99.c ICAMAXKERNEL = izamax_thunderx2t99.c
IZAMAXKERNEL = izamax_thunderx2t99.c IZAMAXKERNEL = izamax_thunderx2t99.c
SNRM2KERNEL = scnrm2_thunderx2t99.c SNRM2KERNEL = nrm2.S
DNRM2KERNEL = dznrm2_thunderx2t99.c DNRM2KERNEL = nrm2.S
CNRM2KERNEL = scnrm2_thunderx2t99.c CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = dznrm2_thunderx2t99.c ZNRM2KERNEL = znrm2.S
DDOTKERNEL = dot_thunderx2t99.c DDOTKERNEL = dot_thunderx2t99.c
SDOTKERNEL = dot_thunderx2t99.c SDOTKERNEL = dot_thunderx2t99.c

View File

@@ -153,12 +153,12 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
ICAMAXKERNEL = izamax_thunderx2t99.c ICAMAXKERNEL = izamax_thunderx2t99.c
IZAMAXKERNEL = izamax_thunderx2t99.c IZAMAXKERNEL = izamax_thunderx2t99.c
SNRM2KERNEL = scnrm2_thunderx2t99.c SNRM2KERNEL = nrm2.S
CNRM2KERNEL = scnrm2_thunderx2t99.c CNRM2KERNEL = nrm2.S
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c #DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c #ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
DNRM2KERNEL = dznrm2_thunderx2t99.c DNRM2KERNEL = znrm2.S
ZNRM2KERNEL = dznrm2_thunderx2t99.c ZNRM2KERNEL = znrm2.S
DDOTKERNEL = dot_thunderx2t99.c DDOTKERNEL = dot_thunderx2t99.c

View File

@@ -153,13 +153,16 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
ICAMAXKERNEL = izamax_thunderx2t99.c ICAMAXKERNEL = izamax_thunderx2t99.c
IZAMAXKERNEL = izamax_thunderx2t99.c IZAMAXKERNEL = izamax_thunderx2t99.c
SNRM2KERNEL = scnrm2_thunderx2t99.c #SNRM2KERNEL = scnrm2_thunderx2t99.c
CNRM2KERNEL = scnrm2_thunderx2t99.c #CNRM2KERNEL = scnrm2_thunderx2t99.c
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c ##DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c ##ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
DNRM2KERNEL = dznrm2_thunderx2t99.c #DNRM2KERNEL = dznrm2_thunderx2t99.c
ZNRM2KERNEL = dznrm2_thunderx2t99.c #ZNRM2KERNEL = dznrm2_thunderx2t99.c
SNRM2KERNEL = nrm2.S
DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
DDOTKERNEL = dot_thunderx2t99.c DDOTKERNEL = dot_thunderx2t99.c
SDOTKERNEL = dot_thunderx2t99.c SDOTKERNEL = dot_thunderx2t99.c

View File

@@ -13,7 +13,7 @@ static void srot_kernel(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s)
{ {
BLASLONG i = 0; BLASLONG i = 0;
#if V_SIMD && (defined(HAVE_FMA3) || V_SIMD > 128) #if V_SIMD && !defined(C_PGI) && (defined(HAVE_FMA3) || V_SIMD > 128)
const int vstep = v_nlanes_f32; const int vstep = v_nlanes_f32;
const int unrollx4 = n & (-vstep * 4); const int unrollx4 = n & (-vstep * 4);
const int unrollx = n & -vstep; const int unrollx = n & -vstep;