Merge pull request #3048 from martin-frbg/issue2998
Temporarily revert to the old NRM2 kernels for ThunderX2/3 and NeoverseN1
This commit is contained in:
commit
3559c5d7a2
|
@ -91,10 +91,10 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
|
|||
ICAMAXKERNEL = izamax_thunderx2t99.c
|
||||
IZAMAXKERNEL = izamax_thunderx2t99.c
|
||||
|
||||
SNRM2KERNEL = scnrm2_thunderx2t99.c
|
||||
DNRM2KERNEL = dznrm2_thunderx2t99.c
|
||||
CNRM2KERNEL = scnrm2_thunderx2t99.c
|
||||
ZNRM2KERNEL = dznrm2_thunderx2t99.c
|
||||
SNRM2KERNEL = nrm2.S
|
||||
DNRM2KERNEL = nrm2.S
|
||||
CNRM2KERNEL = znrm2.S
|
||||
ZNRM2KERNEL = znrm2.S
|
||||
|
||||
DDOTKERNEL = dot_thunderx2t99.c
|
||||
SDOTKERNEL = dot_thunderx2t99.c
|
||||
|
|
|
@ -153,12 +153,12 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
|
|||
ICAMAXKERNEL = izamax_thunderx2t99.c
|
||||
IZAMAXKERNEL = izamax_thunderx2t99.c
|
||||
|
||||
SNRM2KERNEL = scnrm2_thunderx2t99.c
|
||||
CNRM2KERNEL = scnrm2_thunderx2t99.c
|
||||
SNRM2KERNEL = nrm2.S
|
||||
CNRM2KERNEL = nrm2.S
|
||||
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
||||
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
||||
DNRM2KERNEL = dznrm2_thunderx2t99.c
|
||||
ZNRM2KERNEL = dznrm2_thunderx2t99.c
|
||||
DNRM2KERNEL = znrm2.S
|
||||
ZNRM2KERNEL = znrm2.S
|
||||
|
||||
|
||||
DDOTKERNEL = dot_thunderx2t99.c
|
||||
|
|
|
@ -153,13 +153,16 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
|
|||
ICAMAXKERNEL = izamax_thunderx2t99.c
|
||||
IZAMAXKERNEL = izamax_thunderx2t99.c
|
||||
|
||||
SNRM2KERNEL = scnrm2_thunderx2t99.c
|
||||
CNRM2KERNEL = scnrm2_thunderx2t99.c
|
||||
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
||||
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
||||
DNRM2KERNEL = dznrm2_thunderx2t99.c
|
||||
ZNRM2KERNEL = dznrm2_thunderx2t99.c
|
||||
|
||||
#SNRM2KERNEL = scnrm2_thunderx2t99.c
|
||||
#CNRM2KERNEL = scnrm2_thunderx2t99.c
|
||||
##DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
||||
##ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
||||
#DNRM2KERNEL = dznrm2_thunderx2t99.c
|
||||
#ZNRM2KERNEL = dznrm2_thunderx2t99.c
|
||||
SNRM2KERNEL = nrm2.S
|
||||
DNRM2KERNEL = nrm2.S
|
||||
CNRM2KERNEL = znrm2.S
|
||||
ZNRM2KERNEL = znrm2.S
|
||||
|
||||
DDOTKERNEL = dot_thunderx2t99.c
|
||||
SDOTKERNEL = dot_thunderx2t99.c
|
||||
|
|
|
@ -13,7 +13,7 @@ static void srot_kernel(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s)
|
|||
{
|
||||
BLASLONG i = 0;
|
||||
|
||||
#if V_SIMD && (defined(HAVE_FMA3) || V_SIMD > 128)
|
||||
#if V_SIMD && !defined(C_PGI) && (defined(HAVE_FMA3) || V_SIMD > 128)
|
||||
const int vstep = v_nlanes_f32;
|
||||
const int unrollx4 = n & (-vstep * 4);
|
||||
const int unrollx = n & -vstep;
|
||||
|
|
Loading…
Reference in New Issue