Merge pull request #3048 from martin-frbg/issue2998
Temporarily revert to the old NRM2 kernels for ThunderX2/3 and NeoverseN1
This commit is contained in:
commit
3559c5d7a2
|
@ -91,10 +91,10 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
|
||||||
ICAMAXKERNEL = izamax_thunderx2t99.c
|
ICAMAXKERNEL = izamax_thunderx2t99.c
|
||||||
IZAMAXKERNEL = izamax_thunderx2t99.c
|
IZAMAXKERNEL = izamax_thunderx2t99.c
|
||||||
|
|
||||||
SNRM2KERNEL = scnrm2_thunderx2t99.c
|
SNRM2KERNEL = nrm2.S
|
||||||
DNRM2KERNEL = dznrm2_thunderx2t99.c
|
DNRM2KERNEL = nrm2.S
|
||||||
CNRM2KERNEL = scnrm2_thunderx2t99.c
|
CNRM2KERNEL = znrm2.S
|
||||||
ZNRM2KERNEL = dznrm2_thunderx2t99.c
|
ZNRM2KERNEL = znrm2.S
|
||||||
|
|
||||||
DDOTKERNEL = dot_thunderx2t99.c
|
DDOTKERNEL = dot_thunderx2t99.c
|
||||||
SDOTKERNEL = dot_thunderx2t99.c
|
SDOTKERNEL = dot_thunderx2t99.c
|
||||||
|
|
|
@ -153,12 +153,12 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
|
||||||
ICAMAXKERNEL = izamax_thunderx2t99.c
|
ICAMAXKERNEL = izamax_thunderx2t99.c
|
||||||
IZAMAXKERNEL = izamax_thunderx2t99.c
|
IZAMAXKERNEL = izamax_thunderx2t99.c
|
||||||
|
|
||||||
SNRM2KERNEL = scnrm2_thunderx2t99.c
|
SNRM2KERNEL = nrm2.S
|
||||||
CNRM2KERNEL = scnrm2_thunderx2t99.c
|
CNRM2KERNEL = znrm2.S
|
||||||
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
||||||
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
||||||
DNRM2KERNEL = dznrm2_thunderx2t99.c
|
DNRM2KERNEL = nrm2.S
|
||||||
ZNRM2KERNEL = dznrm2_thunderx2t99.c
|
ZNRM2KERNEL = znrm2.S
|
||||||
|
|
||||||
|
|
||||||
DDOTKERNEL = dot_thunderx2t99.c
|
DDOTKERNEL = dot_thunderx2t99.c
|
||||||
|
|
|
@ -153,13 +153,16 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
|
||||||
ICAMAXKERNEL = izamax_thunderx2t99.c
|
ICAMAXKERNEL = izamax_thunderx2t99.c
|
||||||
IZAMAXKERNEL = izamax_thunderx2t99.c
|
IZAMAXKERNEL = izamax_thunderx2t99.c
|
||||||
|
|
||||||
SNRM2KERNEL = scnrm2_thunderx2t99.c
|
#SNRM2KERNEL = scnrm2_thunderx2t99.c
|
||||||
CNRM2KERNEL = scnrm2_thunderx2t99.c
|
#CNRM2KERNEL = scnrm2_thunderx2t99.c
|
||||||
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
##DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
||||||
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
##ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
||||||
DNRM2KERNEL = dznrm2_thunderx2t99.c
|
#DNRM2KERNEL = dznrm2_thunderx2t99.c
|
||||||
ZNRM2KERNEL = dznrm2_thunderx2t99.c
|
#ZNRM2KERNEL = dznrm2_thunderx2t99.c
|
||||||
|
SNRM2KERNEL = nrm2.S
|
||||||
|
DNRM2KERNEL = nrm2.S
|
||||||
|
CNRM2KERNEL = znrm2.S
|
||||||
|
ZNRM2KERNEL = znrm2.S
|
||||||
|
|
||||||
DDOTKERNEL = dot_thunderx2t99.c
|
DDOTKERNEL = dot_thunderx2t99.c
|
||||||
SDOTKERNEL = dot_thunderx2t99.c
|
SDOTKERNEL = dot_thunderx2t99.c
|
||||||
|
|
|
@ -13,7 +13,7 @@ static void srot_kernel(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s)
|
||||||
{
|
{
|
||||||
BLASLONG i = 0;
|
BLASLONG i = 0;
|
||||||
|
|
||||||
#if V_SIMD && (defined(HAVE_FMA3) || V_SIMD > 128)
|
#if V_SIMD && !defined(C_PGI) && (defined(HAVE_FMA3) || V_SIMD > 128)
|
||||||
const int vstep = v_nlanes_f32;
|
const int vstep = v_nlanes_f32;
|
||||||
const int unrollx4 = n & (-vstep * 4);
|
const int unrollx4 = n & (-vstep * 4);
|
||||||
const int unrollx = n & -vstep;
|
const int unrollx = n & -vstep;
|
||||||
|
|
Loading…
Reference in New Issue