Fix casum fallback kernel.
This kernel is only used on Skylake+ when the kernel with AVX512 intrinsics can't be used. It used the variable x1 incorrectly in the tail loop that follows the unrolled loop: x1 was still at its initial value instead of where x points to. This caused 55 "other error"s in the LAPACK tests (https://github.com/OpenMathLib/OpenBLAS/issues/4282). This change makes casum.c as similar as possible to zasum.c, because zasum.c does this correctly.
This commit is contained in:
parent
cb2950709f
commit
f8ad5344c2
|
@ -9,12 +9,12 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef HAVE_CASUM_KERNEL
|
#ifndef HAVE_CASUM_KERNEL
|
||||||
static FLOAT casum_kernel(BLASLONG n, FLOAT *x1)
|
static FLOAT casum_kernel(BLASLONG n, FLOAT *x)
|
||||||
{
|
{
|
||||||
|
|
||||||
BLASLONG i=0;
|
BLASLONG i=0;
|
||||||
BLASLONG n_8 = n & -8;
|
BLASLONG n_8 = n & -8;
|
||||||
FLOAT *x = x1;
|
FLOAT *x1 = x;
|
||||||
FLOAT temp0, temp1, temp2, temp3;
|
FLOAT temp0, temp1, temp2, temp3;
|
||||||
FLOAT temp4, temp5, temp6, temp7;
|
FLOAT temp4, temp5, temp6, temp7;
|
||||||
FLOAT sum0 = 0.0;
|
FLOAT sum0 = 0.0;
|
||||||
|
@ -24,14 +24,14 @@ static FLOAT casum_kernel(BLASLONG n, FLOAT *x1)
|
||||||
FLOAT sum4 = 0.0;
|
FLOAT sum4 = 0.0;
|
||||||
|
|
||||||
while (i < n_8) {
|
while (i < n_8) {
|
||||||
temp0 = ABS_K(x[0]);
|
temp0 = ABS_K(x1[0]);
|
||||||
temp1 = ABS_K(x[1]);
|
temp1 = ABS_K(x1[1]);
|
||||||
temp2 = ABS_K(x[2]);
|
temp2 = ABS_K(x1[2]);
|
||||||
temp3 = ABS_K(x[3]);
|
temp3 = ABS_K(x1[3]);
|
||||||
temp4 = ABS_K(x[4]);
|
temp4 = ABS_K(x1[4]);
|
||||||
temp5 = ABS_K(x[5]);
|
temp5 = ABS_K(x1[5]);
|
||||||
temp6 = ABS_K(x[6]);
|
temp6 = ABS_K(x1[6]);
|
||||||
temp7 = ABS_K(x[7]);
|
temp7 = ABS_K(x1[7]);
|
||||||
|
|
||||||
sum0 += temp0;
|
sum0 += temp0;
|
||||||
sum1 += temp1;
|
sum1 += temp1;
|
||||||
|
@ -43,12 +43,12 @@ static FLOAT casum_kernel(BLASLONG n, FLOAT *x1)
|
||||||
sum2 += temp6;
|
sum2 += temp6;
|
||||||
sum3 += temp7;
|
sum3 += temp7;
|
||||||
|
|
||||||
x+=8;
|
x1+=8;
|
||||||
i+=4;
|
i+=4;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (i < n) {
|
while (i < n) {
|
||||||
sum4 += (ABS_K(x1[0]) + ABS_K(x1[1]));
|
sum4 += ABS_K(x1[0]) + ABS_K(x1[1]);
|
||||||
x1 += 2;
|
x1 += 2;
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue