Add NaN tests

This commit is contained in:
Martin Kroeker 2023-08-19 12:20:42 +02:00 committed by GitHub
parent 2e68d922d5
commit 3d10fb003e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 198 additions and 5 deletions

View File

@ -520,6 +520,19 @@ LL(1000):
.align 4
LL(1010):
addi RET, RET, 1
fcmpu cr0, f24, f24
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f25, f25
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f26, f26
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f27, f27
bun cr0, LL(9999)
fabs f8, f24
fabs f9, f25
fabs f10, f26
@ -530,6 +543,20 @@ LL(1010):
LFD f26, 10 * SIZE(XX)
LFD f27, 11 * SIZE(XX)
addi RET, RET, 1
fcmpu cr0, f24, f24
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f25, f25
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f26, f26
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f27, f27
bun cr0, LL(9999)
subi RET, RET, 8
fabs f12, f28
fabs f13, f29
fabs f14, f30
@ -577,6 +604,32 @@ LL(1010):
.align 4
LL(1020):
addi RET, RET, 1
fcmpu cr0, f24, f24
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f25, f25
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f26, f26
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f27, f27
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f28, f28
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f29, f29
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f30, f30
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f31, f31
bun cr0, LL(9999)
subi RET, RET, 8
fabs f8, f24
fabs f9, f25
fabs f10, f26
@ -631,8 +684,12 @@ LL(1050):
LL(1060):
LFD f8, 0 * SIZE(XX)
addi XX, XX, 1 * SIZE
addi RET, RET, 1
fcmpu cr0, f8, f8
bun cru, LL(9999)
fabs f8, f8
addi RET, RET, 1
//addi RET, RET, 1
fcmpu cr0, f1, f8
beq cr0, LL(9999)
bdnz LL(1060)
@ -658,6 +715,18 @@ LL(1100):
.align 4
LL(1110):
addi RET, RET, 1
fcmpu cr0, f24, f24
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f25, f25
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f26, f26
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f27, f27
bun cr0, LL(9999)
fabs f8, f24
fabs f9, f25
fabs f10, f26
@ -667,7 +736,19 @@ LL(1110):
LFDUX f25, XX, INCX
LFDUX f26, XX, INCX
LFDUX f27, XX, INCX
addi RET, RET, 1
fcmpu cr0, f24, f24
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f25, f25
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f26, f26
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f27, f27
bun cr0, LL(9999)
subi RET, RET, 8
fabs f12, f28
fabs f13, f29
fabs f14, f30
@ -714,6 +795,30 @@ LL(1110):
.align 4
LL(1120):
addi RET, RET, 1
fcmpu cr0, f24, f24
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f25, f25
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f26, f26
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f27, f27
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f28, f28
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f29, f29
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f30, f30
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f31, f31
subi RET, RET, 8
fabs f8, f24
fabs f9, f25
fabs f10, f26
@ -765,8 +870,11 @@ LL(1150):
LL(1160):
LFDUX f8, XX, INCX
fabs f8, f8
addi RET, RET, 1
fcmpu cr0, f8, f8
bun LL(9999)
fabs f8, f8
// addi RET, RET, 1
fcmpu cr0, f1, f8
beq cr0, LL(9999)
bdnz LL(1160)

View File

@ -327,6 +327,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
BLASLONG max = 0;
if (n <= 0 || inc_x <= 0) return (max);
if (n == 1) return(1);
if (inc_x == 1) {
@ -335,7 +336,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
BLASLONG n1 = n & -32;
if (n1 > 0) {
for (int ii=i;ii<i+32;ii++) if (x[ii]!=x[ii]) return(ii+1);
max = diamax_kernel_32(n1, x, &maxf);
i = n1;
@ -343,6 +344,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
#endif
#endif
while (i < n) {
if (x[i] != x[i]) return(i+1);
if (ABS(x[i]) > maxf) {
max = i;
maxf = ABS(x[i]);
@ -356,6 +358,10 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
BLASLONG n1 = n & -4;
while (j < n1) {
if (x[i] != x[i]) return(i+1);
if (x[i+inc_x] != x[i+inc_x]) return(j+1);
if (x[i+2*inc_x] != x[i+2*inc_x]) return(j+2);
if (x[i+3*inc_x] != x[i+3*inc_x]) return(j+3);
if (ABS(x[i]) > maxf) {
max = j;
maxf = ABS(x[i]);
@ -381,6 +387,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
while (j < n) {
if (x[i] != x[i]) return(j+1);
if (ABS(x[i]) > maxf) {
max = j;
maxf = ABS(x[i]);

View File

@ -58,6 +58,78 @@ static BLASLONG siamax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *maxf) {
register __vector float quadruple_values={0,0,0,0};
register __vector float * v_ptrx=(__vector float *)x;
for(; i<n; i+=64){
if (vec_any_nan(v_ptrx[0])) {
float d=vec_extract(v_ptrx[0],0);
if (d!=d) return(i+0);
d=vec_extract(v_ptrx[0],1);
if (d!=d) return(i+1);
d=vec_extract(v_ptrx[0],2);
if (d!=d) return(i+2);
return(i+3);
}
if (vec_any_nan(v_ptrx[1])) {
float d=vec_extract(v_ptrx[1],0);
if (d!=d) return(i+4+0);
d=vec_extract(v_ptrx[1],1);
if (d!=d) return(i+4+1);
d=vec_extract(v_ptrx[1],2);
if (d!=d) return(i+4+2);
return(i+4+3);
}
if (vec_any_nan(v_ptrx[2])) {
float d=vec_extract(v_ptrx[2],0);
if (d!=d) return(i+8+0);
d=vec_extract(v_ptrx[2],1);
if (d!=d) return(i+8+1);
d=vec_extract(v_ptrx[2],2);
if (d!=d) return(i+8+2);
return(i+8+3);
}
if (vec_any_nan(v_ptrx[3])) {
float d=vec_extract(v_ptrx[3],0);
if (d!=d) return(i+12+0);
d=vec_extract(v_ptrx[3],1);
if (d!=d) return(i+12+1);
d=vec_extract(v_ptrx[3],2);
if (d!=d) return(i+12+2);
return(i+12+3);
}
if (vec_any_nan(v_ptrx[4])) {
float d=vec_extract(v_ptrx[4],0);
if (d!=d) return(i+16+0);
d=vec_extract(v_ptrx[4],1);
if (d!=d) return(i+16+1);
d=vec_extract(v_ptrx[4],2);
if (d!=d) return(i+16+2);
return(i+16+3);
}
if (vec_any_nan(v_ptrx[5])) {
float d=vec_extract(v_ptrx[5],0);
if (d!=d) return(i+20+0);
d=vec_extract(v_ptrx[5],1);
if (d!=d) return(i+20+1);
d=vec_extract(v_ptrx[5],2);
if (d!=d) return(i+20+2);
return(i+20+3);
}
if (vec_any_nan(v_ptrx[6])) {
float d=vec_extract(v_ptrx[6],0);
if (d!=d) return(i+24+0);
d=vec_extract(v_ptrx[6],1);
if (d!=d) return(i+24+1);
d=vec_extract(v_ptrx[6],2);
if (d!=d) return(i+24+2);
return(i+24+3);
}
if (vec_any_nan(v_ptrx[7])) {
float d=vec_extract(v_ptrx[7],0);
if (d!=d) return(i+28+0);
d=vec_extract(v_ptrx[7],1);
if (d!=d) return(i+28+1);
d=vec_extract(v_ptrx[7],2);
if (d!=d) return(i+28+2);
return(i+28+3);
}
//absolute temporary vectors
register __vector float v0=vec_abs(v_ptrx[0]);
register __vector float v1=vec_abs(v_ptrx[1]);
@ -226,7 +298,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
BLASLONG max = 0;
if (n <= 0 || inc_x <= 0) return (max);
if (x[0] != x[0]) return(1);
if (inc_x == 1) {
BLASLONG n1 = n & -64;
@ -238,6 +310,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
}
while (i < n) {
if (x[i] != x[i]) return(i+1);
if (ABS(x[i]) > maxf) {
max = i;
maxf = ABS(x[i]);
@ -251,18 +324,22 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
BLASLONG n1 = n & -4;
while (j < n1) {
if (x[i] != x[i]) return(j+1);
if (ABS(x[i]) > maxf) {
max = j;
maxf = ABS(x[i]);
}
if (x[i+inc_x] != x[i+inc_x]) return(j+1);
if (ABS(x[i + inc_x]) > maxf) {
max = j + 1;
maxf = ABS(x[i + inc_x]);
}
if (x[i+2*inc_x] != x[i+2*inc_x]) return(j+2);
if (ABS(x[i + 2 * inc_x]) > maxf) {
max = j + 2;
maxf = ABS(x[i + 2 * inc_x]);
}
if (x[i+3*inc_x] != x[i+3*inc_x]) return(j+3);
if (ABS(x[i + 3 * inc_x]) > maxf) {
max = j + 3;
maxf = ABS(x[i + 3 * inc_x]);
@ -276,6 +353,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
while (j < n) {
if (x[i] != x[i]) return(j+1);
if (ABS(x[i]) > maxf) {
max = j;
maxf = ABS(x[i]);