Add NaN tests
This commit is contained in:
parent
2e68d922d5
commit
3d10fb003e
|
@ -520,6 +520,19 @@ LL(1000):
|
|||
.align 4
|
||||
|
||||
LL(1010):
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f24, f24
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f25, f25
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f26, f26
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f27, f27
|
||||
bun cr0, LL(9999)
|
||||
|
||||
fabs f8, f24
|
||||
fabs f9, f25
|
||||
fabs f10, f26
|
||||
|
@ -529,6 +542,20 @@ LL(1010):
|
|||
LFD f25, 9 * SIZE(XX)
|
||||
LFD f26, 10 * SIZE(XX)
|
||||
LFD f27, 11 * SIZE(XX)
|
||||
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f24, f24
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f25, f25
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f26, f26
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f27, f27
|
||||
bun cr0, LL(9999)
|
||||
subi RET, RET, 8
|
||||
|
||||
fabs f12, f28
|
||||
fabs f13, f29
|
||||
|
@ -577,6 +604,32 @@ LL(1010):
|
|||
.align 4
|
||||
|
||||
LL(1020):
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f24, f24
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f25, f25
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f26, f26
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f27, f27
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f28, f28
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f29, f29
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f30, f30
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f31, f31
|
||||
bun cr0, LL(9999)
|
||||
subi RET, RET, 8
|
||||
|
||||
fabs f8, f24
|
||||
fabs f9, f25
|
||||
fabs f10, f26
|
||||
|
@ -631,8 +684,12 @@ LL(1050):
|
|||
LL(1060):
|
||||
LFD f8, 0 * SIZE(XX)
|
||||
addi XX, XX, 1 * SIZE
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f8, f8
|
||||
bun cru, LL(9999)
|
||||
|
||||
fabs f8, f8
|
||||
addi RET, RET, 1
|
||||
//addi RET, RET, 1
|
||||
fcmpu cr0, f1, f8
|
||||
beq cr0, LL(9999)
|
||||
bdnz LL(1060)
|
||||
|
@ -658,6 +715,18 @@ LL(1100):
|
|||
.align 4
|
||||
|
||||
LL(1110):
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f24, f24
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f25, f25
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f26, f26
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f27, f27
|
||||
bun cr0, LL(9999)
|
||||
fabs f8, f24
|
||||
fabs f9, f25
|
||||
fabs f10, f26
|
||||
|
@ -667,7 +736,19 @@ LL(1110):
|
|||
LFDUX f25, XX, INCX
|
||||
LFDUX f26, XX, INCX
|
||||
LFDUX f27, XX, INCX
|
||||
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f24, f24
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f25, f25
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f26, f26
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f27, f27
|
||||
bun cr0, LL(9999)
|
||||
subi RET, RET, 8
|
||||
fabs f12, f28
|
||||
fabs f13, f29
|
||||
fabs f14, f30
|
||||
|
@ -714,6 +795,30 @@ LL(1110):
|
|||
.align 4
|
||||
|
||||
LL(1120):
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f24, f24
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f25, f25
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f26, f26
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f27, f27
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f28, f28
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f29, f29
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f30, f30
|
||||
bun cr0, LL(9999)
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f31, f31
|
||||
subi RET, RET, 8
|
||||
fabs f8, f24
|
||||
fabs f9, f25
|
||||
fabs f10, f26
|
||||
|
@ -765,8 +870,11 @@ LL(1150):
|
|||
|
||||
LL(1160):
|
||||
LFDUX f8, XX, INCX
|
||||
fabs f8, f8
|
||||
addi RET, RET, 1
|
||||
fcmpu cr0, f8, f8
|
||||
bun LL(9999)
|
||||
fabs f8, f8
|
||||
// addi RET, RET, 1
|
||||
fcmpu cr0, f1, f8
|
||||
beq cr0, LL(9999)
|
||||
bdnz LL(1160)
|
||||
|
|
|
@ -327,6 +327,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
|
|||
BLASLONG max = 0;
|
||||
|
||||
if (n <= 0 || inc_x <= 0) return (max);
|
||||
if (n == 1) return(1);
|
||||
|
||||
if (inc_x == 1) {
|
||||
|
||||
|
@ -335,7 +336,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
|
|||
|
||||
BLASLONG n1 = n & -32;
|
||||
if (n1 > 0) {
|
||||
|
||||
for (int ii=i;ii<i+32;ii++) if (x[ii]!=x[ii]) return(ii+1);
|
||||
max = diamax_kernel_32(n1, x, &maxf);
|
||||
|
||||
i = n1;
|
||||
|
@ -343,6 +344,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
|
|||
#endif
|
||||
#endif
|
||||
while (i < n) {
|
||||
if (x[i] != x[i]) return(i+1);
|
||||
if (ABS(x[i]) > maxf) {
|
||||
max = i;
|
||||
maxf = ABS(x[i]);
|
||||
|
@ -356,6 +358,10 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
|
|||
BLASLONG n1 = n & -4;
|
||||
while (j < n1) {
|
||||
|
||||
if (x[i] != x[i]) return(i+1);
|
||||
if (x[i+inc_x] != x[i+inc_x]) return(j+1);
|
||||
if (x[i+2*inc_x] != x[i+2*inc_x]) return(j+2);
|
||||
if (x[i+3*inc_x] != x[i+3*inc_x]) return(j+3);
|
||||
if (ABS(x[i]) > maxf) {
|
||||
max = j;
|
||||
maxf = ABS(x[i]);
|
||||
|
@ -381,6 +387,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
|
|||
|
||||
|
||||
while (j < n) {
|
||||
if (x[i] != x[i]) return(j+1);
|
||||
if (ABS(x[i]) > maxf) {
|
||||
max = j;
|
||||
maxf = ABS(x[i]);
|
||||
|
|
|
@ -58,6 +58,78 @@ static BLASLONG siamax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *maxf) {
|
|||
register __vector float quadruple_values={0,0,0,0};
|
||||
register __vector float * v_ptrx=(__vector float *)x;
|
||||
for(; i<n; i+=64){
|
||||
if (vec_any_nan(v_ptrx[0])) {
|
||||
float d=vec_extract(v_ptrx[0],0);
|
||||
if (d!=d) return(i+0);
|
||||
d=vec_extract(v_ptrx[0],1);
|
||||
if (d!=d) return(i+1);
|
||||
d=vec_extract(v_ptrx[0],2);
|
||||
if (d!=d) return(i+2);
|
||||
return(i+3);
|
||||
}
|
||||
if (vec_any_nan(v_ptrx[1])) {
|
||||
float d=vec_extract(v_ptrx[1],0);
|
||||
if (d!=d) return(i+4+0);
|
||||
d=vec_extract(v_ptrx[1],1);
|
||||
if (d!=d) return(i+4+1);
|
||||
d=vec_extract(v_ptrx[1],2);
|
||||
if (d!=d) return(i+4+2);
|
||||
return(i+4+3);
|
||||
}
|
||||
if (vec_any_nan(v_ptrx[2])) {
|
||||
float d=vec_extract(v_ptrx[2],0);
|
||||
if (d!=d) return(i+8+0);
|
||||
d=vec_extract(v_ptrx[2],1);
|
||||
if (d!=d) return(i+8+1);
|
||||
d=vec_extract(v_ptrx[2],2);
|
||||
if (d!=d) return(i+8+2);
|
||||
return(i+8+3);
|
||||
}
|
||||
if (vec_any_nan(v_ptrx[3])) {
|
||||
float d=vec_extract(v_ptrx[3],0);
|
||||
if (d!=d) return(i+12+0);
|
||||
d=vec_extract(v_ptrx[3],1);
|
||||
if (d!=d) return(i+12+1);
|
||||
d=vec_extract(v_ptrx[3],2);
|
||||
if (d!=d) return(i+12+2);
|
||||
return(i+12+3);
|
||||
}
|
||||
if (vec_any_nan(v_ptrx[4])) {
|
||||
float d=vec_extract(v_ptrx[4],0);
|
||||
if (d!=d) return(i+16+0);
|
||||
d=vec_extract(v_ptrx[4],1);
|
||||
if (d!=d) return(i+16+1);
|
||||
d=vec_extract(v_ptrx[4],2);
|
||||
if (d!=d) return(i+16+2);
|
||||
return(i+16+3);
|
||||
}
|
||||
if (vec_any_nan(v_ptrx[5])) {
|
||||
float d=vec_extract(v_ptrx[5],0);
|
||||
if (d!=d) return(i+20+0);
|
||||
d=vec_extract(v_ptrx[5],1);
|
||||
if (d!=d) return(i+20+1);
|
||||
d=vec_extract(v_ptrx[5],2);
|
||||
if (d!=d) return(i+20+2);
|
||||
return(i+20+3);
|
||||
}
|
||||
if (vec_any_nan(v_ptrx[6])) {
|
||||
float d=vec_extract(v_ptrx[6],0);
|
||||
if (d!=d) return(i+24+0);
|
||||
d=vec_extract(v_ptrx[6],1);
|
||||
if (d!=d) return(i+24+1);
|
||||
d=vec_extract(v_ptrx[6],2);
|
||||
if (d!=d) return(i+24+2);
|
||||
return(i+24+3);
|
||||
}
|
||||
if (vec_any_nan(v_ptrx[7])) {
|
||||
float d=vec_extract(v_ptrx[7],0);
|
||||
if (d!=d) return(i+28+0);
|
||||
d=vec_extract(v_ptrx[7],1);
|
||||
if (d!=d) return(i+28+1);
|
||||
d=vec_extract(v_ptrx[7],2);
|
||||
if (d!=d) return(i+28+2);
|
||||
return(i+28+3);
|
||||
}
|
||||
//absolute temporary vectors
|
||||
register __vector float v0=vec_abs(v_ptrx[0]);
|
||||
register __vector float v1=vec_abs(v_ptrx[1]);
|
||||
|
@ -226,7 +298,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
|
|||
BLASLONG max = 0;
|
||||
|
||||
if (n <= 0 || inc_x <= 0) return (max);
|
||||
|
||||
if (x[0] != x[0]) return(1);
|
||||
if (inc_x == 1) {
|
||||
|
||||
BLASLONG n1 = n & -64;
|
||||
|
@ -238,6 +310,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
|
|||
}
|
||||
|
||||
while (i < n) {
|
||||
if (x[i] != x[i]) return(i+1);
|
||||
if (ABS(x[i]) > maxf) {
|
||||
max = i;
|
||||
maxf = ABS(x[i]);
|
||||
|
@ -251,18 +324,22 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
|
|||
BLASLONG n1 = n & -4;
|
||||
while (j < n1) {
|
||||
|
||||
if (x[i] != x[i]) return(j+1);
|
||||
if (ABS(x[i]) > maxf) {
|
||||
max = j;
|
||||
maxf = ABS(x[i]);
|
||||
}
|
||||
if (x[i+inc_x] != x[i+inc_x]) return(j+1);
|
||||
if (ABS(x[i + inc_x]) > maxf) {
|
||||
max = j + 1;
|
||||
maxf = ABS(x[i + inc_x]);
|
||||
}
|
||||
if (x[i+2*inc_x] != x[i+2*inc_x]) return(j+2);
|
||||
if (ABS(x[i + 2 * inc_x]) > maxf) {
|
||||
max = j + 2;
|
||||
maxf = ABS(x[i + 2 * inc_x]);
|
||||
}
|
||||
if (x[i+3*inc_x] != x[i+3*inc_x]) return(j+3);
|
||||
if (ABS(x[i + 3 * inc_x]) > maxf) {
|
||||
max = j + 3;
|
||||
maxf = ABS(x[i + 3 * inc_x]);
|
||||
|
@ -276,6 +353,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
|
|||
|
||||
|
||||
while (j < n) {
|
||||
if (x[i] != x[i]) return(j+1);
|
||||
if (ABS(x[i]) > maxf) {
|
||||
max = j;
|
||||
maxf = ABS(x[i]);
|
||||
|
|
Loading…
Reference in New Issue