POWER iamax/idamax/isamax: return the position of the first NaN element.
An element that compares unordered with itself (fcmpu fX, fX + bun in the assembly, x[i] != x[i] in C) ends the search and its 1-based position is returned.
Assembly: every NaN compare is followed by a branch on cr0, and inside the unrolled loops the checks placed after the f24-f27 reload test f28-f31, the registers still holding elements 5..8 of the current group.
C: the strided 4-way loops return j+1, j+2, j+3, j+4 for the four elements of a group, matching the max = j, j+1, j+2, j+3 bookkeeping next to them.
diff --git a/kernel/power/iamax.S b/kernel/power/iamax.S index 45461ae85..147a60f0f 100644 --- a/kernel/power/iamax.S +++ b/kernel/power/iamax.S @@ -520,6 +520,19 @@ LL(1000): .align 4 LL(1010): + addi RET, RET, 1 + fcmpu cr0, f24, f24 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f25, f25 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f26, f26 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f27, f27 + bun cr0, LL(9999) + fabs f8, f24 fabs f9, f25 fabs f10, f26 @@ -529,6 +542,20 @@ LL(1010): LFD f25, 9 * SIZE(XX) LFD f26, 10 * SIZE(XX) LFD f27, 11 * SIZE(XX) + + addi RET, RET, 1 + fcmpu cr0, f28, f28 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f29, f29 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f30, f30 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f31, f31 + bun cr0, LL(9999) + subi RET, RET, 8 fabs f12, f28 fabs f13, f29 @@ -577,6 +604,32 @@ LL(1010): .align 4 LL(1020): + addi RET, RET, 1 + fcmpu cr0, f24, f24 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f25, f25 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f26, f26 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f27, f27 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f28, f28 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f29, f29 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f30, f30 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f31, f31 + bun cr0, LL(9999) + subi RET, RET, 8 + fabs f8, f24 fabs f9, f25 fabs f10, f26 @@ -631,8 +684,12 @@ LL(1050): LL(1060): LFD f8, 0 * SIZE(XX) addi XX, XX, 1 * SIZE + addi RET, RET, 1 + fcmpu cr0, f8, f8 + bun cr0, LL(9999) + fabs f8, f8 - addi RET, RET, 1 + //addi RET, RET, 1 fcmpu cr0, f1, f8 beq cr0, LL(9999) bdnz LL(1060) @@ -658,6 +715,18 @@ LL(1100): .align 4 LL(1110): + addi RET, RET, 1 + fcmpu cr0, f24, f24 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f25, f25 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f26, f26 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f27, f27 + bun 
cr0, LL(9999) fabs f8, f24 fabs f9, f25 fabs f10, f26 @@ -667,7 +736,19 @@ LL(1110): LFDUX f25, XX, INCX LFDUX f26, XX, INCX LFDUX f27, XX, INCX - + addi RET, RET, 1 + fcmpu cr0, f28, f28 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f29, f29 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f30, f30 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f31, f31 + bun cr0, LL(9999) + subi RET, RET, 8 fabs f12, f28 fabs f13, f29 fabs f14, f30 @@ -714,6 +795,31 @@ LL(1110): .align 4 LL(1120): + addi RET, RET, 1 + fcmpu cr0, f24, f24 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f25, f25 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f26, f26 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f27, f27 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f28, f28 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f29, f29 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f30, f30 + bun cr0, LL(9999) + addi RET, RET, 1 + fcmpu cr0, f31, f31 + bun cr0, LL(9999) + subi RET, RET, 8 fabs f8, f24 fabs f9, f25 fabs f10, f26 @@ -765,8 +871,11 @@ LL(1150): LL(1160): LFDUX f8, XX, INCX - fabs f8, f8 addi RET, RET, 1 + fcmpu cr0, f8, f8 + bun cr0, LL(9999) + fabs f8, f8 +// addi RET, RET, 1 fcmpu cr0, f1, f8 beq cr0, LL(9999) bdnz LL(1160) diff --git a/kernel/power/idamax.c b/kernel/power/idamax.c index f1ef00066..8a8471c0f 100644 --- a/kernel/power/idamax.c +++ b/kernel/power/idamax.c @@ -327,6 +327,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { BLASLONG max = 0; if (n <= 0 || inc_x <= 0) return (max); + if (n == 1) return(1); if (inc_x == 1) { @@ -335,7 +336,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { BLASLONG n1 = n & -32; if (n1 > 0) { - + for (int ii=i;ii maxf) { max = i; maxf = ABS(x[i]); @@ -356,6 +358,10 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { BLASLONG n1 = n & -4; while (j < n1) { + if (x[i] != x[i]) return(j+1); + if (x[i+inc_x] != x[i+inc_x]) return(j+2); + if (x[i+2*inc_x] != x[i+2*inc_x]) return(j+3); + if (x[i+3*inc_x] != 
x[i+3*inc_x]) return(j+4); if (ABS(x[i]) > maxf) { max = j; maxf = ABS(x[i]); @@ -381,6 +387,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { while (j < n) { + if (x[i] != x[i]) return(j+1); if (ABS(x[i]) > maxf) { max = j; maxf = ABS(x[i]); diff --git a/kernel/power/isamax.c b/kernel/power/isamax.c index fb2dafec0..1a116ca64 100644 --- a/kernel/power/isamax.c +++ b/kernel/power/isamax.c @@ -58,6 +58,78 @@ static BLASLONG siamax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *maxf) { register __vector float quadruple_values={0,0,0,0}; register __vector float * v_ptrx=(__vector float *)x; for(; i maxf) { max = i; maxf = ABS(x[i]); @@ -251,18 +324,22 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { BLASLONG n1 = n & -4; while (j < n1) { + if (x[i] != x[i]) return(j+1); if (ABS(x[i]) > maxf) { max = j; maxf = ABS(x[i]); } + if (x[i+inc_x] != x[i+inc_x]) return(j+2); if (ABS(x[i + inc_x]) > maxf) { max = j + 1; maxf = ABS(x[i + inc_x]); } + if (x[i+2*inc_x] != x[i+2*inc_x]) return(j+3); if (ABS(x[i + 2 * inc_x]) > maxf) { max = j + 2; maxf = ABS(x[i + 2 * inc_x]); } + if (x[i+3*inc_x] != x[i+3*inc_x]) return(j+4); if (ABS(x[i + 3 * inc_x]) > maxf) { max = j + 3; maxf = ABS(x[i + 3 * inc_x]); @@ -276,6 +353,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { while (j < n) { + if (x[i] != x[i]) return(j+1); if (ABS(x[i]) > maxf) { max = j; maxf = ABS(x[i]);