This commit is contained in:
Martin Kroeker 2024-01-22 11:11:12 +03:00 committed by GitHub
commit f1ff4c5c02
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 623 additions and 22 deletions

View File

@ -56,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
BLASLONG max=0; BLASLONG max=0;
if (n <= 0 || inc_x <= 0) return(max); if (n <= 0 || inc_x <= 0) return(max);
if (n==1) return(1);
if (x[0]!=x[0]) return(1);
maxf=ABS(x[0]); maxf=ABS(x[0]);
ix += inc_x; ix += inc_x;
i++; i++;
while(i < n) while(i < n)
{ {
if (x[ix]!=x[ix]) return(i+1);
if( ABS(x[ix]) > maxf ) if( ABS(x[ix]) > maxf )
{ {
max = i; max = i;

View File

@ -56,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
BLASLONG min=0; BLASLONG min=0;
if (n <= 0 || inc_x <= 0) return(min); if (n <= 0 || inc_x <= 0) return(min);
if (n==1) return(1);
minf=ABS(x[0]); if (x[0]!=x[0]) return(1);
minf=ABS(x[0]);
ix += inc_x; ix += inc_x;
i++; i++;
while(i < n) while(i < n)
{ {
if (x[ix]!=x[ix]) return(i+1);
if( ABS(x[ix]) < ABS(minf) ) if( ABS(x[ix]) < ABS(minf) )
{ {
min = i; min = i;

View File

@ -1,5 +1,5 @@
/*************************************************************************** /***************************************************************************
Copyright (c) 2016, The OpenBLAS Project Copyright (c) 2013, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
@ -25,6 +25,15 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : NoTest
* BLASTEST double : NoTest
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h" #include "common.h"
#include <math.h> #include <math.h>
@ -47,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
BLASLONG max=0; BLASLONG max=0;
if (n <= 0 || inc_x <= 0) return(max); if (n <= 0 || inc_x <= 0) return(max);
if (n==1) return(1);
if (x[0]!=x[0]) return(1);
maxf=ABS(x[0]); maxf=ABS(x[0]);
ix += inc_x; ix += inc_x;
i++; i++;
while(i < n) while(i < n)
{ {
if (x[ix]!=x[ix]) return(i+1);
if( ABS(x[ix]) > maxf ) if( ABS(x[ix]) > maxf )
{ {
max = i; max = i;

View File

@ -1,5 +1,5 @@
/*************************************************************************** /***************************************************************************
Copyright (c) 2016, The OpenBLAS Project Copyright (c) 2013, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
@ -25,6 +25,15 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : NoTest
* BLASTEST double : NoTest
* CTEST : NoTest
* TEST : NoTest
*
**************************************************************************************/
#include "common.h" #include "common.h"
#include <math.h> #include <math.h>
@ -47,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
BLASLONG min=0; BLASLONG min=0;
if (n <= 0 || inc_x <= 0) return(min); if (n <= 0 || inc_x <= 0) return(min);
if (n==1) return(1);
minf=ABS(x[0]); if (x[0]!=x[0]) return(1);
minf=ABS(x[0]);
ix += inc_x; ix += inc_x;
i++; i++;
while(i < n) while(i < n)
{ {
if (x[ix]!=x[ix]) return(i+1);
if( ABS(x[ix]) < ABS(minf) ) if( ABS(x[ix]) < ABS(minf) )
{ {
min = i; min = i;

View File

@ -520,6 +520,19 @@ LL(1000):
.align 4 .align 4
LL(1010): LL(1010):
addi RET, RET, 1
fcmpu cr0, f24, f24
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f25, f25
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f26, f26
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f27, f27
bun cr0, LL(9999)
fabs f8, f24 fabs f8, f24
fabs f9, f25 fabs f9, f25
fabs f10, f26 fabs f10, f26
@ -529,6 +542,20 @@ LL(1010):
LFD f25, 9 * SIZE(XX) LFD f25, 9 * SIZE(XX)
LFD f26, 10 * SIZE(XX) LFD f26, 10 * SIZE(XX)
LFD f27, 11 * SIZE(XX) LFD f27, 11 * SIZE(XX)
// NaN test for the second half of the current 8-element block.
// f25-f27 (and presumably f24) were just reloaded by the LFDs above with
// data for the next iteration, so positions RET+5..RET+8 are still held in
// f28-f31, which the fabs instructions right below consume.
addi RET, RET, 1
fcmpu cr0, f28, f28 // a self-compare is unordered only for NaN
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f29, f29
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f30, f30
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f31, f31
bun cr0, LL(9999)
subi RET, RET, 8 // no NaN found: undo the eight increments made while scanning
fabs f12, f28 fabs f12, f28
fabs f13, f29 fabs f13, f29
@ -577,6 +604,32 @@ LL(1010):
.align 4 .align 4
LL(1020): LL(1020):
addi RET, RET, 1
fcmpu cr0, f24, f24
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f25, f25
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f26, f26
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f27, f27
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f28, f28
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f29, f29
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f30, f30
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f31, f31
bun cr0, LL(9999)
subi RET, RET, 8
fabs f8, f24 fabs f8, f24
fabs f9, f25 fabs f9, f25
fabs f10, f26 fabs f10, f26
@ -631,8 +684,12 @@ LL(1050):
LL(1060): LL(1060):
LFD f8, 0 * SIZE(XX) LFD f8, 0 * SIZE(XX)
addi XX, XX, 1 * SIZE addi XX, XX, 1 * SIZE
addi RET, RET, 1 // count the element just loaded into f8
fcmpu cr0, f8, f8 // a self-compare is unordered only when f8 is NaN
bun cr0, LL(9999) // NaN: branch out with RET at this element's position
fabs f8, f8 fabs f8, f8
addi RET, RET, 1 //addi RET, RET, 1
fcmpu cr0, f1, f8 fcmpu cr0, f1, f8
beq cr0, LL(9999) beq cr0, LL(9999)
bdnz LL(1060) bdnz LL(1060)
@ -658,6 +715,18 @@ LL(1100):
.align 4 .align 4
LL(1110): LL(1110):
addi RET, RET, 1
fcmpu cr0, f24, f24
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f25, f25
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f26, f26
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f27, f27
bun cr0, LL(9999)
fabs f8, f24 fabs f8, f24
fabs f9, f25 fabs f9, f25
fabs f10, f26 fabs f10, f26
@ -667,7 +736,19 @@ LL(1110):
LFDUX f25, XX, INCX LFDUX f25, XX, INCX
LFDUX f26, XX, INCX LFDUX f26, XX, INCX
LFDUX f27, XX, INCX LFDUX f27, XX, INCX
// NaN test for the second half of the current 8-element block (strided loop).
// f25-f27 (and presumably f24) were just reloaded by the LFDUXs above with
// data for the next iteration, so positions RET+5..RET+8 are still held in
// f28-f31, which the fabs instructions right below consume.
addi RET, RET, 1
fcmpu cr0, f28, f28 // a self-compare is unordered only for NaN
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f29, f29
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f30, f30
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f31, f31
bun cr0, LL(9999)
subi RET, RET, 8 // no NaN found: undo the eight increments made while scanning
fabs f12, f28 fabs f12, f28
fabs f13, f29 fabs f13, f29
fabs f14, f30 fabs f14, f30
@ -714,6 +795,30 @@ LL(1110):
.align 4 .align 4
LL(1120): LL(1120):
addi RET, RET, 1
fcmpu cr0, f24, f24
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f25, f25
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f26, f26
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f27, f27
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f28, f28
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f29, f29
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f30, f30
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f31, f31 // a self-compare is unordered only for NaN
bun cr0, LL(9999) // act on the f31 test before RET is rewound
subi RET, RET, 8 // no NaN in f24-f31: undo the eight increments
fabs f8, f24 fabs f8, f24
fabs f9, f25 fabs f9, f25
fabs f10, f26 fabs f10, f26
@ -765,8 +870,11 @@ LL(1150):
LL(1160): LL(1160):
LFDUX f8, XX, INCX LFDUX f8, XX, INCX
fabs f8, f8
addi RET, RET, 1 addi RET, RET, 1
fcmpu cr0, f8, f8 // a self-compare is unordered only when f8 is NaN
bun cr0, LL(9999) // CR field spelled out; the short form implies cr0
fabs f8, f8
// addi RET, RET, 1
fcmpu cr0, f1, f8 fcmpu cr0, f1, f8
beq cr0, LL(9999) beq cr0, LL(9999)
bdnz LL(1160) bdnz LL(1160)

View File

@ -327,6 +327,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
BLASLONG max = 0; BLASLONG max = 0;
if (n <= 0 || inc_x <= 0) return (max); if (n <= 0 || inc_x <= 0) return (max);
if (n == 1) return(1);
if (inc_x == 1) { if (inc_x == 1) {
@ -335,7 +336,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
BLASLONG n1 = n & -32; BLASLONG n1 = n & -32;
if (n1 > 0) { if (n1 > 0) {
/* Scan the whole prefix handed to diamax_kernel_32 (n1 elements), not only
   its first 32 entries, and report the 1-based index of the first NaN.
   NOTE(review): this assumes the kernel itself does not report NaNs; confirm. */
for (BLASLONG ii = i; ii < n1; ii++) if (x[ii] != x[ii]) return(ii+1);
max = diamax_kernel_32(n1, x, &maxf); max = diamax_kernel_32(n1, x, &maxf);
i = n1; i = n1;
@ -343,6 +344,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
#endif #endif
#endif #endif
while (i < n) { while (i < n) {
if (x[i] != x[i]) return(i+1);
if (ABS(x[i]) > maxf) { if (ABS(x[i]) > maxf) {
max = i; max = i;
maxf = ABS(x[i]); maxf = ABS(x[i]);
@ -356,6 +358,10 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
BLASLONG n1 = n & -4; BLASLONG n1 = n & -4;
while (j < n1) { while (j < n1) {
/* i is the strided memory offset and j the logical element count, so the
   1-based result for x[i + k*inc_x] is j + k + 1 (the code below records
   the same elements as max = j + k, zero-based). */
if (x[i] != x[i]) return(j+1);
if (x[i+inc_x] != x[i+inc_x]) return(j+2);
if (x[i+2*inc_x] != x[i+2*inc_x]) return(j+3);
if (x[i+3*inc_x] != x[i+3*inc_x]) return(j+4);
if (ABS(x[i]) > maxf) { if (ABS(x[i]) > maxf) {
max = j; max = j;
maxf = ABS(x[i]); maxf = ABS(x[i]);
@ -381,6 +387,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
while (j < n) { while (j < n) {
if (x[i] != x[i]) return(j+1);
if (ABS(x[i]) > maxf) { if (ABS(x[i]) > maxf) {
max = j; max = j;
maxf = ABS(x[i]); maxf = ABS(x[i]);

View File

@ -58,6 +58,78 @@ static BLASLONG siamax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *maxf) {
register __vector float quadruple_values={0,0,0,0}; register __vector float quadruple_values={0,0,0,0};
register __vector float * v_ptrx=(__vector float *)x; register __vector float * v_ptrx=(__vector float *)x;
for(; i<n; i+=64){ for(; i<n; i+=64){
if (vec_any_nan(v_ptrx[0])) {
float d=vec_extract(v_ptrx[0],0);
if (d!=d) return(i+0);
d=vec_extract(v_ptrx[0],1);
if (d!=d) return(i+1);
d=vec_extract(v_ptrx[0],2);
if (d!=d) return(i+2);
return(i+3);
}
if (vec_any_nan(v_ptrx[1])) {
float d=vec_extract(v_ptrx[1],0);
if (d!=d) return(i+4+0);
d=vec_extract(v_ptrx[1],1);
if (d!=d) return(i+4+1);
d=vec_extract(v_ptrx[1],2);
if (d!=d) return(i+4+2);
return(i+4+3);
}
if (vec_any_nan(v_ptrx[2])) {
float d=vec_extract(v_ptrx[2],0);
if (d!=d) return(i+8+0);
d=vec_extract(v_ptrx[2],1);
if (d!=d) return(i+8+1);
d=vec_extract(v_ptrx[2],2);
if (d!=d) return(i+8+2);
return(i+8+3);
}
if (vec_any_nan(v_ptrx[3])) {
float d=vec_extract(v_ptrx[3],0);
if (d!=d) return(i+12+0);
d=vec_extract(v_ptrx[3],1);
if (d!=d) return(i+12+1);
d=vec_extract(v_ptrx[3],2);
if (d!=d) return(i+12+2);
return(i+12+3);
}
if (vec_any_nan(v_ptrx[4])) {
float d=vec_extract(v_ptrx[4],0);
if (d!=d) return(i+16+0);
d=vec_extract(v_ptrx[4],1);
if (d!=d) return(i+16+1);
d=vec_extract(v_ptrx[4],2);
if (d!=d) return(i+16+2);
return(i+16+3);
}
if (vec_any_nan(v_ptrx[5])) {
float d=vec_extract(v_ptrx[5],0);
if (d!=d) return(i+20+0);
d=vec_extract(v_ptrx[5],1);
if (d!=d) return(i+20+1);
d=vec_extract(v_ptrx[5],2);
if (d!=d) return(i+20+2);
return(i+20+3);
}
if (vec_any_nan(v_ptrx[6])) {
float d=vec_extract(v_ptrx[6],0);
if (d!=d) return(i+24+0);
d=vec_extract(v_ptrx[6],1);
if (d!=d) return(i+24+1);
d=vec_extract(v_ptrx[6],2);
if (d!=d) return(i+24+2);
return(i+24+3);
}
if (vec_any_nan(v_ptrx[7])) {
float d=vec_extract(v_ptrx[7],0);
if (d!=d) return(i+28+0);
d=vec_extract(v_ptrx[7],1);
if (d!=d) return(i+28+1);
d=vec_extract(v_ptrx[7],2);
if (d!=d) return(i+28+2);
return(i+28+3);
}
//absolute temporary vectors //absolute temporary vectors
register __vector float v0=vec_abs(v_ptrx[0]); register __vector float v0=vec_abs(v_ptrx[0]);
register __vector float v1=vec_abs(v_ptrx[1]); register __vector float v1=vec_abs(v_ptrx[1]);
@ -226,7 +298,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
BLASLONG max = 0; BLASLONG max = 0;
if (n <= 0 || inc_x <= 0) return (max); if (n <= 0 || inc_x <= 0) return (max);
if (x[0] != x[0]) return(1);
if (inc_x == 1) { if (inc_x == 1) {
BLASLONG n1 = n & -64; BLASLONG n1 = n & -64;
@ -238,6 +310,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
} }
while (i < n) { while (i < n) {
if (x[i] != x[i]) return(i+1);
if (ABS(x[i]) > maxf) { if (ABS(x[i]) > maxf) {
max = i; max = i;
maxf = ABS(x[i]); maxf = ABS(x[i]);
@ -251,18 +324,22 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
BLASLONG n1 = n & -4; BLASLONG n1 = n & -4;
while (j < n1) { while (j < n1) {
if (x[i] != x[i]) return(j+1);
if (ABS(x[i]) > maxf) { if (ABS(x[i]) > maxf) {
max = j; max = j;
maxf = ABS(x[i]); maxf = ABS(x[i]);
} }
/* x[i + inc_x] is zero-based element j+1, so its 1-based index is j+2. */
if (x[i+inc_x] != x[i+inc_x]) return(j+2);
if (ABS(x[i + inc_x]) > maxf) { if (ABS(x[i + inc_x]) > maxf) {
max = j + 1; max = j + 1;
maxf = ABS(x[i + inc_x]); maxf = ABS(x[i + inc_x]);
} }
/* x[i + 2*inc_x] is zero-based element j+2, so its 1-based index is j+3. */
if (x[i+2*inc_x] != x[i+2*inc_x]) return(j+3);
if (ABS(x[i + 2 * inc_x]) > maxf) { if (ABS(x[i + 2 * inc_x]) > maxf) {
max = j + 2; max = j + 2;
maxf = ABS(x[i + 2 * inc_x]); maxf = ABS(x[i + 2 * inc_x]);
} }
/* x[i + 3*inc_x] is zero-based element j+3, so its 1-based index is j+4. */
if (x[i+3*inc_x] != x[i+3*inc_x]) return(j+4);
if (ABS(x[i + 3 * inc_x]) > maxf) { if (ABS(x[i + 3 * inc_x]) > maxf) {
max = j + 3; max = j + 3;
maxf = ABS(x[i + 3 * inc_x]); maxf = ABS(x[i + 3 * inc_x]);
@ -276,6 +353,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
while (j < n) { while (j < n) {
if (x[i] != x[i]) return(j+1);
if (ABS(x[i]) > maxf) { if (ABS(x[i]) > maxf) {
max = j; max = j;
maxf = ABS(x[i]); maxf = ABS(x[i]);

View File

@ -56,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
BLASLONG max=0; BLASLONG max=0;
if (n <= 0 || inc_x <= 0) return(max); if (n <= 0 || inc_x <= 0) return(max);
if (n==1) return(1);
if (x[0]!=x[0]) return(1);
maxf=ABS(x[0]); maxf=ABS(x[0]);
ix += inc_x; ix += inc_x;
i++; i++;
while(i < n) while(i < n)
{ {
if (x[ix]!=x[ix]) return(i+1);
if( ABS(x[ix]) > maxf ) if( ABS(x[ix]) > maxf )
{ {
max = i; max = i;

View File

@ -56,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
BLASLONG min=0; BLASLONG min=0;
if (n <= 0 || inc_x <= 0) return(min); if (n <= 0 || inc_x <= 0) return(min);
if (n==1) return(1);
minf=ABS(x[0]); if (x[0]!=x[0]) return(1);
minf=ABS(x[0]);
ix += inc_x; ix += inc_x;
i++; i++;
while(i < n) while(i < n)
{ {
if (x[ix]!=x[ix]) return(i+1);
if( ABS(x[ix]) < ABS(minf) ) if( ABS(x[ix]) < ABS(minf) )
{ {
min = i; min = i;

View File

@ -100,6 +100,8 @@
movl $1, RET movl $1, RET
FLD (X) FLD (X)
fcomi %st, %st
jp .L999
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
#endif #endif
@ -121,6 +123,8 @@
#endif #endif
FLD 0 * SIZE(X) FLD 0 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
#endif #endif
@ -131,6 +135,8 @@
incl NUM incl NUM
FLD 1 * SIZE(X) FLD 1 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
#endif #endif
@ -141,6 +147,8 @@
incl NUM incl NUM
FLD 2 * SIZE(X) FLD 2 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
#endif #endif
@ -151,6 +159,8 @@
incl NUM incl NUM
FLD 3 * SIZE(X) FLD 3 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
#endif #endif
@ -161,6 +171,8 @@
incl NUM incl NUM
FLD 4 * SIZE(X) FLD 4 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
#endif #endif
@ -171,6 +183,8 @@
incl NUM incl NUM
FLD 5 * SIZE(X) FLD 5 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
#endif #endif
@ -181,6 +195,8 @@
incl NUM incl NUM
FLD 6 * SIZE(X) FLD 6 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
#endif #endif
@ -191,6 +207,8 @@
incl NUM incl NUM
FLD 7 * SIZE(X) FLD 7 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
#endif #endif
@ -215,6 +233,8 @@
.L21: .L21:
FLD 0 * SIZE(X) FLD 0 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
#endif #endif
@ -238,6 +258,8 @@
.L50: .L50:
FLD 0 * SIZE(X) FLD 0 * SIZE(X)
fucomi
jp .L998
addl INCX, X addl INCX, X
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
@ -249,6 +271,8 @@
incl NUM incl NUM
FLD 0 * SIZE(X) FLD 0 * SIZE(X)
fucomi
jp .L998
addl INCX, X addl INCX, X
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
@ -260,6 +284,8 @@
incl NUM incl NUM
FLD 0 * SIZE(X) FLD 0 * SIZE(X)
fucomi
jp .L998
addl INCX, X addl INCX, X
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
@ -271,6 +297,8 @@
incl NUM incl NUM
FLD 0 * SIZE(X) FLD 0 * SIZE(X)
fucomi
jp .L998
addl INCX, X addl INCX, X
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
@ -282,6 +310,8 @@
incl NUM incl NUM
FLD 0 * SIZE(X) FLD 0 * SIZE(X)
fucomi
jp .L998
addl INCX, X addl INCX, X
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
@ -293,6 +323,8 @@
incl NUM incl NUM
FLD 0 * SIZE(X) FLD 0 * SIZE(X)
fucomi
jp .L998
addl INCX, X addl INCX, X
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
@ -304,6 +336,8 @@
incl NUM incl NUM
FLD 0 * SIZE(X) FLD 0 * SIZE(X)
fucomi
jp .L998
addl INCX, X addl INCX, X
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
@ -315,6 +349,8 @@
incl NUM incl NUM
FLD 0 * SIZE(X) FLD 0 * SIZE(X)
fucomi
jp .L998
addl INCX, X addl INCX, X
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
@ -338,6 +374,8 @@
.L61: .L61:
FLD 0 * SIZE(X) FLD 0 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS #ifdef USE_ABS
fabs fabs
#endif #endif
@ -361,4 +399,7 @@
popl %ebp popl %ebp
ret ret
.L998: mov NUM, RET
jmp .L999
EPILOGUE EPILOGUE

View File

@ -93,6 +93,10 @@
addq INCX, X addq INCX, X
decq M decq M
shufps $0, %xmm0, %xmm0 shufps $0, %xmm0, %xmm0
incq RET
comiss %xmm0, %xmm0
jp .L999
decq RET
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm0 andps %xmm15, %xmm0
#endif #endif
@ -254,6 +258,10 @@
decq M decq M
addq $SIZE, X addq $SIZE, X
incq RET
comiss %xmm1, %xmm1
jp .L998
decq RET
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm1 andps %xmm15, %xmm1
#endif #endif
@ -268,6 +276,14 @@
movss 0 * SIZE(X), %xmm1 movss 0 * SIZE(X), %xmm1
movss 1 * SIZE(X), %xmm2 movss 1 * SIZE(X), %xmm2
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
decq RET
decq RET
subq $2, M subq $2, M
addq $2 * SIZE, X addq $2 * SIZE, X
@ -332,6 +348,31 @@
movss 5 * SIZE(X), %xmm6 movss 5 * SIZE(X), %xmm6
movss 6 * SIZE(X), %xmm7 movss 6 * SIZE(X), %xmm7
movss 7 * SIZE(X), %xmm8 movss 7 * SIZE(X), %xmm8
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
incq RET
comiss %xmm3, %xmm3
jp .L998
incq RET
comiss %xmm4, %xmm4
jp .L998
incq RET
comiss %xmm5, %xmm5
jp .L998
incq RET
comiss %xmm6, %xmm6
jp .L998
incq RET
comiss %xmm7, %xmm7
jp .L998
incq RET
comiss %xmm8, %xmm8
jp .L998
subq $8, RET
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm1 andps %xmm15, %xmm1
andps %xmm15, %xmm2 andps %xmm15, %xmm2
@ -378,6 +419,19 @@
movss 1 * SIZE(X), %xmm2 movss 1 * SIZE(X), %xmm2
movss 2 * SIZE(X), %xmm3 movss 2 * SIZE(X), %xmm3
movss 3 * SIZE(X), %xmm4 movss 3 * SIZE(X), %xmm4
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
incq RET
comiss %xmm3, %xmm3
jp .L998
incq RET
comiss %xmm4, %xmm4
jp .L998
subq $4, RET
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm1 andps %xmm15, %xmm1
andps %xmm15, %xmm2 andps %xmm15, %xmm2
@ -405,6 +459,13 @@
movss 0 * SIZE(X), %xmm1 movss 0 * SIZE(X), %xmm1
movss 1 * SIZE(X), %xmm2 movss 1 * SIZE(X), %xmm2
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
subq $2, RET
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm1 andps %xmm15, %xmm1
andps %xmm15, %xmm2 andps %xmm15, %xmm2
@ -593,6 +654,31 @@
movss 5 * SIZE(X), %xmm6 movss 5 * SIZE(X), %xmm6
movss 6 * SIZE(X), %xmm7 movss 6 * SIZE(X), %xmm7
movss 7 * SIZE(X), %xmm8 movss 7 * SIZE(X), %xmm8
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
incq RET
comiss %xmm3, %xmm3
jp .L998
incq RET
comiss %xmm4, %xmm4
jp .L998
incq RET
comiss %xmm5, %xmm5
jp .L998
incq RET
comiss %xmm6, %xmm6
jp .L998
incq RET
comiss %xmm7, %xmm7
jp .L998
incq RET
comiss %xmm8, %xmm8
jp .L998
subq $8, RET
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm1 andps %xmm15, %xmm1
andps %xmm15, %xmm2 andps %xmm15, %xmm2
@ -639,6 +725,19 @@
movss 1 * SIZE(X), %xmm2 movss 1 * SIZE(X), %xmm2
movss 2 * SIZE(X), %xmm3 movss 2 * SIZE(X), %xmm3
movss 3 * SIZE(X), %xmm4 movss 3 * SIZE(X), %xmm4
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
incq RET
comiss %xmm3, %xmm3
jp .L998
incq RET
comiss %xmm4, %xmm4
jp .L998
subq $4, RET
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm1 andps %xmm15, %xmm1
andps %xmm15, %xmm2 andps %xmm15, %xmm2
@ -666,6 +765,13 @@
movss 0 * SIZE(X), %xmm1 movss 0 * SIZE(X), %xmm1
movss 1 * SIZE(X), %xmm2 movss 1 * SIZE(X), %xmm2
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
subq $2, RET
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm1 andps %xmm15, %xmm1
andps %xmm15, %xmm2 andps %xmm15, %xmm2
@ -885,6 +991,31 @@
movss 0 * SIZE(X), %xmm2 movss 0 * SIZE(X), %xmm2
subq INCX, X subq INCX, X
movss 0 * SIZE(X), %xmm1 movss 0 * SIZE(X), %xmm1
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
incq RET
comiss %xmm3, %xmm3
jp .L998
incq RET
comiss %xmm4, %xmm4
jp .L998
incq RET
comiss %xmm5, %xmm5
jp .L998
incq RET
comiss %xmm6, %xmm6
jp .L998
incq RET
comiss %xmm7, %xmm7
jp .L998
incq RET
comiss %xmm8, %xmm8
jp .L998
subq $8, RET
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm1 andps %xmm15, %xmm1
andps %xmm15, %xmm2 andps %xmm15, %xmm2
@ -932,7 +1063,19 @@
addq INCX, X addq INCX, X
movss 0 * SIZE(X), %xmm4 movss 0 * SIZE(X), %xmm4
addq INCX, X addq INCX, X
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
incq RET
comiss %xmm3, %xmm3
jp .L998
incq RET
comiss %xmm4, %xmm4
jp .L998
subq $4, RET
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm1 andps %xmm15, %xmm1
andps %xmm15, %xmm2 andps %xmm15, %xmm2
@ -961,6 +1104,13 @@
addq INCX, X addq INCX, X
movss 0 * SIZE(X), %xmm2 movss 0 * SIZE(X), %xmm2
addq INCX, X addq INCX, X
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
subq $2, RET
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm1 andps %xmm15, %xmm1
andps %xmm15, %xmm2 andps %xmm15, %xmm2
@ -982,5 +1132,9 @@
ret ret
.L998:
// incq RET
jmp .L999
EPILOGUE EPILOGUE

View File

@ -79,6 +79,8 @@
movsd (X), %xmm0 movsd (X), %xmm0
addq INCX, X addq INCX, X
decq M decq M
comisd %xmm0, %xmm0
jp .L987
#ifdef USE_ABS #ifdef USE_ABS
andpd %xmm15, %xmm0 andpd %xmm15, %xmm0
#endif #endif
@ -269,6 +271,11 @@
je .L21 je .L21
movsd 0 * SIZE(X), %xmm1 movsd 0 * SIZE(X), %xmm1
incq RET
comisd %xmm1, %xmm1
jp .L987
decq RET
#ifdef USE_ABS #ifdef USE_ABS
andpd %xmm15, %xmm1 andpd %xmm15, %xmm1
#endif #endif
@ -342,6 +349,32 @@
movsd 5 * SIZE(X), %xmm6 movsd 5 * SIZE(X), %xmm6
movsd 6 * SIZE(X), %xmm7 movsd 6 * SIZE(X), %xmm7
movsd 7 * SIZE(X), %xmm8 movsd 7 * SIZE(X), %xmm8
incq RET
comisd %xmm1, %xmm1
jp .L987
incq RET
comisd %xmm2, %xmm2
jp .L987
incq RET
comisd %xmm3, %xmm3
jp .L987
incq RET
comisd %xmm4, %xmm4
jp .L987
incq RET
comisd %xmm5, %xmm5
jp .L987
incq RET
comisd %xmm6, %xmm6
jp .L987
incq RET
comisd %xmm7, %xmm7
jp .L987
incq RET
comisd %xmm8, %xmm8
jp .L987
subq $8, RET
#ifdef USE_ABS #ifdef USE_ABS
andpd %xmm15, %xmm1 andpd %xmm15, %xmm1
andpd %xmm15, %xmm2 andpd %xmm15, %xmm2
@ -374,9 +407,9 @@
comisd %xmm0, %xmm6 comisd %xmm0, %xmm6
je .L999 je .L999
incq RET incq RET
comisd %xmm0, %xmm7 comisd %xmm0, %xmm7
je .L999 je .L999
incq RET incq RET
jmp .L999 jmp .L999
ALIGN_3 ALIGN_3
@ -388,6 +421,21 @@
movsd 1 * SIZE(X), %xmm2 movsd 1 * SIZE(X), %xmm2
movsd 2 * SIZE(X), %xmm3 movsd 2 * SIZE(X), %xmm3
movsd 3 * SIZE(X), %xmm4 movsd 3 * SIZE(X), %xmm4
incq RET
comisd %xmm1, %xmm1
jp .L987
incq RET
comisd %xmm2, %xmm2
jp .L987
incq RET
comisd %xmm3, %xmm3
jp .L987
incq RET
comisd %xmm4, %xmm4
jp .L987
subq $4, RET
#ifdef USE_ABS #ifdef USE_ABS
andpd %xmm15, %xmm1 andpd %xmm15, %xmm1
andpd %xmm15, %xmm2 andpd %xmm15, %xmm2
@ -415,6 +463,14 @@
movsd 0 * SIZE(X), %xmm1 movsd 0 * SIZE(X), %xmm1
movsd 1 * SIZE(X), %xmm2 movsd 1 * SIZE(X), %xmm2
incq RET
comisd %xmm1, %xmm1
jp .L987
incq RET
comisd %xmm2, %xmm2
jp .L987
subq $2, RET
#ifdef USE_ABS #ifdef USE_ABS
andpd %xmm15, %xmm1 andpd %xmm15, %xmm1
andpd %xmm15, %xmm2 andpd %xmm15, %xmm2
@ -670,6 +726,32 @@
movsd 5 * SIZE(X), %xmm6 movsd 5 * SIZE(X), %xmm6
movsd 6 * SIZE(X), %xmm7 movsd 6 * SIZE(X), %xmm7
movsd 7 * SIZE(X), %xmm8 movsd 7 * SIZE(X), %xmm8
incq RET
comisd %xmm1, %xmm1
jp .L987
incq RET
comisd %xmm2, %xmm2
jp .L987
incq RET
comisd %xmm3, %xmm3
jp .L987
incq RET
comisd %xmm4, %xmm4
jp .L987
incq RET
comisd %xmm5, %xmm5
jp .L987
incq RET
comisd %xmm6, %xmm6
jp .L987
incq RET
comisd %xmm7, %xmm7
jp .L987
incq RET
comisd %xmm8, %xmm8
jp .L987
subq $8, RET
#ifdef USE_ABS #ifdef USE_ABS
andpd %xmm15, %xmm1 andpd %xmm15, %xmm1
andpd %xmm15, %xmm2 andpd %xmm15, %xmm2
@ -716,6 +798,20 @@
movsd 1 * SIZE(X), %xmm2 movsd 1 * SIZE(X), %xmm2
movsd 2 * SIZE(X), %xmm3 movsd 2 * SIZE(X), %xmm3
movsd 3 * SIZE(X), %xmm4 movsd 3 * SIZE(X), %xmm4
incq RET
comisd %xmm1, %xmm1
jp .L987
incq RET
comisd %xmm2, %xmm2
jp .L987
incq RET
comisd %xmm3, %xmm3
jp .L987
incq RET
comisd %xmm4, %xmm4
jp .L987
subq $4, RET
#ifdef USE_ABS #ifdef USE_ABS
andpd %xmm15, %xmm1 andpd %xmm15, %xmm1
andpd %xmm15, %xmm2 andpd %xmm15, %xmm2
@ -743,11 +839,21 @@
movsd 0 * SIZE(X), %xmm1 movsd 0 * SIZE(X), %xmm1
movsd 1 * SIZE(X), %xmm2 movsd 1 * SIZE(X), %xmm2
incq RET
comisd %xmm1, %xmm1
jp .L987
incq RET
comisd %xmm2, %xmm2
jp .L987
subq $2, RET
#ifdef USE_ABS #ifdef USE_ABS
andpd %xmm15, %xmm1 andpd %xmm15, %xmm1
andpd %xmm15, %xmm2 andpd %xmm15, %xmm2
#endif #endif
addq $2 * SIZE, X addq $2 * SIZE, X
comisd %xmm0, %xmm0
jp .L987
incq RET incq RET
comisd %xmm0, %xmm1 comisd %xmm0, %xmm1
je .L999 je .L999
@ -962,6 +1068,7 @@
ALIGN_4 ALIGN_4
.L92: .L92:
movsd 0 * SIZE(X), %xmm1 movsd 0 * SIZE(X), %xmm1
addq INCX, X addq INCX, X
movhpd 0 * SIZE(X), %xmm1 movhpd 0 * SIZE(X), %xmm1
@ -1033,6 +1140,32 @@
movsd 0 * SIZE(X), %xmm2 movsd 0 * SIZE(X), %xmm2
subq INCX, X subq INCX, X
movsd 0 * SIZE(X), %xmm1 movsd 0 * SIZE(X), %xmm1
incq RET
comisd %xmm1, %xmm1
jp .L987
incq RET
comisd %xmm2, %xmm2
jp .L987
incq RET
comisd %xmm3, %xmm3
jp .L987
incq RET
comisd %xmm4, %xmm4
jp .L987
incq RET
comisd %xmm5, %xmm5
jp .L987
incq RET
comisd %xmm6, %xmm6
jp .L987
incq RET
comisd %xmm7, %xmm7
jp .L987
incq RET
comisd %xmm8, %xmm8
jp .L987
subq $8, RET
#ifdef USE_ABS #ifdef USE_ABS
andpd %xmm15, %xmm1 andpd %xmm15, %xmm1
andpd %xmm15, %xmm2 andpd %xmm15, %xmm2
@ -1083,6 +1216,20 @@
addq INCX, X addq INCX, X
movsd 0 * SIZE(X), %xmm4 movsd 0 * SIZE(X), %xmm4
addq INCX, X addq INCX, X
incq RET
comisd %xmm1, %xmm1
jp .L987
incq RET
comisd %xmm2, %xmm2
jp .L987
incq RET
comisd %xmm3, %xmm3
jp .L987
incq RET
comisd %xmm4, %xmm4
jp .L987
subq $4, RET
#ifdef USE_ABS #ifdef USE_ABS
andpd %xmm15, %xmm1 andpd %xmm15, %xmm1
andpd %xmm15, %xmm2 andpd %xmm15, %xmm2
@ -1111,6 +1258,14 @@
addq INCX, X addq INCX, X
movsd 0 * SIZE(X), %xmm2 movsd 0 * SIZE(X), %xmm2
addq INCX, X addq INCX, X
incq RET
comisd %xmm1, %xmm1
jp .L987
incq RET
comisd %xmm2, %xmm2
jp .L987
decq RET
decq RET
#ifdef USE_ABS #ifdef USE_ABS
andpd %xmm15, %xmm1 andpd %xmm15, %xmm1
andpd %xmm15, %xmm2 andpd %xmm15, %xmm2
@ -1122,7 +1277,6 @@
comisd %xmm0, %xmm2 comisd %xmm0, %xmm2
je .L999 je .L999
ALIGN_3 ALIGN_3
.L98: .L98:
incq RET incq RET
ALIGN_3 ALIGN_3
@ -1132,5 +1286,8 @@
ret ret
EPILOGUE .L987:
incq RET //count from xmm0
jmp .L999
EPILOGUE

View File

@ -58,3 +58,29 @@ CTEST(amax, damax){
} }
#endif #endif
#ifdef BUILD_SINGLE
CTEST(amax, isamax_nan){
  /* Three floats with NaN in the second and third slots; the expected
     result is 2, the 1-based position of the first NaN. */
  float data[]={1., 0./0., 0./0. };
  blasint count=3;
  blasint stride=1;
  int expected=2;
  int actual=0;

  actual=BLASFUNC(isamax)(&count, data, &stride);
  ASSERT_EQUAL(expected, actual);
}
#endif
#ifdef BUILD_DOUBLE
CTEST(amax, idamax_nan){
  /* Four doubles with NaN in the leading slot; the expected result is 1,
     the 1-based position of that NaN, even though larger finite values
     follow it. */
  double data[]={ 0./0., 1.,2. ,3.};
  blasint count=4;
  blasint stride=1;
  int expected=1;
  int actual=0;

  actual=BLASFUNC(idamax)(&count, data, &stride);
  ASSERT_EQUAL(expected, actual);
}
#endif