This commit is contained in:
Martin Kroeker 2024-01-22 11:11:12 +03:00 committed by GitHub
commit f1ff4c5c02
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 623 additions and 22 deletions

View File

@ -56,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
BLASLONG max=0;
if (n <= 0 || inc_x <= 0) return(max);
if (n==1) return(1);
if (x[0]!=x[0]) return(1);
maxf=ABS(x[0]);
ix += inc_x;
i++;
while(i < n)
{
if (x[ix]!=x[ix]) return(i+1);
if( ABS(x[ix]) > maxf )
{
max = i;

View File

@ -56,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
BLASLONG min=0;
if (n <= 0 || inc_x <= 0) return(min);
minf=ABS(x[0]);
if (n==1) return(1);
if (x[0]!=x[0]) return(1);
minf=ABS(x[0]);
ix += inc_x;
i++;
while(i < n)
{
if (x[ix]!=x[ix]) return(i+1);
if( ABS(x[ix]) < ABS(minf) )
{
min = i;

View File

@ -1,5 +1,5 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
Copyright (c) 2013, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@ -25,6 +25,15 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : NoTest
* BLASTEST double : NoTest
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h"
#include <math.h>
@ -47,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
BLASLONG max=0;
if (n <= 0 || inc_x <= 0) return(max);
if (n==1) return(1);
if (x[0]!=x[0]) return(1);
maxf=ABS(x[0]);
ix += inc_x;
i++;
while(i < n)
{
if (x[ix]!=x[ix]) return(i+1);
if( ABS(x[ix]) > maxf )
{
max = i;

View File

@ -1,5 +1,5 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
Copyright (c) 2013, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@ -25,6 +25,15 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : NoTest
* BLASTEST double : NoTest
* CTEST : NoTest
* TEST : NoTest
*
**************************************************************************************/
#include "common.h"
#include <math.h>
@ -47,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
BLASLONG min=0;
if (n <= 0 || inc_x <= 0) return(min);
minf=ABS(x[0]);
if (n==1) return(1);
if (x[0]!=x[0]) return(1);
minf=ABS(x[0]);
ix += inc_x;
i++;
while(i < n)
{
if (x[ix]!=x[ix]) return(i+1);
if( ABS(x[ix]) < ABS(minf) )
{
min = i;

View File

@ -520,6 +520,19 @@ LL(1000):
.align 4
LL(1010):
addi RET, RET, 1
fcmpu cr0, f24, f24
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f25, f25
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f26, f26
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f27, f27
bun cr0, LL(9999)
fabs f8, f24
fabs f9, f25
fabs f10, f26
@ -529,6 +542,20 @@ LL(1010):
LFD f25, 9 * SIZE(XX)
LFD f26, 10 * SIZE(XX)
LFD f27, 11 * SIZE(XX)
addi RET, RET, 1
fcmpu cr0, f24, f24
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f25, f25
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f26, f26
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f27, f27
bun cr0, LL(9999)
subi RET, RET, 8
fabs f12, f28
fabs f13, f29
@ -577,6 +604,32 @@ LL(1010):
.align 4
LL(1020):
addi RET, RET, 1
fcmpu cr0, f24, f24
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f25, f25
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f26, f26
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f27, f27
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f28, f28
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f29, f29
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f30, f30
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f31, f31
bun cr0, LL(9999)
subi RET, RET, 8
fabs f8, f24
fabs f9, f25
fabs f10, f26
@ -631,8 +684,12 @@ LL(1050):
LL(1060):
	// One element per iteration, unit stride (XX advances by SIZE).
	LFD	f8, 0 * SIZE(XX)
	addi	XX, XX, 1 * SIZE
	// RET is bumped exactly once per element, before the NaN test, so it
	// already names the current element when either exit branch is taken.
	addi	RET, RET, 1
	fcmpu	cr0, f8, f8		// a value is unordered with itself only if it is NaN
	bun	cr0, LL(9999)		// was "cru", which is not a condition-register field
	fabs	f8, f8
	fcmpu	cr0, f1, f8		// compare |x| with the reference value kept in f1
	beq	cr0, LL(9999)
	bdnz	LL(1060)
@ -658,6 +715,18 @@ LL(1100):
.align 4
LL(1110):
addi RET, RET, 1
fcmpu cr0, f24, f24
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f25, f25
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f26, f26
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f27, f27
bun cr0, LL(9999)
fabs f8, f24
fabs f9, f25
fabs f10, f26
@ -667,7 +736,19 @@ LL(1110):
LFDUX f25, XX, INCX
LFDUX f26, XX, INCX
LFDUX f27, XX, INCX
addi RET, RET, 1
fcmpu cr0, f24, f24
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f25, f25
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f26, f26
bun cr0, LL(9999)
addi RET, RET, 1
fcmpu cr0, f27, f27
bun cr0, LL(9999)
subi RET, RET, 8
fabs f12, f28
fabs f13, f29
fabs f14, f30
@ -714,6 +795,30 @@ LL(1110):
.align 4
LL(1120):
	// NaN screen for the eight values held in f24..f31.
	// RET is advanced before each self-compare so that, on the unordered
	// (NaN) exit, it identifies the element that was just tested.
	addi	RET, RET, 1
	fcmpu	cr0, f24, f24
	bun	cr0, LL(9999)
	addi	RET, RET, 1
	fcmpu	cr0, f25, f25
	bun	cr0, LL(9999)
	addi	RET, RET, 1
	fcmpu	cr0, f26, f26
	bun	cr0, LL(9999)
	addi	RET, RET, 1
	fcmpu	cr0, f27, f27
	bun	cr0, LL(9999)
	addi	RET, RET, 1
	fcmpu	cr0, f28, f28
	bun	cr0, LL(9999)
	addi	RET, RET, 1
	fcmpu	cr0, f29, f29
	bun	cr0, LL(9999)
	addi	RET, RET, 1
	fcmpu	cr0, f30, f30
	bun	cr0, LL(9999)
	addi	RET, RET, 1
	fcmpu	cr0, f31, f31
	bun	cr0, LL(9999)		// this branch was missing: a NaN in f31 went unnoticed
	// No NaN found: undo the eight increments before the regular search.
	subi	RET, RET, 8
fabs f8, f24
fabs f9, f25
fabs f10, f26
@ -765,8 +870,11 @@ LL(1150):
LL(1160):
	// One element per iteration; LFDUX steps XX by INCX and loads f8.
	LFDUX	f8, XX, INCX
	addi	RET, RET, 1		// single per-element increment, done before either exit test
	fcmpu	cr0, f8, f8		// self-compare is unordered only for NaN
	bun	cr0, LL(9999)		// cr0 spelled out, matching the other NaN branches
	fabs	f8, f8			// taken once; the earlier duplicate fabs was redundant
	fcmpu	cr0, f1, f8		// compare |x| with the reference value kept in f1
	beq	cr0, LL(9999)
	bdnz	LL(1160)

View File

@ -327,6 +327,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
BLASLONG max = 0;
if (n <= 0 || inc_x <= 0) return (max);
if (n == 1) return(1);
if (inc_x == 1) {
@ -335,7 +336,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
BLASLONG n1 = n & -32;
if (n1 > 0) {
/* diamax_kernel_32 is handed the first n1 elements, so the NaN pre-scan
 * must cover that whole range, not just one group of 32.  The first NaN
 * found is reported by its 1-based position.
 * NOTE(review): assumes the kernel itself does not report NaNs - confirm. */
for (BLASLONG ii = i; ii < n1; ii++) if (x[ii] != x[ii]) return(ii+1);
max = diamax_kernel_32(n1, x, &maxf);
i = n1;
@ -343,6 +344,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
#endif
#endif
while (i < n) {
if (x[i] != x[i]) return(i+1);
if (ABS(x[i]) > maxf) {
max = i;
maxf = ABS(x[i]);
@ -356,6 +358,10 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
BLASLONG n1 = n & -4;
while (j < n1) {
/* NaN screen for the four elements handled by this unrolled step.
 * i is the offset into x (neighbours are inc_x apart) while j is the
 * element counter used for the result (cf. "max = j" just below), so the
 * k-th element of the group has the 1-based index j + k + 1. */
if (x[i] != x[i]) return(j+1);
if (x[i+inc_x] != x[i+inc_x]) return(j+2);
if (x[i+2*inc_x] != x[i+2*inc_x]) return(j+3);
if (x[i+3*inc_x] != x[i+3*inc_x]) return(j+4);
if (ABS(x[i]) > maxf) {
max = j;
maxf = ABS(x[i]);
@ -381,6 +387,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
while (j < n) {
if (x[i] != x[i]) return(j+1);
if (ABS(x[i]) > maxf) {
max = j;
maxf = ABS(x[i]);

View File

@ -58,6 +58,78 @@ static BLASLONG siamax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *maxf) {
register __vector float quadruple_values={0,0,0,0};
register __vector float * v_ptrx=(__vector float *)x;
for(; i<n; i+=64){
if (vec_any_nan(v_ptrx[0])) {
float d=vec_extract(v_ptrx[0],0);
if (d!=d) return(i+0);
d=vec_extract(v_ptrx[0],1);
if (d!=d) return(i+1);
d=vec_extract(v_ptrx[0],2);
if (d!=d) return(i+2);
return(i+3);
}
if (vec_any_nan(v_ptrx[1])) {
float d=vec_extract(v_ptrx[1],0);
if (d!=d) return(i+4+0);
d=vec_extract(v_ptrx[1],1);
if (d!=d) return(i+4+1);
d=vec_extract(v_ptrx[1],2);
if (d!=d) return(i+4+2);
return(i+4+3);
}
if (vec_any_nan(v_ptrx[2])) {
float d=vec_extract(v_ptrx[2],0);
if (d!=d) return(i+8+0);
d=vec_extract(v_ptrx[2],1);
if (d!=d) return(i+8+1);
d=vec_extract(v_ptrx[2],2);
if (d!=d) return(i+8+2);
return(i+8+3);
}
if (vec_any_nan(v_ptrx[3])) {
float d=vec_extract(v_ptrx[3],0);
if (d!=d) return(i+12+0);
d=vec_extract(v_ptrx[3],1);
if (d!=d) return(i+12+1);
d=vec_extract(v_ptrx[3],2);
if (d!=d) return(i+12+2);
return(i+12+3);
}
if (vec_any_nan(v_ptrx[4])) {
float d=vec_extract(v_ptrx[4],0);
if (d!=d) return(i+16+0);
d=vec_extract(v_ptrx[4],1);
if (d!=d) return(i+16+1);
d=vec_extract(v_ptrx[4],2);
if (d!=d) return(i+16+2);
return(i+16+3);
}
if (vec_any_nan(v_ptrx[5])) {
float d=vec_extract(v_ptrx[5],0);
if (d!=d) return(i+20+0);
d=vec_extract(v_ptrx[5],1);
if (d!=d) return(i+20+1);
d=vec_extract(v_ptrx[5],2);
if (d!=d) return(i+20+2);
return(i+20+3);
}
if (vec_any_nan(v_ptrx[6])) {
float d=vec_extract(v_ptrx[6],0);
if (d!=d) return(i+24+0);
d=vec_extract(v_ptrx[6],1);
if (d!=d) return(i+24+1);
d=vec_extract(v_ptrx[6],2);
if (d!=d) return(i+24+2);
return(i+24+3);
}
if (vec_any_nan(v_ptrx[7])) {
float d=vec_extract(v_ptrx[7],0);
if (d!=d) return(i+28+0);
d=vec_extract(v_ptrx[7],1);
if (d!=d) return(i+28+1);
d=vec_extract(v_ptrx[7],2);
if (d!=d) return(i+28+2);
return(i+28+3);
}
//absolute temporary vectors
register __vector float v0=vec_abs(v_ptrx[0]);
register __vector float v1=vec_abs(v_ptrx[1]);
@ -226,7 +298,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
BLASLONG max = 0;
if (n <= 0 || inc_x <= 0) return (max);
if (x[0] != x[0]) return(1);
if (inc_x == 1) {
BLASLONG n1 = n & -64;
@ -238,6 +310,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
}
while (i < n) {
if (x[i] != x[i]) return(i+1);
if (ABS(x[i]) > maxf) {
max = i;
maxf = ABS(x[i]);
@ -251,18 +324,22 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
BLASLONG n1 = n & -4;
while (j < n1) {
/* Four-way unrolled step.  Each element is first screened for NaN
 * (x != x), returning its 1-based position, and only then compared with
 * the running maximum.  The element at offset i + k*inc_x is element
 * j + k, so its 1-based position is j + k + 1. */
if (x[i] != x[i]) return(j+1);
if (ABS(x[i]) > maxf) {
max = j;
maxf = ABS(x[i]);
}
if (x[i+inc_x] != x[i+inc_x]) return(j+2);
if (ABS(x[i + inc_x]) > maxf) {
max = j + 1;
maxf = ABS(x[i + inc_x]);
}
if (x[i+2*inc_x] != x[i+2*inc_x]) return(j+3);
if (ABS(x[i + 2 * inc_x]) > maxf) {
max = j + 2;
maxf = ABS(x[i + 2 * inc_x]);
}
if (x[i+3*inc_x] != x[i+3*inc_x]) return(j+4);
if (ABS(x[i + 3 * inc_x]) > maxf) {
max = j + 3;
maxf = ABS(x[i + 3 * inc_x]);
@ -276,6 +353,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
while (j < n) {
if (x[i] != x[i]) return(j+1);
if (ABS(x[i]) > maxf) {
max = j;
maxf = ABS(x[i]);

View File

@ -56,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
BLASLONG max=0;
if (n <= 0 || inc_x <= 0) return(max);
if (n==1) return(1);
if (x[0]!=x[0]) return(1);
maxf=ABS(x[0]);
ix += inc_x;
i++;
while(i < n)
{
if (x[ix]!=x[ix]) return(i+1);
if( ABS(x[ix]) > maxf )
{
max = i;

View File

@ -56,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
BLASLONG min=0;
if (n <= 0 || inc_x <= 0) return(min);
minf=ABS(x[0]);
if (n==1) return(1);
if (x[0]!=x[0]) return(1);
minf=ABS(x[0]);
ix += inc_x;
i++;
while(i < n)
{
if (x[ix]!=x[ix]) return(i+1);
if( ABS(x[ix]) < ABS(minf) )
{
min = i;

View File

@ -100,6 +100,8 @@
movl $1, RET
FLD (X)
fcomi %st, %st
jp .L999
#ifdef USE_ABS
fabs
#endif
@ -121,6 +123,8 @@
#endif
FLD 0 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS
fabs
#endif
@ -131,6 +135,8 @@
incl NUM
FLD 1 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS
fabs
#endif
@ -141,6 +147,8 @@
incl NUM
FLD 2 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS
fabs
#endif
@ -151,6 +159,8 @@
incl NUM
FLD 3 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS
fabs
#endif
@ -161,6 +171,8 @@
incl NUM
FLD 4 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS
fabs
#endif
@ -171,6 +183,8 @@
incl NUM
FLD 5 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS
fabs
#endif
@ -181,6 +195,8 @@
incl NUM
FLD 6 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS
fabs
#endif
@ -191,6 +207,8 @@
incl NUM
FLD 7 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS
fabs
#endif
@ -215,6 +233,8 @@
.L21:
FLD 0 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS
fabs
#endif
@ -238,6 +258,8 @@
.L50:
FLD 0 * SIZE(X)
fucomi
jp .L998
addl INCX, X
#ifdef USE_ABS
fabs
@ -249,6 +271,8 @@
incl NUM
FLD 0 * SIZE(X)
fucomi
jp .L998
addl INCX, X
#ifdef USE_ABS
fabs
@ -260,6 +284,8 @@
incl NUM
FLD 0 * SIZE(X)
fucomi
jp .L998
addl INCX, X
#ifdef USE_ABS
fabs
@ -271,6 +297,8 @@
incl NUM
FLD 0 * SIZE(X)
fucomi
jp .L998
addl INCX, X
#ifdef USE_ABS
fabs
@ -282,6 +310,8 @@
incl NUM
FLD 0 * SIZE(X)
fucomi
jp .L998
addl INCX, X
#ifdef USE_ABS
fabs
@ -293,6 +323,8 @@
incl NUM
FLD 0 * SIZE(X)
fucomi
jp .L998
addl INCX, X
#ifdef USE_ABS
fabs
@ -304,6 +336,8 @@
incl NUM
FLD 0 * SIZE(X)
fucomi
jp .L998
addl INCX, X
#ifdef USE_ABS
fabs
@ -315,6 +349,8 @@
incl NUM
FLD 0 * SIZE(X)
fucomi
jp .L998
addl INCX, X
#ifdef USE_ABS
fabs
@ -338,6 +374,8 @@
.L61:
FLD 0 * SIZE(X)
fucomi
jp .L998
#ifdef USE_ABS
fabs
#endif
@ -361,4 +399,7 @@
popl %ebp
ret
.L998: mov NUM, RET
jmp .L999
EPILOGUE

View File

@ -93,6 +93,10 @@
addq INCX, X
decq M
shufps $0, %xmm0, %xmm0
incq RET
comiss %xmm0, %xmm0
jp .L999
decq RET
#ifdef USE_ABS
andps %xmm15, %xmm0
#endif
@ -254,6 +258,10 @@
decq M
addq $SIZE, X
incq RET
comiss %xmm1, %xmm1
jp .L998
decq RET
#ifdef USE_ABS
andps %xmm15, %xmm1
#endif
@ -268,6 +276,14 @@
movss 0 * SIZE(X), %xmm1
movss 1 * SIZE(X), %xmm2
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
decq RET
decq RET
subq $2, M
addq $2 * SIZE, X
@ -332,6 +348,31 @@
movss 5 * SIZE(X), %xmm6
movss 6 * SIZE(X), %xmm7
movss 7 * SIZE(X), %xmm8
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
incq RET
comiss %xmm3, %xmm3
jp .L998
incq RET
comiss %xmm4, %xmm4
jp .L998
incq RET
comiss %xmm5, %xmm5
jp .L998
incq RET
comiss %xmm6, %xmm6
jp .L998
incq RET
comiss %xmm7, %xmm7
jp .L998
incq RET
comiss %xmm8, %xmm8
jp .L998
subq $8, RET
#ifdef USE_ABS
andps %xmm15, %xmm1
andps %xmm15, %xmm2
@ -378,6 +419,19 @@
movss 1 * SIZE(X), %xmm2
movss 2 * SIZE(X), %xmm3
movss 3 * SIZE(X), %xmm4
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
incq RET
comiss %xmm3, %xmm3
jp .L998
incq RET
comiss %xmm4, %xmm4
jp .L998
subq $4, RET
#ifdef USE_ABS
andps %xmm15, %xmm1
andps %xmm15, %xmm2
@ -405,6 +459,13 @@
movss 0 * SIZE(X), %xmm1
movss 1 * SIZE(X), %xmm2
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
subq $2, RET
#ifdef USE_ABS
andps %xmm15, %xmm1
andps %xmm15, %xmm2
@ -593,6 +654,31 @@
movss 5 * SIZE(X), %xmm6
movss 6 * SIZE(X), %xmm7
movss 7 * SIZE(X), %xmm8
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
incq RET
comiss %xmm3, %xmm3
jp .L998
incq RET
comiss %xmm4, %xmm4
jp .L998
incq RET
comiss %xmm5, %xmm5
jp .L998
incq RET
comiss %xmm6, %xmm6
jp .L998
incq RET
comiss %xmm7, %xmm7
jp .L998
incq RET
comiss %xmm8, %xmm8
jp .L998
subq $8, RET
#ifdef USE_ABS
andps %xmm15, %xmm1
andps %xmm15, %xmm2
@ -639,6 +725,19 @@
movss 1 * SIZE(X), %xmm2
movss 2 * SIZE(X), %xmm3
movss 3 * SIZE(X), %xmm4
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
incq RET
comiss %xmm3, %xmm3
jp .L998
incq RET
comiss %xmm4, %xmm4
jp .L998
subq $4, RET
#ifdef USE_ABS
andps %xmm15, %xmm1
andps %xmm15, %xmm2
@ -666,6 +765,13 @@
movss 0 * SIZE(X), %xmm1
movss 1 * SIZE(X), %xmm2
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
subq $2, RET
#ifdef USE_ABS
andps %xmm15, %xmm1
andps %xmm15, %xmm2
@ -885,6 +991,31 @@
movss 0 * SIZE(X), %xmm2
subq INCX, X
movss 0 * SIZE(X), %xmm1
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
incq RET
comiss %xmm3, %xmm3
jp .L998
incq RET
comiss %xmm4, %xmm4
jp .L998
incq RET
comiss %xmm5, %xmm5
jp .L998
incq RET
comiss %xmm6, %xmm6
jp .L998
incq RET
comiss %xmm7, %xmm7
jp .L998
incq RET
comiss %xmm8, %xmm8
jp .L998
subq $8, RET
#ifdef USE_ABS
andps %xmm15, %xmm1
andps %xmm15, %xmm2
@ -932,7 +1063,19 @@
addq INCX, X
movss 0 * SIZE(X), %xmm4
addq INCX, X
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
incq RET
comiss %xmm3, %xmm3
jp .L998
incq RET
comiss %xmm4, %xmm4
jp .L998
subq $4, RET
#ifdef USE_ABS
andps %xmm15, %xmm1
andps %xmm15, %xmm2
@ -961,6 +1104,13 @@
addq INCX, X
movss 0 * SIZE(X), %xmm2
addq INCX, X
incq RET
comiss %xmm1, %xmm1
jp .L998
incq RET
comiss %xmm2, %xmm2
jp .L998
subq $2, RET
#ifdef USE_ABS
andps %xmm15, %xmm1
andps %xmm15, %xmm2
@ -982,5 +1132,9 @@
ret
.L998:
// incq RET
jmp .L999
EPILOGUE

View File

@ -79,6 +79,8 @@
movsd (X), %xmm0
addq INCX, X
decq M
comisd %xmm0, %xmm0
jp .L987
#ifdef USE_ABS
andpd %xmm15, %xmm0
#endif
@ -269,6 +271,11 @@
je .L21
movsd 0 * SIZE(X), %xmm1
incq RET
comisd %xmm1, %xmm1
jp .L987
decq RET
#ifdef USE_ABS
andpd %xmm15, %xmm1
#endif
@ -342,6 +349,32 @@
movsd 5 * SIZE(X), %xmm6
movsd 6 * SIZE(X), %xmm7
movsd 7 * SIZE(X), %xmm8
incq RET
comisd %xmm1, %xmm1
jp .L987
incq RET
comisd %xmm2, %xmm2
jp .L987
incq RET
comisd %xmm3, %xmm3
jp .L987
incq RET
comisd %xmm4, %xmm4
jp .L987
incq RET
comisd %xmm5, %xmm5
jp .L987
incq RET
comisd %xmm6, %xmm6
jp .L987
incq RET
comisd %xmm7, %xmm7
jp .L987
incq RET
comisd %xmm8, %xmm8
jp .L987
subq $8, RET
#ifdef USE_ABS
andpd %xmm15, %xmm1
andpd %xmm15, %xmm2
@ -374,9 +407,9 @@
// Each candidate register is compared once against xmm0; RET advances by
// one per miss.  The xmm7 compare/increment pair used to appear twice,
// which pushed RET one position too far on the fall-through path.
comisd %xmm0, %xmm6
je .L999
incq RET
comisd %xmm0, %xmm7
je .L999
incq RET
jmp .L999
ALIGN_3
@ -388,6 +421,21 @@
movsd 1 * SIZE(X), %xmm2
movsd 2 * SIZE(X), %xmm3
movsd 3 * SIZE(X), %xmm4
incq RET
comisd %xmm1, %xmm1
jp .L987
incq RET
comisd %xmm2, %xmm2
jp .L987
incq RET
comisd %xmm3, %xmm3
jp .L987
incq RET
comisd %xmm4, %xmm4
jp .L987
subq $4, RET
#ifdef USE_ABS
andpd %xmm15, %xmm1
andpd %xmm15, %xmm2
@ -415,6 +463,14 @@
movsd 0 * SIZE(X), %xmm1
movsd 1 * SIZE(X), %xmm2
incq RET
comisd %xmm1, %xmm1
jp .L987
incq RET
comisd %xmm2, %xmm2
jp .L987
subq $2, RET
#ifdef USE_ABS
andpd %xmm15, %xmm1
andpd %xmm15, %xmm2
@ -670,6 +726,32 @@
movsd 5 * SIZE(X), %xmm6
movsd 6 * SIZE(X), %xmm7
movsd 7 * SIZE(X), %xmm8
incq RET
comisd %xmm1, %xmm1
jp .L987
incq RET
comisd %xmm2, %xmm2
jp .L987
incq RET
comisd %xmm3, %xmm3
jp .L987
incq RET
comisd %xmm4, %xmm4
jp .L987
incq RET
comisd %xmm5, %xmm5
jp .L987
incq RET
comisd %xmm6, %xmm6
jp .L987
incq RET
comisd %xmm7, %xmm7
jp .L987
incq RET
comisd %xmm8, %xmm8
jp .L987
subq $8, RET
#ifdef USE_ABS
andpd %xmm15, %xmm1
andpd %xmm15, %xmm2
@ -716,6 +798,20 @@
movsd 1 * SIZE(X), %xmm2
movsd 2 * SIZE(X), %xmm3
movsd 3 * SIZE(X), %xmm4
incq RET
comisd %xmm1, %xmm1
jp .L987
incq RET
comisd %xmm2, %xmm2
jp .L987
incq RET
comisd %xmm3, %xmm3
jp .L987
incq RET
comisd %xmm4, %xmm4
jp .L987
subq $4, RET
#ifdef USE_ABS
andpd %xmm15, %xmm1
andpd %xmm15, %xmm2
@ -743,11 +839,21 @@
movsd 0 * SIZE(X), %xmm1
movsd 1 * SIZE(X), %xmm2
incq RET
comisd %xmm1, %xmm1
jp .L987
incq RET
comisd %xmm2, %xmm2
jp .L987
subq $2, RET
#ifdef USE_ABS
andpd %xmm15, %xmm1
andpd %xmm15, %xmm2
#endif
addq $2 * SIZE, X
comisd %xmm0, %xmm0
jp .L987
incq RET
comisd %xmm0, %xmm1
je .L999
@ -962,6 +1068,7 @@
ALIGN_4
.L92:
movsd 0 * SIZE(X), %xmm1
addq INCX, X
movhpd 0 * SIZE(X), %xmm1
@ -1033,6 +1140,32 @@
movsd 0 * SIZE(X), %xmm2
subq INCX, X
movsd 0 * SIZE(X), %xmm1
incq RET
comisd %xmm1, %xmm1
jp .L987
incq RET
comisd %xmm2, %xmm2
jp .L987
incq RET
comisd %xmm3, %xmm3
jp .L987
incq RET
comisd %xmm4, %xmm4
jp .L987
incq RET
comisd %xmm5, %xmm5
jp .L987
incq RET
comisd %xmm6, %xmm6
jp .L987
incq RET
comisd %xmm7, %xmm7
jp .L987
incq RET
comisd %xmm8, %xmm8
jp .L987
subq $8, RET
#ifdef USE_ABS
andpd %xmm15, %xmm1
andpd %xmm15, %xmm2
@ -1083,6 +1216,20 @@
addq INCX, X
movsd 0 * SIZE(X), %xmm4
addq INCX, X
incq RET
comisd %xmm1, %xmm1
jp .L987
incq RET
comisd %xmm2, %xmm2
jp .L987
incq RET
comisd %xmm3, %xmm3
jp .L987
incq RET
comisd %xmm4, %xmm4
jp .L987
subq $4, RET
#ifdef USE_ABS
andpd %xmm15, %xmm1
andpd %xmm15, %xmm2
@ -1111,6 +1258,14 @@
addq INCX, X
movsd 0 * SIZE(X), %xmm2
addq INCX, X
incq RET
comisd %xmm1, %xmm1
jp .L987
incq RET
comisd %xmm2, %xmm2
jp .L987
decq RET
decq RET
#ifdef USE_ABS
andpd %xmm15, %xmm1
andpd %xmm15, %xmm2
@ -1122,7 +1277,6 @@
comisd %xmm0, %xmm2
je .L999
ALIGN_3
.L98:
incq RET
ALIGN_3
@ -1132,5 +1286,8 @@
ret
// NaN exit stub, reached from the "jp .L987" branches.  It stays ahead of
// the single EPILOGUE so it is emitted as part of this routine.
// NOTE(review): EPILOGUE is a project macro not visible here; it is
// assumed to close the function and must therefore appear only once.
.L987:
incq RET //count from xmm0
jmp .L999
EPILOGUE

View File

@ -58,3 +58,29 @@ CTEST(amax, damax){
}
#endif
#ifdef BUILD_SINGLE
CTEST(amax, isamax_nan){
/* Single precision: with a finite value followed by NaNs, isamax is
   expected to report the position of the first NaN (1-based: 2). */
float vec[]={1., 0./0., 0./0. };
blasint len=3, stride=1;
int expected=2;
int got=0;
got=BLASFUNC(isamax)(&len, vec, &stride);
ASSERT_EQUAL(expected, got);
}
#endif
#ifdef BUILD_DOUBLE
CTEST(amax, idamax_nan){
/* Double precision: a NaN in the very first slot is expected to be
   reported as position 1, ahead of the larger finite values. */
double vec[]={ 0./0., 1.,2. ,3.};
blasint len=4, stride=1;
int expected=1;
int got=0;
got=BLASFUNC(idamax)(&len, vec, &stride);
ASSERT_EQUAL(expected, got);
}
#endif