[ZARCH] Update max/min functions

This commit is contained in:
maamountki 2019-01-21 15:56:04 +02:00 committed by GitHub
parent b815a04c87
commit b111829226
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 769 additions and 1081 deletions

View File

@ -55,7 +55,7 @@ static FLOAT camax_kernel_32(BLASLONG n, FLOAT *x)
"srlg %%r0,%1,5 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
"pfd 1, 1024(%2) \n\t"
"pfd 1, 1024(%%r1,%2) \n\t"
"vlef %%v16,0(%%r1,%2),0 \n\t"
"vlef %%v17,4(%%r1,%2),0 \n\t"
@ -93,100 +93,88 @@ static FLOAT camax_kernel_32(BLASLONG n, FLOAT *x)
"vlef %%v22,120(%%r1,%2),3 \n\t"
"vlef %%v23,124(%%r1,%2),3 \n\t"
"vflpsb %%v16, %%v16 \n\t"
"vflpsb %%v17, %%v17 \n\t"
"vflpsb %%v18, %%v18 \n\t"
"vflpsb %%v19, %%v19 \n\t"
"vflpsb %%v20, %%v20 \n\t"
"vflpsb %%v21, %%v21 \n\t"
"vflpsb %%v22, %%v22 \n\t"
"vflpsb %%v23, %%v23 \n\t"
"vlef %%v24,128(%%r1,%2),0 \n\t"
"vlef %%v25,132(%%r1,%2),0 \n\t"
"vlef %%v24,136(%%r1,%2),1 \n\t"
"vlef %%v25,140(%%r1,%2),1 \n\t"
"vlef %%v24,144(%%r1,%2),2 \n\t"
"vlef %%v25,148(%%r1,%2),2 \n\t"
"vlef %%v24,152(%%r1,%2),3 \n\t"
"vlef %%v25,156(%%r1,%2),3 \n\t"
"vlef %%v26,160(%%r1,%2),0 \n\t"
"vlef %%v27,164(%%r1,%2),0 \n\t"
"vlef %%v26,168(%%r1,%2),1 \n\t"
"vlef %%v27,172(%%r1,%2),1 \n\t"
"vlef %%v26,176(%%r1,%2),2 \n\t"
"vlef %%v27,180(%%r1,%2),2 \n\t"
"vlef %%v26,184(%%r1,%2),3 \n\t"
"vlef %%v27,188(%%r1,%2),3 \n\t"
"vlef %%v28,192(%%r1,%2),0 \n\t"
"vlef %%v29,196(%%r1,%2),0 \n\t"
"vlef %%v28,200(%%r1,%2),1 \n\t"
"vlef %%v29,204(%%r1,%2),1 \n\t"
"vlef %%v28,208(%%r1,%2),2 \n\t"
"vlef %%v29,212(%%r1,%2),2 \n\t"
"vlef %%v28,216(%%r1,%2),3 \n\t"
"vlef %%v29,220(%%r1,%2),3 \n\t"
"vlef %%v30,224(%%r1,%2),0 \n\t"
"vlef %%v31,228(%%r1,%2),0 \n\t"
"vlef %%v30,232(%%r1,%2),1 \n\t"
"vlef %%v31,236(%%r1,%2),1 \n\t"
"vlef %%v30,240(%%r1,%2),2 \n\t"
"vlef %%v31,244(%%r1,%2),2 \n\t"
"vlef %%v30,248(%%r1,%2),3 \n\t"
"vlef %%v31,252(%%r1,%2),3 \n\t"
"vflpsb %%v16,%%v16 \n\t"
"vflpsb %%v17,%%v17 \n\t"
"vflpsb %%v18,%%v18 \n\t"
"vflpsb %%v19,%%v19 \n\t"
"vflpsb %%v20,%%v20 \n\t"
"vflpsb %%v21,%%v21 \n\t"
"vflpsb %%v22,%%v22 \n\t"
"vflpsb %%v23,%%v23 \n\t"
"vflpsb %%v24,%%v24 \n\t"
"vflpsb %%v25,%%v25 \n\t"
"vflpsb %%v26,%%v26 \n\t"
"vflpsb %%v27,%%v27 \n\t"
"vflpsb %%v28,%%v28 \n\t"
"vflpsb %%v29,%%v29 \n\t"
"vflpsb %%v30,%%v30 \n\t"
"vflpsb %%v31,%%v31 \n\t"
"vfasb %%v16,%%v16,%%v17 \n\t"
"vfasb %%v17,%%v18,%%v19 \n\t"
"vfasb %%v18,%%v20,%%v21 \n\t"
"vfasb %%v19,%%v22,%%v23 \n\t"
"vfasb %%v18,%%v18,%%v19 \n\t"
"vfasb %%v20,%%v20,%%v21 \n\t"
"vfasb %%v22,%%v22,%%v23 \n\t"
"vfasb %%v24,%%v24,%%v25 \n\t"
"vfasb %%v26,%%v26,%%v27 \n\t"
"vfasb %%v28,%%v28,%%v29 \n\t"
"vfasb %%v30,%%v30,%%v31 \n\t"
"vfchsb %%v24,%%v16,%%v17 \n\t"
"vfchsb %%v25,%%v18,%%v19 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vfmaxsb %%v16,%%v16,%%v24,0 \n\t"
"vfmaxsb %%v18,%%v18,%%v26,0 \n\t"
"vfmaxsb %%v20,%%v20,%%v28,0 \n\t"
"vfmaxsb %%v22,%%v22,%%v30,0 \n\t"
"vfchsb %%v26,%%v24,%%v25 \n\t"
"vsel %%v26,%%v24,%%v25,%%v26 \n\t"
"vfmaxsb %%v16,%%v16,%%v20,0 \n\t"
"vfmaxsb %%v18,%%v18,%%v22,0 \n\t"
"vfchsb %%v27,%%v26,%%v0 \n\t"
"vsel %%v0,%%v26,%%v0,%%v27 \n\t"
"vfmaxsb %%v16,%%v16,%%v18,0 \n\t"
"vlef %%v16,128(%%r1,%2),0 \n\t"
"vlef %%v17,132(%%r1,%2),0 \n\t"
"vlef %%v16,136(%%r1,%2),1 \n\t"
"vlef %%v17,140(%%r1,%2),1 \n\t"
"vlef %%v16,144(%%r1,%2),2 \n\t"
"vlef %%v17,148(%%r1,%2),2 \n\t"
"vlef %%v16,152(%%r1,%2),3 \n\t"
"vlef %%v17,156(%%r1,%2),3 \n\t"
"vlef %%v18,160(%%r1,%2),0 \n\t"
"vlef %%v19,164(%%r1,%2),0 \n\t"
"vlef %%v18,168(%%r1,%2),1 \n\t"
"vlef %%v19,172(%%r1,%2),1 \n\t"
"vlef %%v18,176(%%r1,%2),2 \n\t"
"vlef %%v19,180(%%r1,%2),2 \n\t"
"vlef %%v18,184(%%r1,%2),3 \n\t"
"vlef %%v19,188(%%r1,%2),3 \n\t"
"vlef %%v20,192(%%r1,%2),0 \n\t"
"vlef %%v21,196(%%r1,%2),0 \n\t"
"vlef %%v20,200(%%r1,%2),1 \n\t"
"vlef %%v21,204(%%r1,%2),1 \n\t"
"vlef %%v20,208(%%r1,%2),2 \n\t"
"vlef %%v21,212(%%r1,%2),2 \n\t"
"vlef %%v20,216(%%r1,%2),3 \n\t"
"vlef %%v21,220(%%r1,%2),3 \n\t"
"vlef %%v22,224(%%r1,%2),0 \n\t"
"vlef %%v23,228(%%r1,%2),0 \n\t"
"vlef %%v22,232(%%r1,%2),1 \n\t"
"vlef %%v23,236(%%r1,%2),1 \n\t"
"vlef %%v22,240(%%r1,%2),2 \n\t"
"vlef %%v23,244(%%r1,%2),2 \n\t"
"vlef %%v22,248(%%r1,%2),3 \n\t"
"vlef %%v23,252(%%r1,%2),3 \n\t"
"vflpsb %%v16, %%v16 \n\t"
"vflpsb %%v17, %%v17 \n\t"
"vflpsb %%v18, %%v18 \n\t"
"vflpsb %%v19, %%v19 \n\t"
"vflpsb %%v20, %%v20 \n\t"
"vflpsb %%v21, %%v21 \n\t"
"vflpsb %%v22, %%v22 \n\t"
"vflpsb %%v23, %%v23 \n\t"
"vfasb %%v16,%%v16,%%v17 \n\t"
"vfasb %%v17,%%v18,%%v19 \n\t"
"vfasb %%v18,%%v20,%%v21 \n\t"
"vfasb %%v19,%%v22,%%v23 \n\t"
"vfchsb %%v24,%%v16,%%v17 \n\t"
"vfchsb %%v25,%%v18,%%v19 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vfchsb %%v26,%%v24,%%v25 \n\t"
"vsel %%v26,%%v24,%%v25,%%v26 \n\t"
"vfchsb %%v27,%%v26,%%v0 \n\t"
"vsel %%v0,%%v26,%%v0,%%v27 \n\t"
"vfmaxsb %%v0,%%v0,%%v16,0 \n\t"
"agfi %%r1, 256 \n\t"
"brctg %%r0, 0b \n\t"
"veslg %%v16,%%v0,32 \n\t"
"vfchsb %%v17,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
"vfmaxsb %%v0,%%v0,%%v16,0 \n\t"
"vrepf %%v16,%%v0,2 \n\t"
"wfchsb %%v17,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
"wfmaxsb %%v0,%%v0,%%v16,0 \n\t"
"ler %0,%%f0 "
:"=f"(amax)
:"r"(n),"ZR"((const FLOAT (*)[n])x)
@ -233,11 +221,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
maxf=CABS1(x,0);
inc_x2 = 2 * inc_x;
ix += inc_x2;
i++;
BLASLONG n1 = (n - 1) & -4;
while ((i - 1) < n1) {
BLASLONG n1 = n & -4;
while (i < n1) {
if (CABS1(x,ix) > maxf) {
maxf = CABS1(x,ix);

View File

@ -43,8 +43,8 @@ static FLOAT camin_kernel_32(BLASLONG n, FLOAT *x)
__asm__ volatile (
"vlef %%v0,0(%2),0 \n\t"
"vlef %%v16,4(%2),0 \n\t"
"vlef %%v0,8(%2),0 \n\t"
"vlef %%v16,12(%2),0 \n\t"
"vlef %%v0,8(%2),1 \n\t"
"vlef %%v16,12(%2),1 \n\t"
"vlef %%v0,16(%2),2 \n\t"
"vlef %%v16,20(%2),2 \n\t"
"vlef %%v0,24(%2),3 \n\t"
@ -59,8 +59,8 @@ static FLOAT camin_kernel_32(BLASLONG n, FLOAT *x)
"vlef %%v16,0(%%r1,%2),0 \n\t"
"vlef %%v17,4(%%r1,%2),0 \n\t"
"vlef %%v16,8(%%r1,%2),0 \n\t"
"vlef %%v17,12(%%r1,%2),0 \n\t"
"vlef %%v16,8(%%r1,%2),1 \n\t"
"vlef %%v17,12(%%r1,%2),1 \n\t"
"vlef %%v16,16(%%r1,%2),2 \n\t"
"vlef %%v17,20(%%r1,%2),2 \n\t"
"vlef %%v16,24(%%r1,%2),3 \n\t"
@ -68,8 +68,8 @@ static FLOAT camin_kernel_32(BLASLONG n, FLOAT *x)
"vlef %%v18,32(%%r1,%2),0 \n\t"
"vlef %%v19,36(%%r1,%2),0 \n\t"
"vlef %%v18,40(%%r1,%2),0 \n\t"
"vlef %%v19,44(%%r1,%2),0 \n\t"
"vlef %%v18,40(%%r1,%2),1 \n\t"
"vlef %%v19,44(%%r1,%2),1 \n\t"
"vlef %%v18,48(%%r1,%2),2 \n\t"
"vlef %%v19,52(%%r1,%2),2 \n\t"
"vlef %%v18,56(%%r1,%2),3 \n\t"
@ -77,8 +77,8 @@ static FLOAT camin_kernel_32(BLASLONG n, FLOAT *x)
"vlef %%v20,64(%%r1,%2),0 \n\t"
"vlef %%v21,68(%%r1,%2),0 \n\t"
"vlef %%v20,72(%%r1,%2),0 \n\t"
"vlef %%v21,76(%%r1,%2),0 \n\t"
"vlef %%v20,72(%%r1,%2),1 \n\t"
"vlef %%v21,76(%%r1,%2),1 \n\t"
"vlef %%v20,80(%%r1,%2),2 \n\t"
"vlef %%v21,84(%%r1,%2),2 \n\t"
"vlef %%v20,88(%%r1,%2),3 \n\t"
@ -86,107 +86,95 @@ static FLOAT camin_kernel_32(BLASLONG n, FLOAT *x)
"vlef %%v22,96(%%r1,%2),0 \n\t"
"vlef %%v23,100(%%r1,%2),0 \n\t"
"vlef %%v22,104(%%r1,%2),0 \n\t"
"vlef %%v23,108(%%r1,%2),0 \n\t"
"vlef %%v22,104(%%r1,%2),1 \n\t"
"vlef %%v23,108(%%r1,%2),1 \n\t"
"vlef %%v22,112(%%r1,%2),2 \n\t"
"vlef %%v23,116(%%r1,%2),2 \n\t"
"vlef %%v22,120(%%r1,%2),3 \n\t"
"vlef %%v23,124(%%r1,%2),3 \n\t"
"vflpsb %%v16, %%v16 \n\t"
"vflpsb %%v17, %%v17 \n\t"
"vflpsb %%v18, %%v18 \n\t"
"vflpsb %%v19, %%v19 \n\t"
"vflpsb %%v20, %%v20 \n\t"
"vflpsb %%v21, %%v21 \n\t"
"vflpsb %%v22, %%v22 \n\t"
"vflpsb %%v23, %%v23 \n\t"
"vlef %%v24,128(%%r1,%2),0 \n\t"
"vlef %%v25,132(%%r1,%2),0 \n\t"
"vlef %%v24,136(%%r1,%2),1 \n\t"
"vlef %%v25,140(%%r1,%2),1 \n\t"
"vlef %%v24,144(%%r1,%2),2 \n\t"
"vlef %%v25,148(%%r1,%2),2 \n\t"
"vlef %%v24,152(%%r1,%2),3 \n\t"
"vlef %%v25,156(%%r1,%2),3 \n\t"
"vlef %%v26,160(%%r1,%2),0 \n\t"
"vlef %%v27,164(%%r1,%2),0 \n\t"
"vlef %%v26,168(%%r1,%2),1 \n\t"
"vlef %%v27,172(%%r1,%2),1 \n\t"
"vlef %%v26,176(%%r1,%2),2 \n\t"
"vlef %%v27,180(%%r1,%2),2 \n\t"
"vlef %%v26,184(%%r1,%2),3 \n\t"
"vlef %%v27,188(%%r1,%2),3 \n\t"
"vlef %%v28,192(%%r1,%2),0 \n\t"
"vlef %%v29,196(%%r1,%2),0 \n\t"
"vlef %%v28,200(%%r1,%2),1 \n\t"
"vlef %%v29,204(%%r1,%2),1 \n\t"
"vlef %%v28,208(%%r1,%2),2 \n\t"
"vlef %%v29,212(%%r1,%2),2 \n\t"
"vlef %%v28,216(%%r1,%2),3 \n\t"
"vlef %%v29,220(%%r1,%2),3 \n\t"
"vlef %%v30,224(%%r1,%2),0 \n\t"
"vlef %%v31,228(%%r1,%2),0 \n\t"
"vlef %%v30,232(%%r1,%2),1 \n\t"
"vlef %%v31,236(%%r1,%2),1 \n\t"
"vlef %%v30,240(%%r1,%2),2 \n\t"
"vlef %%v31,244(%%r1,%2),2 \n\t"
"vlef %%v30,248(%%r1,%2),3 \n\t"
"vlef %%v31,252(%%r1,%2),3 \n\t"
"vflpsb %%v16,%%v16 \n\t"
"vflpsb %%v17,%%v17 \n\t"
"vflpsb %%v18,%%v18 \n\t"
"vflpsb %%v19,%%v19 \n\t"
"vflpsb %%v20,%%v20 \n\t"
"vflpsb %%v21,%%v21 \n\t"
"vflpsb %%v22,%%v22 \n\t"
"vflpsb %%v23,%%v23 \n\t"
"vflpsb %%v24,%%v24 \n\t"
"vflpsb %%v25,%%v25 \n\t"
"vflpsb %%v26,%%v26 \n\t"
"vflpsb %%v27,%%v27 \n\t"
"vflpsb %%v28,%%v28 \n\t"
"vflpsb %%v29,%%v29 \n\t"
"vflpsb %%v30,%%v30 \n\t"
"vflpsb %%v31,%%v31 \n\t"
"vfasb %%v16,%%v16,%%v17 \n\t"
"vfasb %%v17,%%v18,%%v19 \n\t"
"vfasb %%v18,%%v20,%%v21 \n\t"
"vfasb %%v19,%%v22,%%v23 \n\t"
"vfasb %%v18,%%v18,%%v19 \n\t"
"vfasb %%v20,%%v20,%%v21 \n\t"
"vfasb %%v22,%%v22,%%v23 \n\t"
"vfasb %%v24,%%v24,%%v25 \n\t"
"vfasb %%v26,%%v26,%%v27 \n\t"
"vfasb %%v28,%%v28,%%v29 \n\t"
"vfasb %%v30,%%v30,%%v31 \n\t"
"vfchsb %%v24,%%v17,%%v16 \n\t"
"vfchsb %%v25,%%v19,%%v18 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vfminsb %%v16,%%v16,%%v24,0 \n\t"
"vfminsb %%v18,%%v18,%%v26,0 \n\t"
"vfminsb %%v20,%%v20,%%v28,0 \n\t"
"vfminsb %%v22,%%v22,%%v30,0 \n\t"
"vfchsb %%v26,%%v25,%%v24 \n\t"
"vsel %%v26,%%v24,%%v25,%%v26 \n\t"
"vfminsb %%v16,%%v16,%%v20,0 \n\t"
"vfminsb %%v18,%%v18,%%v22,0 \n\t"
"vfchsb %%v27,%%v0,%%v26 \n\t"
"vsel %%v0,%%v26,%%v0,%%v27 \n\t"
"vfminsb %%v16,%%v16,%%v18,0 \n\t"
"vlef %%v16,128(%%r1,%2),0 \n\t"
"vlef %%v17,132(%%r1,%2),0 \n\t"
"vlef %%v16,136(%%r1,%2),0 \n\t"
"vlef %%v17,140(%%r1,%2),0 \n\t"
"vlef %%v16,144(%%r1,%2),2 \n\t"
"vlef %%v17,148(%%r1,%2),2 \n\t"
"vlef %%v16,152(%%r1,%2),3 \n\t"
"vlef %%v17,156(%%r1,%2),3 \n\t"
"vlef %%v18,160(%%r1,%2),0 \n\t"
"vlef %%v19,164(%%r1,%2),0 \n\t"
"vlef %%v18,168(%%r1,%2),0 \n\t"
"vlef %%v19,172(%%r1,%2),0 \n\t"
"vlef %%v18,176(%%r1,%2),2 \n\t"
"vlef %%v19,180(%%r1,%2),2 \n\t"
"vlef %%v18,184(%%r1,%2),3 \n\t"
"vlef %%v19,188(%%r1,%2),3 \n\t"
"vlef %%v20,192(%%r1,%2),0 \n\t"
"vlef %%v21,196(%%r1,%2),0 \n\t"
"vlef %%v20,200(%%r1,%2),0 \n\t"
"vlef %%v21,204(%%r1,%2),0 \n\t"
"vlef %%v20,208(%%r1,%2),2 \n\t"
"vlef %%v21,212(%%r1,%2),2 \n\t"
"vlef %%v20,216(%%r1,%2),3 \n\t"
"vlef %%v21,220(%%r1,%2),3 \n\t"
"vlef %%v22,224(%%r1,%2),0 \n\t"
"vlef %%v23,228(%%r1,%2),0 \n\t"
"vlef %%v22,232(%%r1,%2),0 \n\t"
"vlef %%v23,236(%%r1,%2),0 \n\t"
"vlef %%v22,240(%%r1,%2),2 \n\t"
"vlef %%v23,244(%%r1,%2),2 \n\t"
"vlef %%v22,248(%%r1,%2),3 \n\t"
"vlef %%v23,252(%%r1,%2),3 \n\t"
"vflpsb %%v16, %%v16 \n\t"
"vflpsb %%v17, %%v17 \n\t"
"vflpsb %%v18, %%v18 \n\t"
"vflpsb %%v19, %%v19 \n\t"
"vflpsb %%v20, %%v20 \n\t"
"vflpsb %%v21, %%v21 \n\t"
"vflpsb %%v22, %%v22 \n\t"
"vflpsb %%v23, %%v23 \n\t"
"vfasb %%v16,%%v16,%%v17 \n\t"
"vfasb %%v17,%%v18,%%v19 \n\t"
"vfasb %%v18,%%v20,%%v21 \n\t"
"vfasb %%v19,%%v22,%%v23 \n\t"
"vfchsb %%v24,%%v17,%%v16 \n\t"
"vfchsb %%v25,%%v19,%%v18 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vfchsb %%v26,%%v25,%%v24 \n\t"
"vsel %%v26,%%v24,%%v25,%%v26 \n\t"
"vfchsb %%v27,%%v0,%%v26 \n\t"
"vsel %%v0,%%v26,%%v0,%%v27 \n\t"
"vfminsb %%v0,%%v0,%%v16,0 \n\t"
"agfi %%r1, 256 \n\t"
"brctg %%r0, 0b \n\t"
"veslg %%v16,%%v0,32 \n\t"
"vfchsb %%v17,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
"vfminsb %%v0,%%v0,%%v16,0 \n\t"
"vrepf %%v16,%%v0,2 \n\t"
"wfchsb %%v17,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
"wfminsb %%v0,%%v0,%%v16,0 \n\t"
"ler %0,%%f0 "
:"=f"(amin)
:"r"(n),"ZR"((const FLOAT (*)[n])x)
@ -233,11 +221,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
minf=CABS1(x,0);
inc_x2 = 2 * inc_x;
ix += inc_x2;
i++;
BLASLONG n1 = (n - 1) & -4;
while ((i - 1) < n1) {
BLASLONG n1 = n & -4;
while (i < n1) {
if (CABS1(x,ix) < minf) {
minf = CABS1(x,ix);

View File

@ -39,8 +39,7 @@ static FLOAT damax_kernel_32(BLASLONG n, FLOAT *x)
FLOAT amax;
__asm__ volatile (
"vl %%v0,0(%2) \n\t"
"vflpdb %%v0,%%v0 \n\t"
"vl %%v0,0(%2) \n\t"
"srlg %%r0,%1,5 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
@ -54,79 +53,42 @@ static FLOAT damax_kernel_32(BLASLONG n, FLOAT *x)
"vl %%v21,80(%%r1,%2) \n\t"
"vl %%v22,96(%%r1,%2) \n\t"
"vl %%v23,112(%%r1,%2) \n\t"
"vflpdb %%v16, %%v16 \n\t"
"vflpdb %%v17, %%v17 \n\t"
"vflpdb %%v18, %%v18 \n\t"
"vflpdb %%v19, %%v19 \n\t"
"vflpdb %%v20, %%v20 \n\t"
"vflpdb %%v21, %%v21 \n\t"
"vflpdb %%v22, %%v22 \n\t"
"vflpdb %%v23, %%v23 \n\t"
"vl %%v24,128(%%r1,%2) \n\t"
"vl %%v25,144(%%r1,%2) \n\t"
"vl %%v26,160(%%r1,%2) \n\t"
"vl %%v27,176(%%r1,%2) \n\t"
"vl %%v28,192(%%r1,%2) \n\t"
"vl %%v29,208(%%r1,%2) \n\t"
"vl %%v30,224(%%r1,%2) \n\t"
"vl %%v31,240(%%r1,%2) \n\t"
"vfchdb %%v24,%%v16,%%v17 \n\t"
"vfchdb %%v25,%%v18,%%v19 \n\t"
"vfchdb %%v26,%%v20,%%v21 \n\t"
"vfchdb %%v27,%%v22,%%v23 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vsel %%v26,%%v20,%%v21,%%v26 \n\t"
"vsel %%v27,%%v22,%%v23,%%v27 \n\t"
"vfmaxdb %%v16,%%v16,%%v24,8 \n\t"
"vfmaxdb %%v17,%%v17,%%v25,8 \n\t"
"vfmaxdb %%v18,%%v18,%%v26,8 \n\t"
"vfmaxdb %%v19,%%v19,%%v27,8 \n\t"
"vfmaxdb %%v20,%%v20,%%v28,8 \n\t"
"vfmaxdb %%v21,%%v21,%%v29,8 \n\t"
"vfmaxdb %%v22,%%v22,%%v30,8 \n\t"
"vfmaxdb %%v23,%%v23,%%v31,8 \n\t"
"vfchdb %%v28,%%v24,%%v25 \n\t"
"vfchdb %%v29,%%v26,%%v27 \n\t"
"vsel %%v28,%%v24,%%v25,%%v28 \n\t"
"vsel %%v29,%%v26,%%v27,%%v29 \n\t"
"vfmaxdb %%v16,%%v16,%%v20,8 \n\t"
"vfmaxdb %%v17,%%v17,%%v21,8 \n\t"
"vfmaxdb %%v18,%%v18,%%v22,8 \n\t"
"vfmaxdb %%v19,%%v19,%%v23,8 \n\t"
"vfchdb %%v30,%%v28,%%v29 \n\t"
"vsel %%v30,%%v28,%%v29,%%v30 \n\t"
"vfmaxdb %%v16,%%v16,%%v18,8 \n\t"
"vfmaxdb %%v17,%%v17,%%v19,8 \n\t"
"vfchdb %%v31,%%v30,%%v0 \n\t"
"vsel %%v0,%%v30,%%v0,%%v31 \n\t"
"vfmaxdb %%v16,%%v16,%%v17,8 \n\t"
"vl %%v16,128(%%r1,%2) \n\t"
"vl %%v17,144(%%r1,%2) \n\t"
"vl %%v18,160(%%r1,%2) \n\t"
"vl %%v19,176(%%r1,%2) \n\t"
"vl %%v20,192(%%r1,%2) \n\t"
"vl %%v21,208(%%r1,%2) \n\t"
"vl %%v22,224(%%r1,%2) \n\t"
"vl %%v23,240(%%r1,%2) \n\t"
"vflpdb %%v16, %%v16 \n\t"
"vflpdb %%v17, %%v17 \n\t"
"vflpdb %%v18, %%v18 \n\t"
"vflpdb %%v19, %%v19 \n\t"
"vflpdb %%v20, %%v20 \n\t"
"vflpdb %%v21, %%v21 \n\t"
"vflpdb %%v22, %%v22 \n\t"
"vflpdb %%v23, %%v23 \n\t"
"vfchdb %%v24,%%v16,%%v17 \n\t"
"vfchdb %%v25,%%v18,%%v19 \n\t"
"vfchdb %%v26,%%v20,%%v21 \n\t"
"vfchdb %%v27,%%v22,%%v23 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vsel %%v26,%%v20,%%v21,%%v26 \n\t"
"vsel %%v27,%%v22,%%v23,%%v27 \n\t"
"vfchdb %%v28,%%v24,%%v25 \n\t"
"vfchdb %%v29,%%v26,%%v27 \n\t"
"vsel %%v28,%%v24,%%v25,%%v28 \n\t"
"vsel %%v29,%%v26,%%v27,%%v29 \n\t"
"vfchdb %%v30,%%v28,%%v29 \n\t"
"vsel %%v30,%%v28,%%v29,%%v30 \n\t"
"vfchdb %%v31,%%v30,%%v0 \n\t"
"vsel %%v0,%%v30,%%v0,%%v31 \n\t"
"vfmaxdb %%v0,%%v0,%%16,8 \n\t"
"agfi %%r1, 256 \n\t"
"brctg %%r0, 0b \n\t"
"vrepg %%v16,%%v0,1 \n\t"
"wfchdb %%v17,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
"ldr %0,%%f0 "
"vrepg %%v16,%%v0,1 \n\t"
"wfmaxdb %%v0,%%v0,%%v16,8 \n\t"
"lpdr %0,%%f0 "
:"=f"(amax)
:"r"(n),"ZR"((const FLOAT (*)[n])x)
:"memory","cc","r0","r1","v0","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27","v28","v29","v30","v31"
@ -168,11 +130,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
} else {
maxf=ABS(x[0]);
i += inc_x;
j++;
BLASLONG n1 = (n - 1) & -4;
while ((j - 1) < n1) {
BLASLONG n1 = n & -4;
while (j < n1) {
if (ABS(x[i]) > maxf) {
maxf = ABS(x[i]);

View File

@ -39,11 +39,10 @@ static FLOAT damin_kernel_32(BLASLONG n, FLOAT *x)
FLOAT amin;
__asm__ volatile (
"vl %%v0,0(%2) \n\t"
"vflpdb %%v0,%%v0 \n\t"
"vl %%v0,0(%2) \n\t"
"srlg %%r0,%1,5 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
"0: \n\t"
"pfd 1, 1024(%%r1,%2) \n\t"
"vl %%v16,0(%%r1,%2) \n\t"
@ -54,79 +53,42 @@ static FLOAT damin_kernel_32(BLASLONG n, FLOAT *x)
"vl %%v21,80(%%r1,%2) \n\t"
"vl %%v22,96(%%r1,%2) \n\t"
"vl %%v23,112(%%r1,%2) \n\t"
"vflpdb %%v16, %%v16 \n\t"
"vflpdb %%v17, %%v17 \n\t"
"vflpdb %%v18, %%v18 \n\t"
"vflpdb %%v19, %%v19 \n\t"
"vflpdb %%v20, %%v20 \n\t"
"vflpdb %%v21, %%v21 \n\t"
"vflpdb %%v22, %%v22 \n\t"
"vflpdb %%v23, %%v23 \n\t"
"vl %%v24,128(%%r1,%2) \n\t"
"vl %%v25,144(%%r1,%2) \n\t"
"vl %%v26,160(%%r1,%2) \n\t"
"vl %%v27,176(%%r1,%2) \n\t"
"vl %%v28,192(%%r1,%2) \n\t"
"vl %%v29,208(%%r1,%2) \n\t"
"vl %%v30,224(%%r1,%2) \n\t"
"vl %%v31,240(%%r1,%2) \n\t"
"vfchdb %%v24,%%v17,%%v16 \n\t"
"vfchdb %%v25,%%v19,%%v18 \n\t"
"vfchdb %%v26,%%v21,%%v20 \n\t"
"vfchdb %%v27,%%v23,%%v22 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vsel %%v26,%%v20,%%v21,%%v26 \n\t"
"vsel %%v27,%%v22,%%v23,%%v27 \n\t"
"vfmindb %%v16,%%v16,%%v24,8 \n\t"
"vfmindb %%v17,%%v17,%%v25,8 \n\t"
"vfmindb %%v18,%%v18,%%v26,8 \n\t"
"vfmindb %%v19,%%v19,%%v27,8 \n\t"
"vfmindb %%v20,%%v20,%%v28,8 \n\t"
"vfmindb %%v21,%%v21,%%v29,8 \n\t"
"vfmindb %%v22,%%v22,%%v30,8 \n\t"
"vfmindb %%v23,%%v23,%%v31,8 \n\t"
"vfchdb %%v28,%%v25,%%v24 \n\t"
"vfchdb %%v29,%%v27,%%v26 \n\t"
"vsel %%v28,%%v24,%%v25,%%v28 \n\t"
"vsel %%v29,%%v26,%%v27,%%v29 \n\t"
"vfmindb %%v16,%%v16,%%v20,8 \n\t"
"vfmindb %%v17,%%v17,%%v21,8 \n\t"
"vfmindb %%v18,%%v18,%%v22,8 \n\t"
"vfmindb %%v19,%%v19,%%v23,8 \n\t"
"vfchdb %%v30,%%v29,%%v28 \n\t"
"vsel %%v30,%%v28,%%v29,%%v30 \n\t"
"vfmindb %%v16,%%v16,%%v18,8 \n\t"
"vfmindb %%v17,%%v17,%%v19,8 \n\t"
"vfchdb %%v31,%%v0,%%v30 \n\t"
"vsel %%v0,%%v30,%%v0,%%v31 \n\t"
"vfmindb %%v16,%%v16,%%v17,8 \n\t"
"vl %%v16,128(%%r1,%2) \n\t"
"vl %%v17,144(%%r1,%2) \n\t"
"vl %%v18,160(%%r1,%2) \n\t"
"vl %%v19,176(%%r1,%2) \n\t"
"vl %%v20,192(%%r1,%2) \n\t"
"vl %%v21,208(%%r1,%2) \n\t"
"vl %%v22,224(%%r1,%2) \n\t"
"vl %%v23,240(%%r1,%2) \n\t"
"vflpdb %%v16, %%v16 \n\t"
"vflpdb %%v17, %%v17 \n\t"
"vflpdb %%v18, %%v18 \n\t"
"vflpdb %%v19, %%v19 \n\t"
"vflpdb %%v20, %%v20 \n\t"
"vflpdb %%v21, %%v21 \n\t"
"vflpdb %%v22, %%v22 \n\t"
"vflpdb %%v23, %%v23 \n\t"
"vfchdb %%v24,%%v17,%%v16 \n\t"
"vfchdb %%v25,%%v19,%%v18 \n\t"
"vfchdb %%v26,%%v21,%%v20 \n\t"
"vfchdb %%v27,%%v23,%%v22 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vsel %%v26,%%v20,%%v21,%%v26 \n\t"
"vsel %%v27,%%v22,%%v23,%%v27 \n\t"
"vfchdb %%v28,%%v25,%%v24 \n\t"
"vfchdb %%v29,%%v27,%%v26 \n\t"
"vsel %%v28,%%v24,%%v25,%%v28 \n\t"
"vsel %%v29,%%v26,%%v27,%%v29 \n\t"
"vfchdb %%v30,%%v29,%%v28 \n\t"
"vsel %%v30,%%v28,%%v29,%%v30 \n\t"
"vfchdb %%v31,%%v0,%%v30 \n\t"
"vsel %%v0,%%v30,%%v0,%%v31 \n\t"
"vfmindb %%v0,%%v0,%%16,8 \n\t"
"agfi %%r1, 256 \n\t"
"brctg %%r0, 0b \n\t"
"vrepg %%v16,%%v0,1 \n\t"
"wfchdb %%v17,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
"ldr %0,%%f0 "
"vrepg %%v16,%%v0,1 \n\t"
"wfmindb %%v0,%%v0,%%v16,8 \n\t"
"lpdr %0,%%f0 "
:"=f"(amin)
:"r"(n),"ZR"((const FLOAT (*)[n])x)
:"memory","cc","r0","r1","v0","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27","v28","v29","v30","v31"
@ -168,11 +130,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
} else {
minf=ABS(x[0]);
i += inc_x;
j++;
BLASLONG n1 = (n - 1) & -4;
while ((j - 1) < n1) {
BLASLONG n1 = n & -4;
while (j < n1) {
if (ABS(x[i]) < minf) {
minf = ABS(x[i]);

View File

@ -32,7 +32,7 @@ static FLOAT dmax_kernel_32(BLASLONG n, FLOAT *x)
FLOAT max;
__asm__ volatile (
"vl %%v0,0(%2) \n\t"
"vl %%v0,0(%2) \n\t"
"srlg %%r0,%1,5 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
@ -46,62 +46,41 @@ static FLOAT dmax_kernel_32(BLASLONG n, FLOAT *x)
"vl %%v21,80(%%r1,%2) \n\t"
"vl %%v22,96(%%r1,%2) \n\t"
"vl %%v23,112(%%r1,%2) \n\t"
"vl %%v24,128(%%r1,%2) \n\t"
"vl %%v25,144(%%r1,%2) \n\t"
"vl %%v26,160(%%r1,%2) \n\t"
"vl %%v27,176(%%r1,%2) \n\t"
"vl %%v28,192(%%r1,%2) \n\t"
"vl %%v29,208(%%r1,%2) \n\t"
"vl %%v30,224(%%r1,%2) \n\t"
"vl %%v31,240(%%r1,%2) \n\t"
"vfchdb %%v24,%%v16,%%v17 \n\t"
"vfchdb %%v25,%%v18,%%v19 \n\t"
"vfchdb %%v26,%%v20,%%v21 \n\t"
"vfchdb %%v27,%%v22,%%v23 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vsel %%v26,%%v20,%%v21,%%v26 \n\t"
"vsel %%v27,%%v22,%%v23,%%v27 \n\t"
"vfmaxdb %%v16,%%v16,%%v24,0 \n\t"
"vfmaxdb %%v17,%%v17,%%v25,0 \n\t"
"vfmaxdb %%v18,%%v18,%%v26,0 \n\t"
"vfmaxdb %%v19,%%v19,%%v27,0 \n\t"
"vfmaxdb %%v20,%%v20,%%v28,0 \n\t"
"vfmaxdb %%v21,%%v21,%%v29,0 \n\t"
"vfmaxdb %%v22,%%v22,%%v30,0 \n\t"
"vfmaxdb %%v23,%%v23,%%v31,0 \n\t"
"vfchdb %%v28,%%v24,%%v25 \n\t"
"vfchdb %%v29,%%v26,%%v27 \n\t"
"vsel %%v28,%%v24,%%v25,%%v28 \n\t"
"vsel %%v29,%%v26,%%v27,%%v29 \n\t"
"vfmaxdb %%v16,%%v16,%%v20,0 \n\t"
"vfmaxdb %%v17,%%v17,%%v21,0 \n\t"
"vfmaxdb %%v18,%%v18,%%v22,0 \n\t"
"vfmaxdb %%v19,%%v19,%%v23,0 \n\t"
"vfchdb %%v30,%%v28,%%v29 \n\t"
"vsel %%v30,%%v28,%%v29,%%v30 \n\t"
"vfmaxdb %%v16,%%v16,%%v18,0 \n\t"
"vfmaxdb %%v17,%%v17,%%v19,0 \n\t"
"vfchdb %%v31,%%v30,%%v0 \n\t"
"vsel %%v0,%%v30,%%v0,%%v31 \n\t"
"vfmaxdb %%v16,%%v16,%%v17,0 \n\t"
"vl %%v16,128(%%r1,%2) \n\t"
"vl %%v17,144(%%r1,%2) \n\t"
"vl %%v18,160(%%r1,%2) \n\t"
"vl %%v19,176(%%r1,%2) \n\t"
"vl %%v20,192(%%r1,%2) \n\t"
"vl %%v21,208(%%r1,%2) \n\t"
"vl %%v22,224(%%r1,%2) \n\t"
"vl %%v23,240(%%r1,%2) \n\t"
"vfchdb %%v24,%%v16,%%v17 \n\t"
"vfchdb %%v25,%%v18,%%v19 \n\t"
"vfchdb %%v26,%%v20,%%v21 \n\t"
"vfchdb %%v27,%%v22,%%v23 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vsel %%v26,%%v20,%%v21,%%v26 \n\t"
"vsel %%v27,%%v22,%%v23,%%v27 \n\t"
"vfchdb %%v28,%%v24,%%v25 \n\t"
"vfchdb %%v29,%%v26,%%v27 \n\t"
"vsel %%v28,%%v24,%%v25,%%v28 \n\t"
"vsel %%v29,%%v26,%%v27,%%v29 \n\t"
"vfchdb %%v30,%%v28,%%v29 \n\t"
"vsel %%v30,%%v28,%%v29,%%v30 \n\t"
"vfchdb %%v31,%%v30,%%v0 \n\t"
"vsel %%v0,%%v30,%%v0,%%v31 \n\t"
"vfmaxdb %%v0,%%v0,%%16,0 \n\t"
"agfi %%r1, 256 \n\t"
"brctg %%r0, 0b \n\t"
"vrepg %%v16,%%v0,1 \n\t"
"wfchdb %%v17,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
"vrepg %%v16,%%v0,1 \n\t"
"wfmaxdb %%v0,%%v0,%%v16,0 \n\t"
"ldr %0,%%f0 "
:"=f"(max)
:"r"(n),"ZR"((const FLOAT (*)[n])x)
@ -144,11 +123,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
} else {
maxf=x[0];
i += inc_x;
j++;
BLASLONG n1 = (n - 1) & -4;
while ((j - 1) < n1) {
BLASLONG n1 = n & -4;
while (j < n1) {
if (x[i] > maxf) {
maxf = x[i];

View File

@ -32,7 +32,7 @@ static FLOAT dmin_kernel_32(BLASLONG n, FLOAT *x)
FLOAT min;
__asm__ volatile (
"vl %%v0,0(%2) \n\t"
"vl %%v0,0(%2) \n\t"
"srlg %%r0,%1,5 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
@ -46,62 +46,41 @@ static FLOAT dmin_kernel_32(BLASLONG n, FLOAT *x)
"vl %%v21,80(%%r1,%2) \n\t"
"vl %%v22,96(%%r1,%2) \n\t"
"vl %%v23,112(%%r1,%2) \n\t"
"vl %%v24,128(%%r1,%2) \n\t"
"vl %%v25,144(%%r1,%2) \n\t"
"vl %%v26,160(%%r1,%2) \n\t"
"vl %%v27,176(%%r1,%2) \n\t"
"vl %%v28,192(%%r1,%2) \n\t"
"vl %%v29,208(%%r1,%2) \n\t"
"vl %%v30,224(%%r1,%2) \n\t"
"vl %%v31,240(%%r1,%2) \n\t"
"vfchdb %%v24,%%v17,%%v16 \n\t"
"vfchdb %%v25,%%v19,%%v18 \n\t"
"vfchdb %%v26,%%v21,%%v20 \n\t"
"vfchdb %%v27,%%v23,%%v22 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vsel %%v26,%%v20,%%v21,%%v26 \n\t"
"vsel %%v27,%%v22,%%v23,%%v27 \n\t"
"vfmindb %%v16,%%v16,%%v24,0 \n\t"
"vfmindb %%v17,%%v17,%%v25,0 \n\t"
"vfmindb %%v18,%%v18,%%v26,0 \n\t"
"vfmindb %%v19,%%v19,%%v27,0 \n\t"
"vfmindb %%v20,%%v20,%%v28,0 \n\t"
"vfmindb %%v21,%%v21,%%v29,0 \n\t"
"vfmindb %%v22,%%v22,%%v30,0 \n\t"
"vfmindb %%v23,%%v23,%%v31,0 \n\t"
"vfchdb %%v28,%%v25,%%v24 \n\t"
"vfchdb %%v29,%%v27,%%v26 \n\t"
"vsel %%v28,%%v24,%%v25,%%v28 \n\t"
"vsel %%v29,%%v26,%%v27,%%v29 \n\t"
"vfmindb %%v16,%%v16,%%v20,0 \n\t"
"vfmindb %%v17,%%v17,%%v21,0 \n\t"
"vfmindb %%v18,%%v18,%%v22,0 \n\t"
"vfmindb %%v19,%%v19,%%v23,0 \n\t"
"vfchdb %%v30,%%v29,%%v28 \n\t"
"vsel %%v30,%%v28,%%v29,%%v30 \n\t"
"vfmindb %%v16,%%v16,%%v18,0 \n\t"
"vfmindb %%v17,%%v17,%%v19,0 \n\t"
"vfchdb %%v31,%%v0,%%v30 \n\t"
"vsel %%v0,%%v30,%%v0,%%v31 \n\t"
"vfmindb %%v16,%%v16,%%v17,0 \n\t"
"vl %%v16,128(%%r1,%2) \n\t"
"vl %%v17,144(%%r1,%2) \n\t"
"vl %%v18,160(%%r1,%2) \n\t"
"vl %%v19,176(%%r1,%2) \n\t"
"vl %%v20,192(%%r1,%2) \n\t"
"vl %%v21,208(%%r1,%2) \n\t"
"vl %%v22,224(%%r1,%2) \n\t"
"vl %%v23,240(%%r1,%2) \n\t"
"vfchdb %%v24,%%v17,%%v16 \n\t"
"vfchdb %%v25,%%v19,%%v18 \n\t"
"vfchdb %%v26,%%v21,%%v20 \n\t"
"vfchdb %%v27,%%v23,%%v22 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vsel %%v26,%%v20,%%v21,%%v26 \n\t"
"vsel %%v27,%%v22,%%v23,%%v27 \n\t"
"vfchdb %%v28,%%v25,%%v24 \n\t"
"vfchdb %%v29,%%v27,%%v26 \n\t"
"vsel %%v28,%%v24,%%v25,%%v28 \n\t"
"vsel %%v29,%%v26,%%v27,%%v29 \n\t"
"vfchdb %%v30,%%v29,%%v28 \n\t"
"vsel %%v30,%%v28,%%v29,%%v30 \n\t"
"vfchdb %%v31,%%v0,%%v30 \n\t"
"vsel %%v0,%%v30,%%v0,%%v31 \n\t"
"vfmindb %%v0,%%v0,%%16,0 \n\t"
"agfi %%r1, 256 \n\t"
"brctg %%r0, 0b \n\t"
"vrepg %%v16,%%v0,1 \n\t"
"wfchdb %%v17,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
"vrepg %%v16,%%v0,1 \n\t"
"wfmindb %%v0,%%v0,%%v16,0 \n\t"
"ldr %0,%%f0 "
:"=f"(min)
:"r"(n),"ZR"((const FLOAT (*)[n])x)
@ -144,11 +123,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
} else {
minf=x[0];
i += inc_x;
j++;
BLASLONG n1 = (n - 1) & -4;
while ((j - 1) < n1) {
BLASLONG n1 = n & -4;
while (j < n1) {
if (x[i] < minf) {
minf = x[i];

View File

@ -76,7 +76,7 @@ static BLASLONG icamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amax)
"srlg %%r0,%2,5 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
"pfd 1, 1024(%3) \n\t"
"pfd 1, 1024(%%r1,%3) \n\t"
"vlef %%v16,0(%%r1,%3),0 \n\t"
"vlef %%v17,4(%%r1,%3),0 \n\t"
@ -127,14 +127,14 @@ static BLASLONG icamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amax)
"vfasb %%v18,%%v20,%%v21 \n\t"
"vfasb %%v19,%%v22,%%v23 \n\t"
"vfchsb %%v5,%%v16,%%v17 \n\t"
"vfchsb %%v6,%%v18,%%v19 \n\t"
"vfchesb %%v5,%%v16,%%v17 \n\t"
"vfchesb %%v6,%%v18,%%v19 \n\t"
"vsel %%v16,%%v16,%%v17,%%v5 \n\t"
"vsel %%v5,%%v24,%%v25,%%v5 \n\t"
"vsel %%v17,%%v18,%%v19,%%v6 \n\t"
"vsel %%v6,%%v26,%%v27,%%v6 \n\t"
"vfchsb %%v18,%%v16,%%v17 \n\t"
"vfchesb %%v18,%%v16,%%v17 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v5,%%v5,%%v6,%%v18 \n\t"
"vsegf %%v6,%%v5 \n\t"
@ -142,13 +142,13 @@ static BLASLONG icamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amax)
"vag %%v5,%%v5,%%v4 \n\t"
"vag %%v6,%%v6,%%v4 \n\t"
"vfchsb %%v7,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v7 \n\t"
"vfchesb %%v7,%%v0,%%v16 \n\t"
"vsel %%v0,%%v0,%%v16,%%v7 \n\t"
"vsegf %%v8,%%v7 \n\t"
"vesrlg %%v7,%%v7,32 \n\t"
"vsegf %%v7,%%v7 \n\t"
"vsel %%v1,%%v5,%%v1,%%v7 \n\t"
"vsel %%v2,%%v6,%%v2,%%v8 \n\t"
"vsel %%v1,%%v1,%%v5,%%v7 \n\t"
"vsel %%v2,%%v2,%%v6,%%v8 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"vlef %%v16,128(%%r1,%3),0 \n\t"
@ -200,14 +200,14 @@ static BLASLONG icamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amax)
"vfasb %%v18,%%v20,%%v21 \n\t"
"vfasb %%v19,%%v22,%%v23 \n\t"
"vfchsb %%v5,%%v16,%%v17 \n\t"
"vfchsb %%v6,%%v18,%%v19 \n\t"
"vfchesb %%v5,%%v16,%%v17 \n\t"
"vfchesb %%v6,%%v18,%%v19 \n\t"
"vsel %%v16,%%v16,%%v17,%%v5 \n\t"
"vsel %%v5,%%v24,%%v25,%%v5 \n\t"
"vsel %%v17,%%v18,%%v19,%%v6 \n\t"
"vsel %%v6,%%v26,%%v27,%%v6 \n\t"
"vfchsb %%v18,%%v16,%%v17 \n\t"
"vfchesb %%v18,%%v16,%%v17 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v5,%%v5,%%v6,%%v18 \n\t"
"vsegf %%v6,%%v5 \n\t"
@ -215,13 +215,13 @@ static BLASLONG icamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amax)
"vag %%v5,%%v5,%%v4 \n\t"
"vag %%v6,%%v6,%%v4 \n\t"
"vfchsb %%v7,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v7 \n\t"
"vfchesb %%v7,%%v0,%%v16 \n\t"
"vsel %%v0,%%v0,%%v16,%%v7 \n\t"
"vsegf %%v8,%%v7 \n\t"
"vesrlg %%v7,%%v7,32 \n\t"
"vsegf %%v7,%%v7 \n\t"
"vsel %%v1,%%v5,%%v1,%%v7 \n\t"
"vsel %%v2,%%v6,%%v2,%%v8 \n\t"
"vsel %%v1,%%v1,%%v5,%%v7 \n\t"
"vsel %%v2,%%v2,%%v6,%%v8 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"agfi %%r1, 256 \n\t"
@ -250,8 +250,8 @@ static BLASLONG icamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amax)
"wfchsb %%v4,%%v2,%%v0 \n\t"
"vsel %%v1,%%v3,%%v1,%%v4 \n\t"
"vsel %%v0,%%v2,%%v0,%%v4 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"ste %%f0,%1 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"2: \n\t"
"nop "
:"=r"(iamax),"=m"(*amax)
@ -302,6 +302,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
} else {
max = 0;
maxf = CABS1(x,0);
inc_x2 = 2 * inc_x;
ix += inc_x2;

View File

@ -127,14 +127,14 @@ static BLASLONG icamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amin)
"vfasb %%v18,%%v20,%%v21 \n\t"
"vfasb %%v19,%%v22,%%v23 \n\t"
"vfchsb %%v5,%%v17,%%v16 \n\t"
"vfchsb %%v6,%%v19,%%v18 \n\t"
"vfchesb %%v5,%%v17,%%v16 \n\t"
"vfchesb %%v6,%%v19,%%v18 \n\t"
"vsel %%v16,%%v16,%%v17,%%v5 \n\t"
"vsel %%v5,%%v24,%%v25,%%v5 \n\t"
"vsel %%v17,%%v18,%%v19,%%v6 \n\t"
"vsel %%v6,%%v26,%%v27,%%v6 \n\t"
"vfchsb %%v18,%%v17,%%v16 \n\t"
"vfchesb %%v18,%%v17,%%v16 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v5,%%v5,%%v6,%%v18 \n\t"
"vsegf %%v6,%%v5 \n\t"
@ -142,13 +142,13 @@ static BLASLONG icamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amin)
"vag %%v5,%%v5,%%v4 \n\t"
"vag %%v6,%%v6,%%v4 \n\t"
"vfchsb %%v7,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v7 \n\t"
"vfchesb %%v7,%%v16,%%v0 \n\t"
"vsel %%v0,%%v0,%%v16,%%v7 \n\t"
"vsegf %%v8,%%v7 \n\t"
"vesrlg %%v7,%%v7,32 \n\t"
"vsegf %%v7,%%v7 \n\t"
"vsel %%v1,%%v5,%%v1,%%v7 \n\t"
"vsel %%v2,%%v6,%%v2,%%v8 \n\t"
"vsel %%v1,%%v1,%%v5,%%v7 \n\t"
"vsel %%v2,%%v2,%%v6,%%v8 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"vlef %%v16,128(%%r1,%3),0 \n\t"
@ -200,14 +200,14 @@ static BLASLONG icamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amin)
"vfasb %%v18,%%v20,%%v21 \n\t"
"vfasb %%v19,%%v22,%%v23 \n\t"
"vfchsb %%v5,%%v17,%%v16 \n\t"
"vfchsb %%v6,%%v19,%%v18 \n\t"
"vfchesb %%v5,%%v17,%%v16 \n\t"
"vfchesb %%v6,%%v19,%%v18 \n\t"
"vsel %%v16,%%v16,%%v17,%%v5 \n\t"
"vsel %%v5,%%v24,%%v25,%%v5 \n\t"
"vsel %%v17,%%v18,%%v19,%%v6 \n\t"
"vsel %%v6,%%v26,%%v27,%%v6 \n\t"
"vfchsb %%v18,%%v17,%%v16 \n\t"
"vfchesb %%v18,%%v17,%%v16 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v5,%%v5,%%v6,%%v18 \n\t"
"vsegf %%v6,%%v5 \n\t"
@ -215,13 +215,13 @@ static BLASLONG icamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amin)
"vag %%v5,%%v5,%%v4 \n\t"
"vag %%v6,%%v6,%%v4 \n\t"
"vfchsb %%v7,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v7 \n\t"
"vfchesb %%v7,%%v16,%%v0 \n\t"
"vsel %%v0,%%v0,%%v16,%%v7 \n\t"
"vsegf %%v8,%%v7 \n\t"
"vesrlg %%v7,%%v7,32 \n\t"
"vsegf %%v7,%%v7 \n\t"
"vsel %%v1,%%v5,%%v1,%%v7 \n\t"
"vsel %%v2,%%v6,%%v2,%%v8 \n\t"
"vsel %%v1,%%v1,%%v5,%%v7 \n\t"
"vsel %%v2,%%v2,%%v6,%%v8 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"agfi %%r1, 256 \n\t"
@ -250,8 +250,8 @@ static BLASLONG icamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amin)
"wfchsb %%v4,%%v0,%%v2 \n\t"
"vsel %%v1,%%v3,%%v1,%%v4 \n\t"
"vsel %%v0,%%v2,%%v0,%%v4 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"ste %%f0,%1 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"2: \n\t"
"nop "
:"=r"(iamin),"=m"(*amin)
@ -302,6 +302,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
} else {
min = 0;
minf = CABS1(x,0);
inc_x2 = 2 * inc_x;
ix += inc_x2;

View File

@ -63,7 +63,7 @@ static BLASLONG idamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amax)
"vleig %%v31,15,1 \n\t"
"srlg %%r0,%2,5 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
"0: \n\t"
"pfd 1, 1024(%%r1,%3) \n\t"
"vl %%v16,0(%%r1,%3) \n\t"
@ -83,10 +83,10 @@ static BLASLONG idamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amax)
"vflpdb %%v22, %%v22 \n\t"
"vflpdb %%v23, %%v23 \n\t"
"vfchdb %%v4,%%v16,%%v17 \n\t"
"vfchdb %%v5,%%v18,%%v19 \n\t"
"vfchdb %%v6,%%v20,%%v21 \n\t"
"vfchdb %%v7,%%v22,%%v23 \n\t"
"vfchedb %%v4,%%v16,%%v17 \n\t"
"vfchedb %%v5,%%v18,%%v19 \n\t"
"vfchedb %%v6,%%v20,%%v21 \n\t"
"vfchedb %%v7,%%v22,%%v23 \n\t"
"vsel %%v16,%%v16,%%v17,%%v4 \n\t"
"vsel %%v4,%%v24,%%v25,%%v4 \n\t"
"vsel %%v17,%%v18,%%v19,%%v5 \n\t"
@ -96,21 +96,21 @@ static BLASLONG idamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amax)
"vsel %%v19,%%v22,%%v23,%%v7 \n\t"
"vsel %%v7,%%v30,%%v31,%%v7 \n\t"
"vfchdb %%v20,%%v16,%%v17 \n\t"
"vfchdb %%v21,%%v18,%%v19 \n\t"
"vfchedb %%v20,%%v16,%%v17 \n\t"
"vfchedb %%v21,%%v18,%%v19 \n\t"
"vsel %%v16,%%v16,%%v17,%%v20 \n\t"
"vsel %%v4,%%v4,%%v5,%%v20 \n\t"
"vsel %%v17,%%v18,%%v19,%%v21 \n\t"
"vsel %%v5,%%v6,%%v7,%%v21 \n\t"
"vfchdb %%v18,%%v16,%%v17 \n\t"
"vfchedb %%v18,%%v16,%%v17 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v4,%%v4,%%v5,%%v18 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"vfchdb %%v5,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v5 \n\t"
"vsel %%v1,%%v4,%%v1,%%v5 \n\t"
"vfchedb %%v5,%%v0,%%v16 \n\t"
"vsel %%v0,%%v0,%%v16,%%v5 \n\t"
"vsel %%v1,%%v1,%%v4,%%v5 \n\t"
"vag %%v3,%%v3,%%v2 \n\t"
"vl %%v16,128(%%r1,%3) \n\t"
@ -130,10 +130,10 @@ static BLASLONG idamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amax)
"vflpdb %%v22, %%v22 \n\t"
"vflpdb %%v23, %%v23 \n\t"
"vfchdb %%v4,%%v16,%%v17 \n\t"
"vfchdb %%v5,%%v18,%%v19 \n\t"
"vfchdb %%v6,%%v20,%%v21 \n\t"
"vfchdb %%v7,%%v22,%%v23 \n\t"
"vfchedb %%v4,%%v16,%%v17 \n\t"
"vfchedb %%v5,%%v18,%%v19 \n\t"
"vfchedb %%v6,%%v20,%%v21 \n\t"
"vfchedb %%v7,%%v22,%%v23 \n\t"
"vsel %%v16,%%v16,%%v17,%%v4 \n\t"
"vsel %%v4,%%v24,%%v25,%%v4 \n\t"
"vsel %%v17,%%v18,%%v19,%%v5 \n\t"
@ -143,21 +143,21 @@ static BLASLONG idamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amax)
"vsel %%v19,%%v22,%%v23,%%v7 \n\t"
"vsel %%v7,%%v30,%%v31,%%v7 \n\t"
"vfchdb %%v20,%%v16,%%v17 \n\t"
"vfchdb %%v21,%%v18,%%v19 \n\t"
"vfchedb %%v20,%%v16,%%v17 \n\t"
"vfchedb %%v21,%%v18,%%v19 \n\t"
"vsel %%v16,%%v16,%%v17,%%v20 \n\t"
"vsel %%v4,%%v4,%%v5,%%v20 \n\t"
"vsel %%v17,%%v18,%%v19,%%v21 \n\t"
"vsel %%v5,%%v6,%%v7,%%v21 \n\t"
"vfchdb %%v18,%%v16,%%v17 \n\t"
"vfchedb %%v18,%%v16,%%v17 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v4,%%v4,%%v5,%%v18 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"vfchdb %%v5,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v5 \n\t"
"vsel %%v1,%%v4,%%v1,%%v5 \n\t"
"vfchedb %%v5,%%v0,%%v16 \n\t"
"vsel %%v0,%%v0,%%v16,%%v5 \n\t"
"vsel %%v1,%%v1,%%v4,%%v5 \n\t"
"vag %%v3,%%v3,%%v2 \n\t"
"agfi %%r1, 256 \n\t"
@ -175,8 +175,8 @@ static BLASLONG idamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amax)
"wfchdb %%v4,%%v2,%%v0 \n\t"
"vsel %%v1,%%v3,%%v1,%%v4 \n\t"
"vsel %%v0,%%v2,%%v0,%%v4 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"std %%f0,%1 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"2: \n\t"
"nop "
:"=r"(iamax),"=m"(*amax)
@ -221,12 +221,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
} else {
max = 0;
maxf = ABS(x[0]);
i += inc_x;
j++;
BLASLONG n1 = (n - 1) & -4;
while ((j - 1) < n1) {
BLASLONG n1 = n & -4;
while (j < n1) {
if (ABS(x[i]) > maxf) {
max = j;

View File

@ -63,7 +63,7 @@ static BLASLONG idamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amin)
"vleig %%v31,15,1 \n\t"
"srlg %%r0,%2,5 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
"0: \n\t"
"pfd 1, 1024(%%r1,%3) \n\t"
"vl %%v16,0(%%r1,%3) \n\t"
@ -83,10 +83,10 @@ static BLASLONG idamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amin)
"vflpdb %%v22, %%v22 \n\t"
"vflpdb %%v23, %%v23 \n\t"
"vfchdb %%v4,%%v17,%%v16 \n\t"
"vfchdb %%v5,%%v19,%%v18 \n\t"
"vfchdb %%v6,%%v21,%%v20 \n\t"
"vfchdb %%v7,%%v23,%%v22 \n\t"
"vfchedb %%v4,%%v17,%%v16 \n\t"
"vfchedb %%v5,%%v19,%%v18 \n\t"
"vfchedb %%v6,%%v21,%%v20 \n\t"
"vfchedb %%v7,%%v23,%%v22 \n\t"
"vsel %%v16,%%v16,%%v17,%%v4 \n\t"
"vsel %%v4,%%v24,%%v25,%%v4 \n\t"
"vsel %%v17,%%v18,%%v19,%%v5 \n\t"
@ -96,21 +96,21 @@ static BLASLONG idamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amin)
"vsel %%v19,%%v22,%%v23,%%v7 \n\t"
"vsel %%v7,%%v30,%%v31,%%v7 \n\t"
"vfchdb %%v20,%%v17,%%v16 \n\t"
"vfchdb %%v21,%%v19,%%v18 \n\t"
"vfchedb %%v20,%%v17,%%v16 \n\t"
"vfchedb %%v21,%%v19,%%v18 \n\t"
"vsel %%v16,%%v16,%%v17,%%v20 \n\t"
"vsel %%v4,%%v4,%%v5,%%v20 \n\t"
"vsel %%v17,%%v18,%%v19,%%v21 \n\t"
"vsel %%v5,%%v6,%%v7,%%v21 \n\t"
"vfchdb %%v18,%%v17,%%v16 \n\t"
"vfchedb %%v18,%%v17,%%v16 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v4,%%v4,%%v5,%%v18 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"vfchdb %%v5,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v5 \n\t"
"vsel %%v1,%%v4,%%v1,%%v5 \n\t"
"vfchedb %%v5,%%v16,%%v0 \n\t"
"vsel %%v0,%%v0,%%v16,%%v5 \n\t"
"vsel %%v1,%%v1,%%v4,%%v5 \n\t"
"vag %%v3,%%v3,%%v2 \n\t"
"vl %%v16,128(%%r1,%3) \n\t"
@ -130,10 +130,10 @@ static BLASLONG idamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amin)
"vflpdb %%v22, %%v22 \n\t"
"vflpdb %%v23, %%v23 \n\t"
"vfchdb %%v4,%%v17,%%v16 \n\t"
"vfchdb %%v5,%%v19,%%v18 \n\t"
"vfchdb %%v6,%%v21,%%v20 \n\t"
"vfchdb %%v7,%%v23,%%v22 \n\t"
"vfchedb %%v4,%%v17,%%v16 \n\t"
"vfchedb %%v5,%%v19,%%v18 \n\t"
"vfchedb %%v6,%%v21,%%v20 \n\t"
"vfchedb %%v7,%%v23,%%v22 \n\t"
"vsel %%v16,%%v16,%%v17,%%v4 \n\t"
"vsel %%v4,%%v24,%%v25,%%v4 \n\t"
"vsel %%v17,%%v18,%%v19,%%v5 \n\t"
@ -143,21 +143,21 @@ static BLASLONG idamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amin)
"vsel %%v19,%%v22,%%v23,%%v7 \n\t"
"vsel %%v7,%%v30,%%v31,%%v7 \n\t"
"vfchdb %%v20,%%v17,%%v16 \n\t"
"vfchdb %%v21,%%v19,%%v18 \n\t"
"vfchedb %%v20,%%v17,%%v16 \n\t"
"vfchedb %%v21,%%v19,%%v18 \n\t"
"vsel %%v16,%%v16,%%v17,%%v20 \n\t"
"vsel %%v4,%%v4,%%v5,%%v20 \n\t"
"vsel %%v17,%%v18,%%v19,%%v21 \n\t"
"vsel %%v5,%%v6,%%v7,%%v21 \n\t"
"vfchdb %%v18,%%v17,%%v16 \n\t"
"vfchedb %%v18,%%v17,%%v16 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v4,%%v4,%%v5,%%v18 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"vfchdb %%v5,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v5 \n\t"
"vsel %%v1,%%v4,%%v1,%%v5 \n\t"
"vfchedb %%v5,%%v16,%%v0 \n\t"
"vsel %%v0,%%v0,%%v16,%%v5 \n\t"
"vsel %%v1,%%v1,%%v4,%%v5 \n\t"
"vag %%v3,%%v3,%%v2 \n\t"
"agfi %%r1, 256 \n\t"
@ -175,8 +175,8 @@ static BLASLONG idamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amin)
"wfchdb %%v4,%%v0,%%v2 \n\t"
"vsel %%v1,%%v3,%%v1,%%v4 \n\t"
"vsel %%v0,%%v2,%%v0,%%v4 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"std %%f0,%1 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"2: \n\t"
"nop "
:"=r"(iamin),"=m"(*amin)
@ -221,12 +221,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
} else {
min = 0;
minf = ABS(x[0]);
i += inc_x;
j++;
BLASLONG n1 = (n - 1) & -4;
while ((j - 1) < n1) {
BLASLONG n1 = n & -4;
while (j < n1) {
if (ABS(x[i]) < minf) {
min = j;

View File

@ -55,7 +55,7 @@ static BLASLONG idmax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *max)
"vleig %%v31,15,1 \n\t"
"srlg %%r0,%2,5 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
"0: \n\t"
"pfd 1, 1024(%%r1,%3) \n\t"
"vl %%v16,0(%%r1,%3) \n\t"
@ -67,10 +67,10 @@ static BLASLONG idmax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *max)
"vl %%v22,96(%%r1,%3) \n\t"
"vl %%v23,112(%%r1,%3) \n\t"
"vfchdb %%v4,%%v16,%%v17 \n\t"
"vfchdb %%v5,%%v18,%%v19 \n\t"
"vfchdb %%v6,%%v20,%%v21 \n\t"
"vfchdb %%v7,%%v22,%%v23 \n\t"
"vfchedb %%v4,%%v16,%%v17 \n\t"
"vfchedb %%v5,%%v18,%%v19 \n\t"
"vfchedb %%v6,%%v20,%%v21 \n\t"
"vfchedb %%v7,%%v22,%%v23 \n\t"
"vsel %%v16,%%v16,%%v17,%%v4 \n\t"
"vsel %%v4,%%v24,%%v25,%%v4 \n\t"
"vsel %%v17,%%v18,%%v19,%%v5 \n\t"
@ -80,21 +80,21 @@ static BLASLONG idmax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *max)
"vsel %%v19,%%v22,%%v23,%%v7 \n\t"
"vsel %%v7,%%v30,%%v31,%%v7 \n\t"
"vfchdb %%v20,%%v16,%%v17 \n\t"
"vfchdb %%v21,%%v18,%%v19 \n\t"
"vfchedb %%v20,%%v16,%%v17 \n\t"
"vfchedb %%v21,%%v18,%%v19 \n\t"
"vsel %%v16,%%v16,%%v17,%%v20 \n\t"
"vsel %%v4,%%v4,%%v5,%%v20 \n\t"
"vsel %%v17,%%v18,%%v19,%%v21 \n\t"
"vsel %%v5,%%v6,%%v7,%%v21 \n\t"
"vfchdb %%v18,%%v16,%%v17 \n\t"
"vfchedb %%v18,%%v16,%%v17 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v4,%%v4,%%v5,%%v18 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"vfchdb %%v5,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v5 \n\t"
"vsel %%v1,%%v4,%%v1,%%v5 \n\t"
"vfchedb %%v5,%%v0,%%v16 \n\t"
"vsel %%v0,%%v0,%%v16,%%v5 \n\t"
"vsel %%v1,%%v1,%%v4,%%v5 \n\t"
"vag %%v3,%%v3,%%v2 \n\t"
"vl %%v16,128(%%r1,%3) \n\t"
@ -106,10 +106,10 @@ static BLASLONG idmax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *max)
"vl %%v22,224(%%r1,%3) \n\t"
"vl %%v23,240(%%r1,%3) \n\t"
"vfchdb %%v4,%%v16,%%v17 \n\t"
"vfchdb %%v5,%%v18,%%v19 \n\t"
"vfchdb %%v6,%%v20,%%v21 \n\t"
"vfchdb %%v7,%%v22,%%v23 \n\t"
"vfchedb %%v4,%%v16,%%v17 \n\t"
"vfchedb %%v5,%%v18,%%v19 \n\t"
"vfchedb %%v6,%%v20,%%v21 \n\t"
"vfchedb %%v7,%%v22,%%v23 \n\t"
"vsel %%v16,%%v16,%%v17,%%v4 \n\t"
"vsel %%v4,%%v24,%%v25,%%v4 \n\t"
"vsel %%v17,%%v18,%%v19,%%v5 \n\t"
@ -119,21 +119,21 @@ static BLASLONG idmax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *max)
"vsel %%v19,%%v22,%%v23,%%v7 \n\t"
"vsel %%v7,%%v30,%%v31,%%v7 \n\t"
"vfchdb %%v20,%%v16,%%v17 \n\t"
"vfchdb %%v21,%%v18,%%v19 \n\t"
"vfchedb %%v20,%%v16,%%v17 \n\t"
"vfchedb %%v21,%%v18,%%v19 \n\t"
"vsel %%v16,%%v16,%%v17,%%v20 \n\t"
"vsel %%v4,%%v4,%%v5,%%v20 \n\t"
"vsel %%v17,%%v18,%%v19,%%v21 \n\t"
"vsel %%v5,%%v6,%%v7,%%v21 \n\t"
"vfchdb %%v18,%%v16,%%v17 \n\t"
"vfchedb %%v18,%%v16,%%v17 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v4,%%v4,%%v5,%%v18 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"vfchdb %%v5,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v5 \n\t"
"vsel %%v1,%%v4,%%v1,%%v5 \n\t"
"vfchedb %%v5,%%v0,%%v16 \n\t"
"vsel %%v0,%%v0,%%v16,%%v5 \n\t"
"vsel %%v1,%%v1,%%v4,%%v5 \n\t"
"vag %%v3,%%v3,%%v2 \n\t"
"agfi %%r1, 256 \n\t"
@ -151,8 +151,8 @@ static BLASLONG idmax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *max)
"wfchdb %%v4,%%v2,%%v0 \n\t"
"vsel %%v1,%%v3,%%v1,%%v4 \n\t"
"vsel %%v0,%%v2,%%v0,%%v4 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"std %%f0,%1 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"2: \n\t"
"nop "
:"=r"(imax),"=m"(*max)
@ -197,12 +197,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
} else {
max = 0;
maxf = x[0];
i += inc_x;
j++;
BLASLONG n1 = (n - 1) & -4;
while ((j - 1) < n1) {
BLASLONG n1 = n & -4;
while (j < n1) {
if (x[i] > maxf) {
max = j;

View File

@ -55,7 +55,7 @@ static BLASLONG idmin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *min)
"vleig %%v31,15,1 \n\t"
"srlg %%r0,%2,5 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
"0: \n\t"
"pfd 1, 1024(%%r1,%3) \n\t"
"vl %%v16,0(%%r1,%3) \n\t"
@ -67,10 +67,10 @@ static BLASLONG idmin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *min)
"vl %%v22,96(%%r1,%3) \n\t"
"vl %%v23,112(%%r1,%3) \n\t"
"vfchdb %%v4,%%v17,%%v16 \n\t"
"vfchdb %%v5,%%v19,%%v18 \n\t"
"vfchdb %%v6,%%v21,%%v20 \n\t"
"vfchdb %%v7,%%v23,%%v22 \n\t"
"vfchedb %%v4,%%v17,%%v16 \n\t"
"vfchedb %%v5,%%v19,%%v18 \n\t"
"vfchedb %%v6,%%v21,%%v20 \n\t"
"vfchedb %%v7,%%v23,%%v22 \n\t"
"vsel %%v16,%%v16,%%v17,%%v4 \n\t"
"vsel %%v4,%%v24,%%v25,%%v4 \n\t"
"vsel %%v17,%%v18,%%v19,%%v5 \n\t"
@ -80,21 +80,21 @@ static BLASLONG idmin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *min)
"vsel %%v19,%%v22,%%v23,%%v7 \n\t"
"vsel %%v7,%%v30,%%v31,%%v7 \n\t"
"vfchdb %%v20,%%v17,%%v16 \n\t"
"vfchdb %%v21,%%v19,%%v18 \n\t"
"vfchedb %%v20,%%v17,%%v16 \n\t"
"vfchedb %%v21,%%v19,%%v18 \n\t"
"vsel %%v16,%%v16,%%v17,%%v20 \n\t"
"vsel %%v4,%%v4,%%v5,%%v20 \n\t"
"vsel %%v17,%%v18,%%v19,%%v21 \n\t"
"vsel %%v5,%%v6,%%v7,%%v21 \n\t"
"vfchdb %%v18,%%v17,%%v16 \n\t"
"vfchedb %%v18,%%v17,%%v16 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v4,%%v4,%%v5,%%v18 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"vfchdb %%v5,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v5 \n\t"
"vsel %%v1,%%v4,%%v1,%%v5 \n\t"
"vfchedb %%v5,%%v16,%%v0 \n\t"
"vsel %%v0,%%v0,%%v16,%%v5 \n\t"
"vsel %%v1,%%v1,%%v4,%%v5 \n\t"
"vag %%v3,%%v3,%%v2 \n\t"
"vl %%v16,128(%%r1,%3) \n\t"
@ -106,10 +106,10 @@ static BLASLONG idmin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *min)
"vl %%v22,224(%%r1,%3) \n\t"
"vl %%v23,240(%%r1,%3) \n\t"
"vfchdb %%v4,%%v17,%%v16 \n\t"
"vfchdb %%v5,%%v19,%%v18 \n\t"
"vfchdb %%v6,%%v21,%%v20 \n\t"
"vfchdb %%v7,%%v23,%%v22 \n\t"
"vfchedb %%v4,%%v17,%%v16 \n\t"
"vfchedb %%v5,%%v19,%%v18 \n\t"
"vfchedb %%v6,%%v21,%%v20 \n\t"
"vfchedb %%v7,%%v23,%%v22 \n\t"
"vsel %%v16,%%v16,%%v17,%%v4 \n\t"
"vsel %%v4,%%v24,%%v25,%%v4 \n\t"
"vsel %%v17,%%v18,%%v19,%%v5 \n\t"
@ -119,21 +119,21 @@ static BLASLONG idmin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *min)
"vsel %%v19,%%v22,%%v23,%%v7 \n\t"
"vsel %%v7,%%v30,%%v31,%%v7 \n\t"
"vfchdb %%v20,%%v17,%%v16 \n\t"
"vfchdb %%v21,%%v19,%%v18 \n\t"
"vfchedb %%v20,%%v17,%%v16 \n\t"
"vfchedb %%v21,%%v19,%%v18 \n\t"
"vsel %%v16,%%v16,%%v17,%%v20 \n\t"
"vsel %%v4,%%v4,%%v5,%%v20 \n\t"
"vsel %%v17,%%v18,%%v19,%%v21 \n\t"
"vsel %%v5,%%v6,%%v7,%%v21 \n\t"
"vfchdb %%v18,%%v17,%%v16 \n\t"
"vfchedb %%v18,%%v17,%%v16 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v4,%%v4,%%v5,%%v18 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"vfchdb %%v5,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v5 \n\t"
"vsel %%v1,%%v4,%%v1,%%v5 \n\t"
"vfchedb %%v5,%%v16,%%v0 \n\t"
"vsel %%v0,%%v0,%%v16,%%v5 \n\t"
"vsel %%v1,%%v1,%%v4,%%v5 \n\t"
"vag %%v3,%%v3,%%v2 \n\t"
"agfi %%r1, 256 \n\t"
@ -151,8 +151,8 @@ static BLASLONG idmin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *min)
"wfchdb %%v4,%%v0,%%v2 \n\t"
"vsel %%v1,%%v3,%%v1,%%v4 \n\t"
"vsel %%v0,%%v2,%%v0,%%v4 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"std %%f0,%1 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"2: \n\t"
"nop "
:"=r"(imin),"=m"(*min)
@ -197,12 +197,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
} else {
min = 0;
minf = x[0];
i += inc_x;
j++;
BLASLONG n1 = (n - 1) & -4;
while ((j - 1) < n1) {
BLASLONG n1 = n & -4;
while (j < n1) {
if (x[i] < minf) {
min = j;

View File

@ -81,7 +81,7 @@ static BLASLONG isamax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amax)
"vleif %%v31,31,3 \n\t"
"srlg %%r0,%2,6 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
"0: \n\t"
"pfd 1, 1024(%%r1,%3) \n\t"
"vl %%v16,0(%%r1,%3) \n\t"
@ -101,10 +101,10 @@ static BLASLONG isamax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amax)
"vflpsb %%v22, %%v22 \n\t"
"vflpsb %%v23, %%v23 \n\t"
"vfchsb %%v5,%%v16,%%v17 \n\t"
"vfchsb %%v6,%%v18,%%v19 \n\t"
"vfchsb %%v7,%%v20,%%v21 \n\t"
"vfchsb %%v8,%%v22,%%v23 \n\t"
"vfchesb %%v5,%%v16,%%v17 \n\t"
"vfchesb %%v6,%%v18,%%v19 \n\t"
"vfchesb %%v7,%%v20,%%v21 \n\t"
"vfchesb %%v8,%%v22,%%v23 \n\t"
"vsel %%v16,%%v16,%%v17,%%v5 \n\t"
"vsel %%v5,%%v24,%%v25,%%v5 \n\t"
"vsel %%v17,%%v18,%%v19,%%v6 \n\t"
@ -114,14 +114,14 @@ static BLASLONG isamax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amax)
"vsel %%v19,%%v22,%%v23,%%v8 \n\t"
"vsel %%v8,%%v30,%%v31,%%v8 \n\t"
"vfchsb %%v20,%%v16,%%v17 \n\t"
"vfchsb %%v21,%%v18,%%v19 \n\t"
"vfchesb %%v20,%%v16,%%v17 \n\t"
"vfchesb %%v21,%%v18,%%v19 \n\t"
"vsel %%v16,%%v16,%%v17,%%v20 \n\t"
"vsel %%v5,%%v5,%%v6,%%v20 \n\t"
"vsel %%v17,%%v18,%%v19,%%v21 \n\t"
"vsel %%v6,%%v7,%%v8,%%v21 \n\t"
"vfchsb %%v18,%%v16,%%v17 \n\t"
"vfchesb %%v18,%%v16,%%v17 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v5,%%v5,%%v6,%%v18 \n\t"
"vsegf %%v6,%%v5 \n\t"
@ -129,13 +129,13 @@ static BLASLONG isamax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amax)
"vag %%v5,%%v5,%%v4 \n\t"
"vag %%v6,%%v6,%%v4 \n\t"
"vfchsb %%v7,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v7 \n\t"
"vfchesb %%v7,%%v0,%%v16 \n\t"
"vsel %%v0,%%v0,%%v16,%%v7 \n\t"
"vsegf %%v8,%%v7 \n\t"
"vesrlg %%v7,%%v7,32 \n\t"
"vsegf %%v7,%%v7 \n\t"
"vsel %%v1,%%v5,%%v1,%%v7 \n\t"
"vsel %%v2,%%v6,%%v2,%%v8 \n\t"
"vsel %%v1,%%v1,%%v5,%%v7 \n\t"
"vsel %%v2,%%v2,%%v6,%%v8 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"vl %%v16,128(%%r1,%3) \n\t"
@ -155,10 +155,10 @@ static BLASLONG isamax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amax)
"vflpsb %%v22, %%v22 \n\t"
"vflpsb %%v23, %%v23 \n\t"
"vfchsb %%v5,%%v16,%%v17 \n\t"
"vfchsb %%v6,%%v18,%%v19 \n\t"
"vfchsb %%v7,%%v20,%%v21 \n\t"
"vfchsb %%v8,%%v22,%%v23 \n\t"
"vfchesb %%v5,%%v16,%%v17 \n\t"
"vfchesb %%v6,%%v18,%%v19 \n\t"
"vfchesb %%v7,%%v20,%%v21 \n\t"
"vfchesb %%v8,%%v22,%%v23 \n\t"
"vsel %%v16,%%v16,%%v17,%%v5 \n\t"
"vsel %%v5,%%v24,%%v25,%%v5 \n\t"
"vsel %%v17,%%v18,%%v19,%%v6 \n\t"
@ -168,14 +168,14 @@ static BLASLONG isamax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amax)
"vsel %%v19,%%v22,%%v23,%%v8 \n\t"
"vsel %%v8,%%v30,%%v31,%%v8 \n\t"
"vfchsb %%v20,%%v16,%%v17 \n\t"
"vfchsb %%v21,%%v18,%%v19 \n\t"
"vfchesb %%v20,%%v16,%%v17 \n\t"
"vfchesb %%v21,%%v18,%%v19 \n\t"
"vsel %%v16,%%v16,%%v17,%%v20 \n\t"
"vsel %%v5,%%v5,%%v6,%%v20 \n\t"
"vsel %%v17,%%v18,%%v19,%%v21 \n\t"
"vsel %%v6,%%v7,%%v8,%%v21 \n\t"
"vfchsb %%v18,%%v16,%%v17 \n\t"
"vfchesb %%v18,%%v16,%%v17 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v5,%%v5,%%v6,%%v18 \n\t"
"vsegf %%v6,%%v5 \n\t"
@ -183,13 +183,13 @@ static BLASLONG isamax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amax)
"vag %%v5,%%v5,%%v4 \n\t"
"vag %%v6,%%v6,%%v4 \n\t"
"vfchsb %%v7,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v7 \n\t"
"vfchesb %%v7,%%v0,%%v16 \n\t"
"vsel %%v0,%%v0,%%v16,%%v7 \n\t"
"vsegf %%v8,%%v7 \n\t"
"vesrlg %%v7,%%v7,32 \n\t"
"vsegf %%v7,%%v7 \n\t"
"vsel %%v1,%%v5,%%v1,%%v7 \n\t"
"vsel %%v2,%%v6,%%v2,%%v8 \n\t"
"vsel %%v1,%%v1,%%v5,%%v7 \n\t"
"vsel %%v2,%%v2,%%v6,%%v8 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"agfi %%r1, 256 \n\t"
@ -218,8 +218,8 @@ static BLASLONG isamax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amax)
"wfchsb %%v4,%%v2,%%v0 \n\t"
"vsel %%v1,%%v3,%%v1,%%v4 \n\t"
"vsel %%v0,%%v2,%%v0,%%v4 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"ste %%f0,%1 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"2: \n\t"
"nop "
:"=r"(iamax),"=m"(*amax)
@ -264,12 +264,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
} else {
max = 0;
maxf = ABS(x[0]);
i += inc_x;
j++;
BLASLONG n1 = (n - 1) & -4;
while ((j - 1) < n1) {
BLASLONG n1 = n & -4;
while (j < n1) {
if (ABS(x[i]) > maxf) {
max = j;

View File

@ -81,7 +81,7 @@ static BLASLONG isamin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amin)
"vleif %%v31,31,3 \n\t"
"srlg %%r0,%2,6 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
"0: \n\t"
"pfd 1, 1024(%%r1,%3) \n\t"
"vl %%v16,0(%%r1,%3) \n\t"
@ -101,10 +101,10 @@ static BLASLONG isamin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amin)
"vflpsb %%v22, %%v22 \n\t"
"vflpsb %%v23, %%v23 \n\t"
"vfchsb %%v5,%%v17,%%v16 \n\t"
"vfchsb %%v6,%%v19,%%v18 \n\t"
"vfchsb %%v7,%%v21,%%v20 \n\t"
"vfchsb %%v8,%%v23,%%v22 \n\t"
"vfchesb %%v5,%%v17,%%v16 \n\t"
"vfchesb %%v6,%%v19,%%v18 \n\t"
"vfchesb %%v7,%%v21,%%v20 \n\t"
"vfchesb %%v8,%%v23,%%v22 \n\t"
"vsel %%v16,%%v16,%%v17,%%v5 \n\t"
"vsel %%v5,%%v24,%%v25,%%v5 \n\t"
"vsel %%v17,%%v18,%%v19,%%v6 \n\t"
@ -114,14 +114,14 @@ static BLASLONG isamin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amin)
"vsel %%v19,%%v22,%%v23,%%v8 \n\t"
"vsel %%v8,%%v30,%%v31,%%v8 \n\t"
"vfchsb %%v20,%%v17,%%v16 \n\t"
"vfchsb %%v21,%%v19,%%v18 \n\t"
"vfchesb %%v20,%%v17,%%v16 \n\t"
"vfchesb %%v21,%%v19,%%v18 \n\t"
"vsel %%v16,%%v16,%%v17,%%v20 \n\t"
"vsel %%v5,%%v5,%%v6,%%v20 \n\t"
"vsel %%v17,%%v18,%%v19,%%v21 \n\t"
"vsel %%v6,%%v7,%%v8,%%v21 \n\t"
"vfchsb %%v18,%%v17,%%v16 \n\t"
"vfchesb %%v18,%%v17,%%v16 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v5,%%v5,%%v6,%%v18 \n\t"
"vsegf %%v6,%%v5 \n\t"
@ -129,13 +129,13 @@ static BLASLONG isamin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amin)
"vag %%v5,%%v5,%%v4 \n\t"
"vag %%v6,%%v6,%%v4 \n\t"
"vfchsb %%v7,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v7 \n\t"
"vfchesb %%v7,%%v16,%%v0 \n\t"
"vsel %%v0,%%v0,%%v16,%%v7 \n\t"
"vsegf %%v8,%%v7 \n\t"
"vesrlg %%v7,%%v7,32 \n\t"
"vsegf %%v7,%%v7 \n\t"
"vsel %%v1,%%v5,%%v1,%%v7 \n\t"
"vsel %%v2,%%v6,%%v2,%%v8 \n\t"
"vsel %%v1,%%v1,%%v5,%%v7 \n\t"
"vsel %%v2,%%v2,%%v6,%%v8 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"vl %%v16,128(%%r1,%3) \n\t"
@ -155,10 +155,10 @@ static BLASLONG isamin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amin)
"vflpsb %%v22, %%v22 \n\t"
"vflpsb %%v23, %%v23 \n\t"
"vfchsb %%v5,%%v17,%%v16 \n\t"
"vfchsb %%v6,%%v19,%%v18 \n\t"
"vfchsb %%v7,%%v21,%%v20 \n\t"
"vfchsb %%v8,%%v23,%%v22 \n\t"
"vfchesb %%v5,%%v17,%%v16 \n\t"
"vfchesb %%v6,%%v19,%%v18 \n\t"
"vfchesb %%v7,%%v21,%%v20 \n\t"
"vfchesb %%v8,%%v23,%%v22 \n\t"
"vsel %%v16,%%v16,%%v17,%%v5 \n\t"
"vsel %%v5,%%v24,%%v25,%%v5 \n\t"
"vsel %%v17,%%v18,%%v19,%%v6 \n\t"
@ -168,14 +168,14 @@ static BLASLONG isamin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amin)
"vsel %%v19,%%v22,%%v23,%%v8 \n\t"
"vsel %%v8,%%v30,%%v31,%%v8 \n\t"
"vfchsb %%v20,%%v17,%%v16 \n\t"
"vfchsb %%v21,%%v19,%%v18 \n\t"
"vfchesb %%v20,%%v17,%%v16 \n\t"
"vfchesb %%v21,%%v19,%%v18 \n\t"
"vsel %%v16,%%v16,%%v17,%%v20 \n\t"
"vsel %%v5,%%v5,%%v6,%%v20 \n\t"
"vsel %%v17,%%v18,%%v19,%%v21 \n\t"
"vsel %%v6,%%v7,%%v8,%%v21 \n\t"
"vfchsb %%v18,%%v17,%%v16 \n\t"
"vfchesb %%v18,%%v17,%%v16 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v5,%%v5,%%v6,%%v18 \n\t"
"vsegf %%v6,%%v5 \n\t"
@ -183,13 +183,13 @@ static BLASLONG isamin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amin)
"vag %%v5,%%v5,%%v4 \n\t"
"vag %%v6,%%v6,%%v4 \n\t"
"vfchsb %%v7,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v7 \n\t"
"vfchesb %%v7,%%v16,%%v0 \n\t"
"vsel %%v0,%%v0,%%v16,%%v7 \n\t"
"vsegf %%v8,%%v7 \n\t"
"vesrlg %%v7,%%v7,32 \n\t"
"vsegf %%v7,%%v7 \n\t"
"vsel %%v1,%%v5,%%v1,%%v7 \n\t"
"vsel %%v2,%%v6,%%v2,%%v8 \n\t"
"vsel %%v1,%%v1,%%v5,%%v7 \n\t"
"vsel %%v2,%%v2,%%v6,%%v8 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"agfi %%r1, 256 \n\t"
@ -218,8 +218,8 @@ static BLASLONG isamin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amin)
"wfchsb %%v4,%%v0,%%v2 \n\t"
"vsel %%v1,%%v3,%%v1,%%v4 \n\t"
"vsel %%v0,%%v2,%%v0,%%v4 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"ste %%f0,%1 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"2: \n\t"
"nop "
:"=r"(iamin),"=m"(*amin)
@ -264,12 +264,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
} else {
min = 0;
minf = ABS(x[0]);
i += inc_x;
j++;
BLASLONG n1 = (n - 1) & -4;
while ((j - 1) < n1) {
BLASLONG n1 = n & -4;
while (j < n1) {
if (ABS(x[i]) < minf) {
min = j;

View File

@ -73,7 +73,7 @@ static BLASLONG ismax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *max)
"vleif %%v31,31,3 \n\t"
"srlg %%r0,%2,6 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
"0: \n\t"
"pfd 1, 1024(%%r1,%3) \n\t"
"vl %%v16,0(%%r1,%3) \n\t"
@ -85,10 +85,10 @@ static BLASLONG ismax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *max)
"vl %%v22,96(%%r1,%3) \n\t"
"vl %%v23,112(%%r1,%3) \n\t"
"vfchsb %%v5,%%v16,%%v17 \n\t"
"vfchsb %%v6,%%v18,%%v19 \n\t"
"vfchsb %%v7,%%v20,%%v21 \n\t"
"vfchsb %%v8,%%v22,%%v23 \n\t"
"vfchesb %%v5,%%v16,%%v17 \n\t"
"vfchesb %%v6,%%v18,%%v19 \n\t"
"vfchesb %%v7,%%v20,%%v21 \n\t"
"vfchesb %%v8,%%v22,%%v23 \n\t"
"vsel %%v16,%%v16,%%v17,%%v5 \n\t"
"vsel %%v5,%%v24,%%v25,%%v5 \n\t"
"vsel %%v17,%%v18,%%v19,%%v6 \n\t"
@ -98,14 +98,14 @@ static BLASLONG ismax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *max)
"vsel %%v19,%%v22,%%v23,%%v8 \n\t"
"vsel %%v8,%%v30,%%v31,%%v8 \n\t"
"vfchsb %%v20,%%v16,%%v17 \n\t"
"vfchsb %%v21,%%v18,%%v19 \n\t"
"vfchesb %%v20,%%v16,%%v17 \n\t"
"vfchesb %%v21,%%v18,%%v19 \n\t"
"vsel %%v16,%%v16,%%v17,%%v20 \n\t"
"vsel %%v5,%%v5,%%v6,%%v20 \n\t"
"vsel %%v17,%%v18,%%v19,%%v21 \n\t"
"vsel %%v6,%%v7,%%v8,%%v21 \n\t"
"vfchsb %%v18,%%v16,%%v17 \n\t"
"vfchesb %%v18,%%v16,%%v17 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v5,%%v5,%%v6,%%v18 \n\t"
"vsegf %%v6,%%v5 \n\t"
@ -113,13 +113,13 @@ static BLASLONG ismax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *max)
"vag %%v5,%%v5,%%v4 \n\t"
"vag %%v6,%%v6,%%v4 \n\t"
"vfchsb %%v7,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v7 \n\t"
"vfchesb %%v7,%%v0,%%v16 \n\t"
"vsel %%v0,%%v0,%%v16,%%v7 \n\t"
"vsegf %%v8,%%v7 \n\t"
"vesrlg %%v7,%%v7,32 \n\t"
"vsegf %%v7,%%v7 \n\t"
"vsel %%v1,%%v5,%%v1,%%v7 \n\t"
"vsel %%v2,%%v6,%%v2,%%v8 \n\t"
"vsel %%v1,%%v1,%%v5,%%v7 \n\t"
"vsel %%v2,%%v2,%%v6,%%v8 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"vl %%v16,128(%%r1,%3) \n\t"
@ -131,10 +131,10 @@ static BLASLONG ismax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *max)
"vl %%v22,224(%%r1,%3) \n\t"
"vl %%v23,240(%%r1,%3) \n\t"
"vfchsb %%v5,%%v16,%%v17 \n\t"
"vfchsb %%v6,%%v18,%%v19 \n\t"
"vfchsb %%v7,%%v20,%%v21 \n\t"
"vfchsb %%v8,%%v22,%%v23 \n\t"
"vfchesb %%v5,%%v16,%%v17 \n\t"
"vfchesb %%v6,%%v18,%%v19 \n\t"
"vfchesb %%v7,%%v20,%%v21 \n\t"
"vfchesb %%v8,%%v22,%%v23 \n\t"
"vsel %%v16,%%v16,%%v17,%%v5 \n\t"
"vsel %%v5,%%v24,%%v25,%%v5 \n\t"
"vsel %%v17,%%v18,%%v19,%%v6 \n\t"
@ -144,14 +144,14 @@ static BLASLONG ismax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *max)
"vsel %%v19,%%v22,%%v23,%%v8 \n\t"
"vsel %%v8,%%v30,%%v31,%%v8 \n\t"
"vfchsb %%v20,%%v16,%%v17 \n\t"
"vfchsb %%v21,%%v18,%%v19 \n\t"
"vfchesb %%v20,%%v16,%%v17 \n\t"
"vfchesb %%v21,%%v18,%%v19 \n\t"
"vsel %%v16,%%v16,%%v17,%%v20 \n\t"
"vsel %%v5,%%v5,%%v6,%%v20 \n\t"
"vsel %%v17,%%v18,%%v19,%%v21 \n\t"
"vsel %%v6,%%v7,%%v8,%%v21 \n\t"
"vfchsb %%v18,%%v16,%%v17 \n\t"
"vfchesb %%v18,%%v16,%%v17 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v5,%%v5,%%v6,%%v18 \n\t"
"vsegf %%v6,%%v5 \n\t"
@ -159,13 +159,13 @@ static BLASLONG ismax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *max)
"vag %%v5,%%v5,%%v4 \n\t"
"vag %%v6,%%v6,%%v4 \n\t"
"vfchsb %%v7,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v7 \n\t"
"vfchesb %%v7,%%v0,%%v16 \n\t"
"vsel %%v0,%%v0,%%v16,%%v7 \n\t"
"vsegf %%v8,%%v7 \n\t"
"vesrlg %%v7,%%v7,32 \n\t"
"vsegf %%v7,%%v7 \n\t"
"vsel %%v1,%%v5,%%v1,%%v7 \n\t"
"vsel %%v2,%%v6,%%v2,%%v8 \n\t"
"vsel %%v1,%%v1,%%v5,%%v7 \n\t"
"vsel %%v2,%%v2,%%v6,%%v8 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"agfi %%r1, 256 \n\t"
@ -194,8 +194,8 @@ static BLASLONG ismax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *max)
"wfchsb %%v4,%%v2,%%v0 \n\t"
"vsel %%v1,%%v3,%%v1,%%v4 \n\t"
"vsel %%v0,%%v2,%%v0,%%v4 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"ste %%f0,%1 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"2: \n\t"
"nop "
:"=r"(imax),"=m"(*max)
@ -240,12 +240,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
} else {
max = 0;
maxf = x[0];
i += inc_x;
j++;
BLASLONG n1 = (n - 1) & -4;
while ((j - 1) < n1) {
BLASLONG n1 = n & -4;
while (j < n1) {
if (x[i] > maxf) {
max = j;

View File

@ -73,7 +73,7 @@ static BLASLONG ismin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *min)
"vleif %%v31,31,3 \n\t"
"srlg %%r0,%2,6 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
"0: \n\t"
"pfd 1, 1024(%%r1,%3) \n\t"
"vl %%v16,0(%%r1,%3) \n\t"
@ -85,10 +85,10 @@ static BLASLONG ismin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *min)
"vl %%v22,96(%%r1,%3) \n\t"
"vl %%v23,112(%%r1,%3) \n\t"
"vfchsb %%v5,%%v17,%%v16 \n\t"
"vfchsb %%v6,%%v19,%%v18 \n\t"
"vfchsb %%v7,%%v21,%%v20 \n\t"
"vfchsb %%v8,%%v23,%%v22 \n\t"
"vfchesb %%v5,%%v17,%%v16 \n\t"
"vfchesb %%v6,%%v19,%%v18 \n\t"
"vfchesb %%v7,%%v21,%%v20 \n\t"
"vfchesb %%v8,%%v23,%%v22 \n\t"
"vsel %%v16,%%v16,%%v17,%%v5 \n\t"
"vsel %%v5,%%v24,%%v25,%%v5 \n\t"
"vsel %%v17,%%v18,%%v19,%%v6 \n\t"
@ -98,14 +98,14 @@ static BLASLONG ismin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *min)
"vsel %%v19,%%v22,%%v23,%%v8 \n\t"
"vsel %%v8,%%v30,%%v31,%%v8 \n\t"
"vfchsb %%v20,%%v17,%%v16 \n\t"
"vfchsb %%v21,%%v19,%%v18 \n\t"
"vfchesb %%v20,%%v17,%%v16 \n\t"
"vfchesb %%v21,%%v19,%%v18 \n\t"
"vsel %%v16,%%v16,%%v17,%%v20 \n\t"
"vsel %%v5,%%v5,%%v6,%%v20 \n\t"
"vsel %%v17,%%v18,%%v19,%%v21 \n\t"
"vsel %%v6,%%v7,%%v8,%%v21 \n\t"
"vfchsb %%v18,%%v17,%%v16 \n\t"
"vfchesb %%v18,%%v17,%%v16 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v5,%%v5,%%v6,%%v18 \n\t"
"vsegf %%v6,%%v5 \n\t"
@ -113,13 +113,13 @@ static BLASLONG ismin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *min)
"vag %%v5,%%v5,%%v4 \n\t"
"vag %%v6,%%v6,%%v4 \n\t"
"vfchsb %%v7,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v7 \n\t"
"vfchesb %%v7,%%v16,%%v0 \n\t"
"vsel %%v0,%%v0,%%v16,%%v7 \n\t"
"vsegf %%v8,%%v7 \n\t"
"vesrlg %%v7,%%v7,32 \n\t"
"vsegf %%v7,%%v7 \n\t"
"vsel %%v1,%%v5,%%v1,%%v7 \n\t"
"vsel %%v2,%%v6,%%v2,%%v8 \n\t"
"vsel %%v1,%%v1,%%v5,%%v7 \n\t"
"vsel %%v2,%%v2,%%v6,%%v8 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"vl %%v16,128(%%r1,%3) \n\t"
@ -131,10 +131,10 @@ static BLASLONG ismin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *min)
"vl %%v22,224(%%r1,%3) \n\t"
"vl %%v23,240(%%r1,%3) \n\t"
"vfchsb %%v5,%%v17,%%v16 \n\t"
"vfchsb %%v6,%%v19,%%v18 \n\t"
"vfchsb %%v7,%%v21,%%v20 \n\t"
"vfchsb %%v8,%%v23,%%v22 \n\t"
"vfchesb %%v5,%%v17,%%v16 \n\t"
"vfchesb %%v6,%%v19,%%v18 \n\t"
"vfchesb %%v7,%%v21,%%v20 \n\t"
"vfchesb %%v8,%%v23,%%v22 \n\t"
"vsel %%v16,%%v16,%%v17,%%v5 \n\t"
"vsel %%v5,%%v24,%%v25,%%v5 \n\t"
"vsel %%v17,%%v18,%%v19,%%v6 \n\t"
@ -144,14 +144,14 @@ static BLASLONG ismin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *min)
"vsel %%v19,%%v22,%%v23,%%v8 \n\t"
"vsel %%v8,%%v30,%%v31,%%v8 \n\t"
"vfchsb %%v20,%%v17,%%v16 \n\t"
"vfchsb %%v21,%%v19,%%v18 \n\t"
"vfchesb %%v20,%%v17,%%v16 \n\t"
"vfchesb %%v21,%%v19,%%v18 \n\t"
"vsel %%v16,%%v16,%%v17,%%v20 \n\t"
"vsel %%v5,%%v5,%%v6,%%v20 \n\t"
"vsel %%v17,%%v18,%%v19,%%v21 \n\t"
"vsel %%v6,%%v7,%%v8,%%v21 \n\t"
"vfchsb %%v18,%%v17,%%v16 \n\t"
"vfchesb %%v18,%%v17,%%v16 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v5,%%v5,%%v6,%%v18 \n\t"
"vsegf %%v6,%%v5 \n\t"
@ -159,13 +159,13 @@ static BLASLONG ismin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *min)
"vag %%v5,%%v5,%%v4 \n\t"
"vag %%v6,%%v6,%%v4 \n\t"
"vfchsb %%v7,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v7 \n\t"
"vfchesb %%v7,%%v16,%%v0 \n\t"
"vsel %%v0,%%v0,%%v16,%%v7 \n\t"
"vsegf %%v8,%%v7 \n\t"
"vesrlg %%v7,%%v7,32 \n\t"
"vsegf %%v7,%%v7 \n\t"
"vsel %%v1,%%v5,%%v1,%%v7 \n\t"
"vsel %%v2,%%v6,%%v2,%%v8 \n\t"
"vsel %%v1,%%v1,%%v5,%%v7 \n\t"
"vsel %%v2,%%v2,%%v6,%%v8 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"agfi %%r1, 256 \n\t"
@ -194,8 +194,8 @@ static BLASLONG ismin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *min)
"wfchsb %%v4,%%v0,%%v2 \n\t"
"vsel %%v1,%%v3,%%v1,%%v4 \n\t"
"vsel %%v0,%%v2,%%v0,%%v4 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"ste %%f0,%1 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"2: \n\t"
"nop "
:"=r"(imin),"=m"(*min)
@ -240,12 +240,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
} else {
min = 0;
minf = x[0];
i += inc_x;
j++;
BLASLONG n1 = (n - 1) & -4;
while ((j - 1) < n1) {
BLASLONG n1 = n & -4;
while (j < n1) {
if (x[i] < minf) {
min = j;

View File

@ -93,21 +93,21 @@ static BLASLONG izamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *amax)
"vfadb %%v18,%%v20,%%v21 \n\t"
"vfadb %%v19,%%v22,%%v23 \n\t"
"vfchdb %%v4,%%v16,%%v17 \n\t"
"vfchdb %%v5,%%v18,%%v19 \n\t"
"vfchedb %%v4,%%v16,%%v17 \n\t"
"vfchedb %%v5,%%v18,%%v19 \n\t"
"vsel %%v16,%%v16,%%v17,%%v4 \n\t"
"vsel %%v4,%%v24,%%v25,%%v4 \n\t"
"vsel %%v17,%%v18,%%v19,%%v5 \n\t"
"vsel %%v5,%%v26,%%v27,%%v5 \n\t"
"vfchdb %%v18,%%v16,%%v17 \n\t"
"vfchedb %%v18,%%v16,%%v17 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v4,%%v4,%%v5,%%v18 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"vfchdb %%v5,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v5 \n\t"
"vsel %%v1,%%v4,%%v1,%%v5 \n\t"
"vfchedb %%v5,%%v0,%%v16 \n\t"
"vsel %%v0,%%v0,%%v16,%%v5 \n\t"
"vsel %%v1,%%v1,%%v4,%%v5 \n\t"
"vag %%v3,%%v3,%%v2 \n\t"
"vleg %%v16,128(%%r1,%3),0 \n\t"
@ -139,21 +139,21 @@ static BLASLONG izamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *amax)
"vfadb %%v18,%%v20,%%v21 \n\t"
"vfadb %%v19,%%v22,%%v23 \n\t"
"vfchdb %%v4,%%v16,%%v17 \n\t"
"vfchdb %%v5,%%v18,%%v19 \n\t"
"vfchedb %%v4,%%v16,%%v17 \n\t"
"vfchedb %%v5,%%v18,%%v19 \n\t"
"vsel %%v16,%%v16,%%v17,%%v4 \n\t"
"vsel %%v4,%%v24,%%v25,%%v4 \n\t"
"vsel %%v17,%%v18,%%v19,%%v5 \n\t"
"vsel %%v5,%%v26,%%v27,%%v5 \n\t"
"vfchdb %%v18,%%v16,%%v17 \n\t"
"vfchedb %%v18,%%v16,%%v17 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v4,%%v4,%%v5,%%v18 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"vfchdb %%v5,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v5 \n\t"
"vsel %%v1,%%v4,%%v1,%%v5 \n\t"
"vfchedb %%v5,%%v0,%%v16 \n\t"
"vsel %%v0,%%v0,%%v16,%%v5 \n\t"
"vsel %%v1,%%v1,%%v4,%%v5 \n\t"
"vag %%v3,%%v3,%%v2 \n\t"
"agfi %%r1, 256 \n\t"
@ -171,8 +171,8 @@ static BLASLONG izamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *amax)
"wfchdb %%v4,%%v2,%%v0 \n\t"
"vsel %%v1,%%v3,%%v1,%%v4 \n\t"
"vsel %%v0,%%v2,%%v0,%%v4 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"std %%f0,%1 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"2: \n\t"
"nop "
:"=r"(iamax),"=m"(*amax)
@ -223,6 +223,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
} else {
max = 0;
maxf = CABS1(x,0);
inc_x2 = 2 * inc_x;
ix += inc_x2;

View File

@ -93,21 +93,21 @@ static BLASLONG izamin_kernel_16(BLASLONG n, FLOAT *x, FLOAT *amin)
"vfadb %%v18,%%v20,%%v21 \n\t"
"vfadb %%v19,%%v22,%%v23 \n\t"
"vfchdb %%v4,%%v17,%%v16 \n\t"
"vfchdb %%v5,%%v19,%%v18 \n\t"
"vfchedb %%v4,%%v17,%%v16 \n\t"
"vfchedb %%v5,%%v19,%%v18 \n\t"
"vsel %%v16,%%v16,%%v17,%%v4 \n\t"
"vsel %%v4,%%v24,%%v25,%%v4 \n\t"
"vsel %%v17,%%v18,%%v19,%%v5 \n\t"
"vsel %%v5,%%v26,%%v27,%%v5 \n\t"
"vfchdb %%v18,%%v17,%%v16 \n\t"
"vfchedb %%v18,%%v17,%%v16 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v4,%%v4,%%v5,%%v18 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"vfchdb %%v5,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v5 \n\t"
"vsel %%v1,%%v4,%%v1,%%v5 \n\t"
"vfchedb %%v5,%%v16,%%v0 \n\t"
"vsel %%v0,%%v0,%%v16,%%v5 \n\t"
"vsel %%v1,%%v1,%%v4,%%v5 \n\t"
"vag %%v3,%%v3,%%v2 \n\t"
"vleg %%v16,128(%%r1,%3),0 \n\t"
@ -139,21 +139,21 @@ static BLASLONG izamin_kernel_16(BLASLONG n, FLOAT *x, FLOAT *amin)
"vfadb %%v18,%%v20,%%v21 \n\t"
"vfadb %%v19,%%v22,%%v23 \n\t"
"vfchdb %%v4,%%v17,%%v16 \n\t"
"vfchdb %%v5,%%v19,%%v18 \n\t"
"vfchedb %%v4,%%v17,%%v16 \n\t"
"vfchedb %%v5,%%v19,%%v18 \n\t"
"vsel %%v16,%%v16,%%v17,%%v4 \n\t"
"vsel %%v4,%%v24,%%v25,%%v4 \n\t"
"vsel %%v17,%%v18,%%v19,%%v5 \n\t"
"vsel %%v5,%%v26,%%v27,%%v5 \n\t"
"vfchdb %%v18,%%v17,%%v16 \n\t"
"vfchedb %%v18,%%v17,%%v16 \n\t"
"vsel %%v16,%%v16,%%v17,%%v18 \n\t"
"vsel %%v4,%%v4,%%v5,%%v18 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
"vfchdb %%v5,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v5 \n\t"
"vsel %%v1,%%v4,%%v1,%%v5 \n\t"
"vfchedb %%v5,%%v16,%%v0 \n\t"
"vsel %%v0,%%v0,%%v16,%%v5 \n\t"
"vsel %%v1,%%v1,%%v4,%%v5 \n\t"
"vag %%v3,%%v3,%%v2 \n\t"
"agfi %%r1, 256 \n\t"
@ -171,8 +171,8 @@ static BLASLONG izamin_kernel_16(BLASLONG n, FLOAT *x, FLOAT *amin)
"wfchdb %%v4,%%v0,%%v2 \n\t"
"vsel %%v1,%%v3,%%v1,%%v4 \n\t"
"vsel %%v0,%%v2,%%v0,%%v4 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"std %%f0,%1 \n\t"
"vlgvg %0,%%v1,0 \n\t"
"2: \n\t"
"nop "
:"=r"(iamin),"=m"(*amin)
@ -223,6 +223,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
} else {
min = 0;
minf = CABS1(x,0);
inc_x2 = 2 * inc_x;
ix += inc_x2;

View File

@ -40,8 +40,7 @@ static FLOAT samax_kernel_64(BLASLONG n, FLOAT *x)
__asm__ volatile (
"vl %%v0,0(%2) \n\t"
"vflpsb %%v0,%%v0 \n\t"
"srlg %%r0,%1,6 \n\t"
"srlg %%r0,%1,6 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
"pfd 1, 1024(%%r1,%2) \n\t"
@ -54,83 +53,45 @@ static FLOAT samax_kernel_64(BLASLONG n, FLOAT *x)
"vl %%v21,80(%%r1,%2) \n\t"
"vl %%v22,96(%%r1,%2) \n\t"
"vl %%v23,112(%%r1,%2) \n\t"
"vflpsb %%v16, %%v16 \n\t"
"vflpsb %%v17, %%v17 \n\t"
"vflpsb %%v18, %%v18 \n\t"
"vflpsb %%v19, %%v19 \n\t"
"vflpsb %%v20, %%v20 \n\t"
"vflpsb %%v21, %%v21 \n\t"
"vflpsb %%v22, %%v22 \n\t"
"vflpsb %%v23, %%v23 \n\t"
"vl %%v24,128(%%r1,%2) \n\t"
"vl %%v25,144(%%r1,%2) \n\t"
"vl %%v26,160(%%r1,%2) \n\t"
"vl %%v27,176(%%r1,%2) \n\t"
"vl %%v28,192(%%r1,%2) \n\t"
"vl %%v29,208(%%r1,%2) \n\t"
"vl %%v30,224(%%r1,%2) \n\t"
"vl %%v31,240(%%r1,%2) \n\t"
"vfchsb %%v24,%%v16,%%v17 \n\t"
"vfchsb %%v25,%%v18,%%v19 \n\t"
"vfchsb %%v26,%%v20,%%v21 \n\t"
"vfchsb %%v27,%%v22,%%v23 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vsel %%v26,%%v20,%%v21,%%v26 \n\t"
"vsel %%v27,%%v22,%%v23,%%v27 \n\t"
"vfmaxsb %%v16,%%v16,%%v24,8 \n\t"
"vfmaxsb %%v17,%%v17,%%v25,8 \n\t"
"vfmaxsb %%v18,%%v18,%%v26,8 \n\t"
"vfmaxsb %%v19,%%v19,%%v27,8 \n\t"
"vfmaxsb %%v20,%%v20,%%v28,8 \n\t"
"vfmaxsb %%v21,%%v21,%%v29,8 \n\t"
"vfmaxsb %%v22,%%v22,%%v30,8 \n\t"
"vfmaxsb %%v23,%%v23,%%v31,8 \n\t"
"vfchsb %%v28,%%v24,%%v25 \n\t"
"vfchsb %%v29,%%v26,%%v27 \n\t"
"vsel %%v28,%%v24,%%v25,%%v28 \n\t"
"vsel %%v29,%%v26,%%v27,%%v29 \n\t"
"vfmaxsb %%v16,%%v16,%%v20,8 \n\t"
"vfmaxsb %%v17,%%v17,%%v21,8 \n\t"
"vfmaxsb %%v18,%%v18,%%v22,8 \n\t"
"vfmaxsb %%v19,%%v19,%%v23,8 \n\t"
"vfchsb %%v30,%%v28,%%v29 \n\t"
"vsel %%v30,%%v28,%%v29,%%v30 \n\t"
"vfmaxsb %%v16,%%v16,%%v18,8 \n\t"
"vfmaxsb %%v17,%%v17,%%v19,8 \n\t"
"vfchsb %%v31,%%v30,%%v0 \n\t"
"vsel %%v0,%%v30,%%v0,%%v31 \n\t"
"vfmaxsb %%v16,%%v16,%%v17,8 \n\t"
"vl %%v16,128(%%r1,%2) \n\t"
"vl %%v17,144(%%r1,%2) \n\t"
"vl %%v18,160(%%r1,%2) \n\t"
"vl %%v19,176(%%r1,%2) \n\t"
"vl %%v20,192(%%r1,%2) \n\t"
"vl %%v21,208(%%r1,%2) \n\t"
"vl %%v22,224(%%r1,%2) \n\t"
"vl %%v23,240(%%r1,%2) \n\t"
"vflpsb %%v16, %%v16 \n\t"
"vflpsb %%v17, %%v17 \n\t"
"vflpsb %%v18, %%v18 \n\t"
"vflpsb %%v19, %%v19 \n\t"
"vflpsb %%v20, %%v20 \n\t"
"vflpsb %%v21, %%v21 \n\t"
"vflpsb %%v22, %%v22 \n\t"
"vflpsb %%v23, %%v23 \n\t"
"vfchsb %%v24,%%v16,%%v17 \n\t"
"vfchsb %%v25,%%v18,%%v19 \n\t"
"vfchsb %%v26,%%v20,%%v21 \n\t"
"vfchsb %%v27,%%v22,%%v23 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vsel %%v26,%%v20,%%v21,%%v26 \n\t"
"vsel %%v27,%%v22,%%v23,%%v27 \n\t"
"vfchsb %%v28,%%v24,%%v25 \n\t"
"vfchsb %%v29,%%v26,%%v27 \n\t"
"vsel %%v28,%%v24,%%v25,%%v28 \n\t"
"vsel %%v29,%%v26,%%v27,%%v29 \n\t"
"vfchsb %%v30,%%v28,%%v29 \n\t"
"vsel %%v30,%%v28,%%v29,%%v30 \n\t"
"vfchsb %%v31,%%v30,%%v0 \n\t"
"vsel %%v0,%%v30,%%v0,%%v31 \n\t"
"vfmaxsb %%v0,%%v0,%%v16,8 \n\t" /* fold this iteration's reduced vector (v16) into the running result v0; operand must be the vector register name %v16, not a bare %16 */
"agfi %%r1, 256 \n\t"
"brctg %%r0, 0b \n\t"
"veslg %%v16,%%v0,32 \n\t"
"vfchsb %%v17,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
"vfmaxsb %%v0,%%v0,%%v16,8 \n\t"
"vrepf %%v16,%%v0,2 \n\t"
"wfchsb %%v17,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
"ler %0,%%f0 "
"vrepf %%v16,%%v0,2 \n\t"
"wfmaxsb %%v0,%%v0,%%v16,8 \n\t"
"lper %0,%%f0 "
:"=f"(amax)
:"r"(n),"ZR"((const FLOAT (*)[n])x)
:"memory","cc","r0","r1","v0","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27","v28","v29","v30","v31"
@ -172,11 +133,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
} else {
maxf=ABS(x[0]);
i += inc_x;
j++;
BLASLONG n1 = (n - 1) & -4;
while ((j - 1) < n1) {
BLASLONG n1 = n & -4;
while (j < n1) {
if (ABS(x[i]) > maxf) {
maxf = ABS(x[i]);

View File

@ -40,8 +40,7 @@ static FLOAT samin_kernel_64(BLASLONG n, FLOAT *x)
__asm__ volatile (
"vl %%v0,0(%2) \n\t"
"vflpsb %%v0,%%v0 \n\t"
"srlg %%r0,%1,6 \n\t"
"srlg %%r0,%1,6 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
"pfd 1, 1024(%%r1,%2) \n\t"
@ -54,83 +53,45 @@ static FLOAT samin_kernel_64(BLASLONG n, FLOAT *x)
"vl %%v21,80(%%r1,%2) \n\t"
"vl %%v22,96(%%r1,%2) \n\t"
"vl %%v23,112(%%r1,%2) \n\t"
"vflpsb %%v16, %%v16 \n\t"
"vflpsb %%v17, %%v17 \n\t"
"vflpsb %%v18, %%v18 \n\t"
"vflpsb %%v19, %%v19 \n\t"
"vflpsb %%v20, %%v20 \n\t"
"vflpsb %%v21, %%v21 \n\t"
"vflpsb %%v22, %%v22 \n\t"
"vflpsb %%v23, %%v23 \n\t"
"vl %%v24,128(%%r1,%2) \n\t"
"vl %%v25,144(%%r1,%2) \n\t"
"vl %%v26,160(%%r1,%2) \n\t"
"vl %%v27,176(%%r1,%2) \n\t"
"vl %%v28,192(%%r1,%2) \n\t"
"vl %%v29,208(%%r1,%2) \n\t"
"vl %%v30,224(%%r1,%2) \n\t"
"vl %%v31,240(%%r1,%2) \n\t"
"vfchsb %%v24,%%v17,%%v16 \n\t"
"vfchsb %%v25,%%v19,%%v18 \n\t"
"vfchsb %%v26,%%v21,%%v20 \n\t"
"vfchsb %%v27,%%v23,%%v22 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vsel %%v26,%%v20,%%v21,%%v26 \n\t"
"vsel %%v27,%%v22,%%v23,%%v27 \n\t"
"vfminsb %%v16,%%v16,%%v24,8 \n\t"
"vfminsb %%v17,%%v17,%%v25,8 \n\t"
"vfminsb %%v18,%%v18,%%v26,8 \n\t"
"vfminsb %%v19,%%v19,%%v27,8 \n\t"
"vfminsb %%v20,%%v20,%%v28,8 \n\t"
"vfminsb %%v21,%%v21,%%v29,8 \n\t"
"vfminsb %%v22,%%v22,%%v30,8 \n\t"
"vfminsb %%v23,%%v23,%%v31,8 \n\t"
"vfchsb %%v28,%%v25,%%v24 \n\t"
"vfchsb %%v29,%%v27,%%v26 \n\t"
"vsel %%v28,%%v24,%%v25,%%v28 \n\t"
"vsel %%v29,%%v26,%%v27,%%v29 \n\t"
"vfminsb %%v16,%%v16,%%v20,8 \n\t"
"vfminsb %%v17,%%v17,%%v21,8 \n\t"
"vfminsb %%v18,%%v18,%%v22,8 \n\t"
"vfminsb %%v19,%%v19,%%v23,8 \n\t"
"vfchsb %%v30,%%v29,%%v28 \n\t"
"vsel %%v30,%%v28,%%v29,%%v30 \n\t"
"vfminsb %%v16,%%v16,%%v18,8 \n\t"
"vfminsb %%v17,%%v17,%%v19,8 \n\t"
"vfchsb %%v31,%%v0,%%v30 \n\t"
"vsel %%v0,%%v30,%%v0,%%v31 \n\t"
"vfminsb %%v16,%%v16,%%v17,8 \n\t"
"vl %%v16,128(%%r1,%2) \n\t"
"vl %%v17,144(%%r1,%2) \n\t"
"vl %%v18,160(%%r1,%2) \n\t"
"vl %%v19,176(%%r1,%2) \n\t"
"vl %%v20,192(%%r1,%2) \n\t"
"vl %%v21,208(%%r1,%2) \n\t"
"vl %%v22,224(%%r1,%2) \n\t"
"vl %%v23,240(%%r1,%2) \n\t"
"vflpsb %%v16, %%v16 \n\t"
"vflpsb %%v17, %%v17 \n\t"
"vflpsb %%v18, %%v18 \n\t"
"vflpsb %%v19, %%v19 \n\t"
"vflpsb %%v20, %%v20 \n\t"
"vflpsb %%v21, %%v21 \n\t"
"vflpsb %%v22, %%v22 \n\t"
"vflpsb %%v23, %%v23 \n\t"
"vfchsb %%v24,%%v17,%%v16 \n\t"
"vfchsb %%v25,%%v19,%%v18 \n\t"
"vfchsb %%v26,%%v21,%%v20 \n\t"
"vfchsb %%v27,%%v23,%%v22 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vsel %%v26,%%v20,%%v21,%%v26 \n\t"
"vsel %%v27,%%v22,%%v23,%%v27 \n\t"
"vfchsb %%v28,%%v25,%%v24 \n\t"
"vfchsb %%v29,%%v27,%%v26 \n\t"
"vsel %%v28,%%v24,%%v25,%%v28 \n\t"
"vsel %%v29,%%v26,%%v27,%%v29 \n\t"
"vfchsb %%v30,%%v29,%%v28 \n\t"
"vsel %%v30,%%v28,%%v29,%%v30 \n\t"
"vfchsb %%v31,%%v0,%%v30 \n\t"
"vsel %%v0,%%v30,%%v0,%%v31 \n\t"
"vfminsb %%v0,%%v0,%%v16,8 \n\t" /* fold this iteration's reduced vector (v16) into the running result v0; operand must be the vector register name %v16, not a bare %16 */
"agfi %%r1, 256 \n\t"
"brctg %%r0, 0b \n\t"
"veslg %%v16,%%v0,32 \n\t"
"vfchsb %%v17,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
"vfminsb %%v0,%%v0,%%v16,8 \n\t"
"vrepf %%v16,%%v0,2 \n\t"
"wfchsb %%v17,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
"ler %0,%%f0 "
"vrepf %%v16,%%v0,2 \n\t"
"wfminsb %%v0,%%v0,%%v16,8 \n\t"
"lper %0,%%f0 "
:"=f"(amin)
:"r"(n),"ZR"((const FLOAT (*)[n])x)
:"memory","cc","r0","r1","v0","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27","v28","v29","v30","v31"
@ -172,11 +133,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
} else {
minf=ABS(x[0]);
i += inc_x;
j++;
BLASLONG n1 = (n - 1) & -4;
while ((j - 1) < n1) {
BLASLONG n1 = n & -4;
while (j < n1) {
if (ABS(x[i]) < minf) {
minf = ABS(x[i]);

View File

@ -33,7 +33,7 @@ static FLOAT smax_kernel_64(BLASLONG n, FLOAT *x)
__asm__ volatile (
"vl %%v0,0(%2) \n\t"
"srlg %%r0,%1,6 \n\t"
"srlg %%r0,%1,6 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
"pfd 1, 1024(%%r1,%2) \n\t"
@ -46,66 +46,44 @@ static FLOAT smax_kernel_64(BLASLONG n, FLOAT *x)
"vl %%v21,80(%%r1,%2) \n\t"
"vl %%v22,96(%%r1,%2) \n\t"
"vl %%v23,112(%%r1,%2) \n\t"
"vl %%v24,128(%%r1,%2) \n\t"
"vl %%v25,144(%%r1,%2) \n\t"
"vl %%v26,160(%%r1,%2) \n\t"
"vl %%v27,176(%%r1,%2) \n\t"
"vl %%v28,192(%%r1,%2) \n\t"
"vl %%v29,208(%%r1,%2) \n\t"
"vl %%v30,224(%%r1,%2) \n\t"
"vl %%v31,240(%%r1,%2) \n\t"
"vfchsb %%v24,%%v16,%%v17 \n\t"
"vfchsb %%v25,%%v18,%%v19 \n\t"
"vfchsb %%v26,%%v20,%%v21 \n\t"
"vfchsb %%v27,%%v22,%%v23 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vsel %%v26,%%v20,%%v21,%%v26 \n\t"
"vsel %%v27,%%v22,%%v23,%%v27 \n\t"
"vfmaxsb %%v16,%%v16,%%v24,0 \n\t"
"vfmaxsb %%v17,%%v17,%%v25,0 \n\t"
"vfmaxsb %%v18,%%v18,%%v26,0 \n\t"
"vfmaxsb %%v19,%%v19,%%v27,0 \n\t"
"vfmaxsb %%v20,%%v20,%%v28,0 \n\t"
"vfmaxsb %%v21,%%v21,%%v29,0 \n\t"
"vfmaxsb %%v22,%%v22,%%v30,0 \n\t"
"vfmaxsb %%v23,%%v23,%%v31,0 \n\t"
"vfchsb %%v28,%%v24,%%v25 \n\t"
"vfchsb %%v29,%%v26,%%v27 \n\t"
"vsel %%v28,%%v24,%%v25,%%v28 \n\t"
"vsel %%v29,%%v26,%%v27,%%v29 \n\t"
"vfmaxsb %%v16,%%v16,%%v20,0 \n\t"
"vfmaxsb %%v17,%%v17,%%v21,0 \n\t"
"vfmaxsb %%v18,%%v18,%%v22,0 \n\t"
"vfmaxsb %%v19,%%v19,%%v23,0 \n\t"
"vfchsb %%v30,%%v28,%%v29 \n\t"
"vsel %%v30,%%v28,%%v29,%%v30 \n\t"
"vfmaxsb %%v16,%%v16,%%v18,0 \n\t"
"vfmaxsb %%v17,%%v17,%%v19,0 \n\t"
"vfchsb %%v31,%%v30,%%v0 \n\t"
"vsel %%v0,%%v30,%%v0,%%v31 \n\t"
"vfmaxsb %%v16,%%v16,%%v17,0 \n\t"
"vl %%v16,128(%%r1,%2) \n\t"
"vl %%v17,144(%%r1,%2) \n\t"
"vl %%v18,160(%%r1,%2) \n\t"
"vl %%v19,176(%%r1,%2) \n\t"
"vl %%v20,192(%%r1,%2) \n\t"
"vl %%v21,208(%%r1,%2) \n\t"
"vl %%v22,224(%%r1,%2) \n\t"
"vl %%v23,240(%%r1,%2) \n\t"
"vfchsb %%v24,%%v16,%%v17 \n\t"
"vfchsb %%v25,%%v18,%%v19 \n\t"
"vfchsb %%v26,%%v20,%%v21 \n\t"
"vfchsb %%v27,%%v22,%%v23 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vsel %%v26,%%v20,%%v21,%%v26 \n\t"
"vsel %%v27,%%v22,%%v23,%%v27 \n\t"
"vfchsb %%v28,%%v24,%%v25 \n\t"
"vfchsb %%v29,%%v26,%%v27 \n\t"
"vsel %%v28,%%v24,%%v25,%%v28 \n\t"
"vsel %%v29,%%v26,%%v27,%%v29 \n\t"
"vfchsb %%v30,%%v28,%%v29 \n\t"
"vsel %%v30,%%v28,%%v29,%%v30 \n\t"
"vfchsb %%v31,%%v30,%%v0 \n\t"
"vsel %%v0,%%v30,%%v0,%%v31 \n\t"
"vfmaxsb %%v0,%%v0,%%v16,0 \n\t" /* fold this iteration's reduced vector (v16) into the running result v0; operand must be the vector register name %v16, not a bare %16 */
"agfi %%r1, 256 \n\t"
"brctg %%r0, 0b \n\t"
"veslg %%v16,%%v0,32 \n\t"
"vfchsb %%v17,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
"vfmaxsb %%v0,%%v0,%%v16,0 \n\t"
"vrepf %%v16,%%v0,2 \n\t"
"wfchsb %%v17,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
"vrepf %%v16,%%v0,2 \n\t"
"wfmaxsb %%v0,%%v0,%%v16,0 \n\t"
"ler %0,%%f0 "
:"=f"(max)
:"r"(n),"ZR"((const FLOAT (*)[n])x)
@ -148,11 +126,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
} else {
maxf=x[0];
i += inc_x;
j++;
BLASLONG n1 = (n - 1) & -4;
while ((j - 1) < n1) {
BLASLONG n1 = n & -4;
while (j < n1) {
if (x[i] > maxf) {
maxf = x[i];

View File

@ -33,7 +33,7 @@ static FLOAT smin_kernel_64(BLASLONG n, FLOAT *x)
__asm__ volatile (
"vl %%v0,0(%2) \n\t"
"srlg %%r0,%1,6 \n\t"
"srlg %%r0,%1,6 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
"pfd 1, 1024(%%r1,%2) \n\t"
@ -46,66 +46,44 @@ static FLOAT smin_kernel_64(BLASLONG n, FLOAT *x)
"vl %%v21,80(%%r1,%2) \n\t"
"vl %%v22,96(%%r1,%2) \n\t"
"vl %%v23,112(%%r1,%2) \n\t"
"vl %%v24,128(%%r1,%2) \n\t"
"vl %%v25,144(%%r1,%2) \n\t"
"vl %%v26,160(%%r1,%2) \n\t"
"vl %%v27,176(%%r1,%2) \n\t"
"vl %%v28,192(%%r1,%2) \n\t"
"vl %%v29,208(%%r1,%2) \n\t"
"vl %%v30,224(%%r1,%2) \n\t"
"vl %%v31,240(%%r1,%2) \n\t"
"vfchsb %%v24,%%v17,%%v16 \n\t"
"vfchsb %%v25,%%v19,%%v18 \n\t"
"vfchsb %%v26,%%v21,%%v20 \n\t"
"vfchsb %%v27,%%v23,%%v22 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vsel %%v26,%%v20,%%v21,%%v26 \n\t"
"vsel %%v27,%%v22,%%v23,%%v27 \n\t"
"vfminsb %%v16,%%v16,%%v24,0 \n\t"
"vfminsb %%v17,%%v17,%%v25,0 \n\t"
"vfminsb %%v18,%%v18,%%v26,0 \n\t"
"vfminsb %%v19,%%v19,%%v27,0 \n\t"
"vfminsb %%v20,%%v20,%%v28,0 \n\t"
"vfminsb %%v21,%%v21,%%v29,0 \n\t"
"vfminsb %%v22,%%v22,%%v30,0 \n\t"
"vfminsb %%v23,%%v23,%%v31,0 \n\t"
"vfchsb %%v28,%%v25,%%v24 \n\t"
"vfchsb %%v29,%%v27,%%v26 \n\t"
"vsel %%v28,%%v24,%%v25,%%v28 \n\t"
"vsel %%v29,%%v26,%%v27,%%v29 \n\t"
"vfminsb %%v16,%%v16,%%v20,0 \n\t"
"vfminsb %%v17,%%v17,%%v21,0 \n\t"
"vfminsb %%v18,%%v18,%%v22,0 \n\t"
"vfminsb %%v19,%%v19,%%v23,0 \n\t"
"vfchsb %%v30,%%v29,%%v28 \n\t"
"vsel %%v30,%%v28,%%v29,%%v30 \n\t"
"vfminsb %%v16,%%v16,%%v18,0 \n\t"
"vfminsb %%v17,%%v17,%%v19,0 \n\t"
"vfchsb %%v31,%%v0,%%v30 \n\t"
"vsel %%v0,%%v30,%%v0,%%v31 \n\t"
"vfminsb %%v16,%%v16,%%v17,0 \n\t"
"vl %%v16,128(%%r1,%2) \n\t"
"vl %%v17,144(%%r1,%2) \n\t"
"vl %%v18,160(%%r1,%2) \n\t"
"vl %%v19,176(%%r1,%2) \n\t"
"vl %%v20,192(%%r1,%2) \n\t"
"vl %%v21,208(%%r1,%2) \n\t"
"vl %%v22,224(%%r1,%2) \n\t"
"vl %%v23,240(%%r1,%2) \n\t"
"vfchsb %%v24,%%v17,%%v16 \n\t"
"vfchsb %%v25,%%v19,%%v18 \n\t"
"vfchsb %%v26,%%v21,%%v20 \n\t"
"vfchsb %%v27,%%v23,%%v22 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vsel %%v26,%%v20,%%v21,%%v26 \n\t"
"vsel %%v27,%%v22,%%v23,%%v27 \n\t"
"vfchsb %%v28,%%v25,%%v24 \n\t"
"vfchsb %%v29,%%v27,%%v26 \n\t"
"vsel %%v28,%%v24,%%v25,%%v28 \n\t"
"vsel %%v29,%%v26,%%v27,%%v29 \n\t"
"vfchsb %%v30,%%v29,%%v28 \n\t"
"vsel %%v30,%%v28,%%v29,%%v30 \n\t"
"vfchsb %%v31,%%v0,%%v30 \n\t"
"vsel %%v0,%%v30,%%v0,%%v31 \n\t"
"vfminsb %%v0,%%v0,%%v16,0 \n\t" /* fold this iteration's reduced vector (v16) into the running result v0; operand must be the vector register name %v16, not a bare %16 */
"agfi %%r1, 256 \n\t"
"brctg %%r0, 0b \n\t"
"veslg %%v16,%%v0,32 \n\t"
"vfchsb %%v17,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
"vfminsb %%v0,%%v0,%%v16,0 \n\t"
"vrepf %%v16,%%v0,2 \n\t"
"wfchsb %%v17,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
"vrepf %%v16,%%v0,2 \n\t"
"wfminsb %%v0,%%v0,%%v16,0 \n\t"
"ler %0,%%f0 "
:"=f"(min)
:"r"(n),"ZR"((const FLOAT (*)[n])x)
@ -148,11 +126,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
} else {
minf=x[0];
i += inc_x;
j++;
BLASLONG n1 = (n - 1) & -4;
while ((j - 1) < n1) {
BLASLONG n1 = n & -4;
while (j < n1) {
if (x[i] < minf) {
minf = x[i];

View File

@ -69,76 +69,66 @@ static FLOAT zamax_kernel_16(BLASLONG n, FLOAT *x)
"vleg %%v23,104(%%r1,%2),0 \n\t"
"vleg %%v22,112(%%r1,%2),1 \n\t"
"vleg %%v23,120(%%r1,%2),1 \n\t"
"vflpdb %%v16, %%v16 \n\t"
"vflpdb %%v17, %%v17 \n\t"
"vflpdb %%v18, %%v18 \n\t"
"vflpdb %%v19, %%v19 \n\t"
"vflpdb %%v20, %%v20 \n\t"
"vflpdb %%v21, %%v21 \n\t"
"vflpdb %%v22, %%v22 \n\t"
"vflpdb %%v23, %%v23 \n\t"
"vleg %%v24,128(%%r1,%2),0 \n\t"
"vleg %%v25,136(%%r1,%2),0 \n\t"
"vleg %%v24,144(%%r1,%2),1 \n\t"
"vleg %%v25,152(%%r1,%2),1 \n\t"
"vleg %%v26,160(%%r1,%2),0 \n\t"
"vleg %%v27,168(%%r1,%2),0 \n\t"
"vleg %%v26,176(%%r1,%2),1 \n\t"
"vleg %%v27,184(%%r1,%2),1 \n\t"
"vleg %%v28,192(%%r1,%2),0 \n\t"
"vleg %%v29,200(%%r1,%2),0 \n\t"
"vleg %%v28,208(%%r1,%2),1 \n\t"
"vleg %%v29,216(%%r1,%2),1 \n\t"
"vleg %%v30,224(%%r1,%2),0 \n\t"
"vleg %%v31,232(%%r1,%2),0 \n\t"
"vleg %%v30,240(%%r1,%2),1 \n\t"
"vleg %%v31,248(%%r1,%2),1 \n\t"
"vflpdb %%v16,%%v16 \n\t"
"vflpdb %%v17,%%v17 \n\t"
"vflpdb %%v18,%%v18 \n\t"
"vflpdb %%v19,%%v19 \n\t"
"vflpdb %%v20,%%v20 \n\t"
"vflpdb %%v21,%%v21 \n\t"
"vflpdb %%v22,%%v22 \n\t"
"vflpdb %%v23,%%v23 \n\t"
"vflpdb %%v24,%%v24 \n\t"
"vflpdb %%v25,%%v25 \n\t"
"vflpdb %%v26,%%v26 \n\t"
"vflpdb %%v27,%%v27 \n\t"
"vflpdb %%v28,%%v28 \n\t"
"vflpdb %%v29,%%v29 \n\t"
"vflpdb %%v30,%%v30 \n\t"
"vflpdb %%v31,%%v31 \n\t"
"vfadb %%v16,%%v16,%%v17 \n\t"
"vfadb %%v17,%%v18,%%v19 \n\t"
"vfadb %%v18,%%v20,%%v21 \n\t"
"vfadb %%v19,%%v22,%%v23 \n\t"
"vfadb %%v18,%%v18,%%v19 \n\t"
"vfadb %%v20,%%v20,%%v21 \n\t"
"vfadb %%v22,%%v22,%%v23 \n\t"
"vfadb %%v24,%%v24,%%v25 \n\t"
"vfadb %%v26,%%v26,%%v27 \n\t"
"vfadb %%v28,%%v28,%%v29 \n\t"
"vfadb %%v30,%%v30,%%v31 \n\t"
"vfchdb %%v24,%%v16,%%v17 \n\t"
"vfchdb %%v25,%%v18,%%v19 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vfmaxdb %%v16,%%v16,%%v24,0 \n\t"
"vfmaxdb %%v18,%%v18,%%v26,0 \n\t"
"vfmaxdb %%v20,%%v20,%%v28,0 \n\t"
"vfmaxdb %%v22,%%v22,%%v30,0 \n\t"
"vfchdb %%v26,%%v24,%%v25 \n\t"
"vsel %%v26,%%v24,%%v25,%%v26 \n\t"
"vfmaxdb %%v16,%%v16,%%v20,0 \n\t"
"vfmaxdb %%v18,%%v18,%%v22,0 \n\t"
"vfchdb %%v27,%%v26,%%v0 \n\t"
"vsel %%v0,%%v26,%%v0,%%v27 \n\t"
"vfmaxdb %%v16,%%v16,%%v18,0 \n\t"
"vleg %%v16,128(%%r1,%2),0 \n\t"
"vleg %%v17,136(%%r1,%2),0 \n\t"
"vleg %%v16,144(%%r1,%2),1 \n\t"
"vleg %%v17,152(%%r1,%2),1 \n\t"
"vleg %%v18,160(%%r1,%2),0 \n\t"
"vleg %%v19,168(%%r1,%2),0 \n\t"
"vleg %%v18,176(%%r1,%2),1 \n\t"
"vleg %%v19,184(%%r1,%2),1 \n\t"
"vleg %%v20,192(%%r1,%2),0 \n\t"
"vleg %%v21,200(%%r1,%2),0 \n\t"
"vleg %%v20,208(%%r1,%2),1 \n\t"
"vleg %%v21,216(%%r1,%2),1 \n\t"
"vleg %%v22,224(%%r1,%2),0 \n\t"
"vleg %%v23,232(%%r1,%2),0 \n\t"
"vleg %%v22,240(%%r1,%2),1 \n\t"
"vleg %%v23,248(%%r1,%2),1 \n\t"
"vflpdb %%v16, %%v16 \n\t"
"vflpdb %%v17, %%v17 \n\t"
"vflpdb %%v18, %%v18 \n\t"
"vflpdb %%v19, %%v19 \n\t"
"vflpdb %%v20, %%v20 \n\t"
"vflpdb %%v21, %%v21 \n\t"
"vflpdb %%v22, %%v22 \n\t"
"vflpdb %%v23, %%v23 \n\t"
"vfadb %%v16,%%v16,%%v17 \n\t"
"vfadb %%v17,%%v18,%%v19 \n\t"
"vfadb %%v18,%%v20,%%v21 \n\t"
"vfadb %%v19,%%v22,%%v23 \n\t"
"vfchdb %%v24,%%v16,%%v17 \n\t"
"vfchdb %%v25,%%v18,%%v19 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vfchdb %%v26,%%v24,%%v25 \n\t"
"vsel %%v26,%%v24,%%v25,%%v26 \n\t"
"vfchdb %%v27,%%v26,%%v0 \n\t"
"vsel %%v0,%%v26,%%v0,%%v27 \n\t"
"vfmaxdb %%v0,%%v0,%%v16,0 \n\t"
"agfi %%r1, 256 \n\t"
"brctg %%r0, 0b \n\t"
"vrepg %%v16,%%v0,1 \n\t"
"wfchdb %%v17,%%v16,%%v0 \n\t"
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
"wfmaxdb %%v0,%%v0,%%v16,0 \n\t"
"ldr %0,%%f0 "
:"=f"(amax)
:"r"(n),"ZR"((const FLOAT (*)[n])x)
@ -185,11 +175,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
maxf=CABS1(x,0);
inc_x2 = 2 * inc_x;
ix += inc_x2;
i++;
BLASLONG n1 = (n - 1) & -4;
while ((i - 1) < n1) {
BLASLONG n1 = n & -4;
while (i < n1) {
if (CABS1(x,ix) > maxf) {
maxf = CABS1(x,ix);

View File

@ -69,76 +69,66 @@ static FLOAT zamin_kernel_16(BLASLONG n, FLOAT *x)
"vleg %%v23,104(%%r1,%2),0 \n\t"
"vleg %%v22,112(%%r1,%2),1 \n\t"
"vleg %%v23,120(%%r1,%2),1 \n\t"
"vflpdb %%v16, %%v16 \n\t"
"vflpdb %%v17, %%v17 \n\t"
"vflpdb %%v18, %%v18 \n\t"
"vflpdb %%v19, %%v19 \n\t"
"vflpdb %%v20, %%v20 \n\t"
"vflpdb %%v21, %%v21 \n\t"
"vflpdb %%v22, %%v22 \n\t"
"vflpdb %%v23, %%v23 \n\t"
"vleg %%v24,128(%%r1,%2),0 \n\t"
"vleg %%v25,136(%%r1,%2),0 \n\t"
"vleg %%v24,144(%%r1,%2),1 \n\t"
"vleg %%v25,152(%%r1,%2),1 \n\t"
"vleg %%v26,160(%%r1,%2),0 \n\t"
"vleg %%v27,168(%%r1,%2),0 \n\t"
"vleg %%v26,176(%%r1,%2),1 \n\t"
"vleg %%v27,184(%%r1,%2),1 \n\t"
"vleg %%v28,192(%%r1,%2),0 \n\t"
"vleg %%v29,200(%%r1,%2),0 \n\t"
"vleg %%v28,208(%%r1,%2),1 \n\t"
"vleg %%v29,216(%%r1,%2),1 \n\t"
"vleg %%v30,224(%%r1,%2),0 \n\t"
"vleg %%v31,232(%%r1,%2),0 \n\t"
"vleg %%v30,240(%%r1,%2),1 \n\t"
"vleg %%v31,248(%%r1,%2),1 \n\t"
"vflpdb %%v16,%%v16 \n\t"
"vflpdb %%v17,%%v17 \n\t"
"vflpdb %%v18,%%v18 \n\t"
"vflpdb %%v19,%%v19 \n\t"
"vflpdb %%v20,%%v20 \n\t"
"vflpdb %%v21,%%v21 \n\t"
"vflpdb %%v22,%%v22 \n\t"
"vflpdb %%v23,%%v23 \n\t"
"vflpdb %%v24,%%v24 \n\t"
"vflpdb %%v25,%%v25 \n\t"
"vflpdb %%v26,%%v26 \n\t"
"vflpdb %%v27,%%v27 \n\t"
"vflpdb %%v28,%%v28 \n\t"
"vflpdb %%v29,%%v29 \n\t"
"vflpdb %%v30,%%v30 \n\t"
"vflpdb %%v31,%%v31 \n\t"
"vfadb %%v16,%%v16,%%v17 \n\t"
"vfadb %%v17,%%v18,%%v19 \n\t"
"vfadb %%v18,%%v20,%%v21 \n\t"
"vfadb %%v19,%%v22,%%v23 \n\t"
"vfadb %%v18,%%v18,%%v19 \n\t"
"vfadb %%v20,%%v20,%%v21 \n\t"
"vfadb %%v22,%%v22,%%v23 \n\t"
"vfadb %%v24,%%v24,%%v25 \n\t"
"vfadb %%v26,%%v26,%%v27 \n\t"
"vfadb %%v28,%%v28,%%v29 \n\t"
"vfadb %%v30,%%v30,%%v31 \n\t"
"vfchdb %%v24,%%v17,%%v16 \n\t"
"vfchdb %%v25,%%v19,%%v18 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vfmindb %%v16,%%v16,%%v24,0 \n\t"
"vfmindb %%v18,%%v18,%%v26,0 \n\t"
"vfmindb %%v20,%%v20,%%v28,0 \n\t"
"vfmindb %%v22,%%v22,%%v30,0 \n\t"
"vfchdb %%v26,%%v25,%%v24 \n\t"
"vsel %%v26,%%v24,%%v25,%%v26 \n\t"
"vfmindb %%v16,%%v16,%%v20,0 \n\t"
"vfmindb %%v18,%%v18,%%v22,0 \n\t"
"vfchdb %%v27,%%v0,%%v26 \n\t"
"vsel %%v0,%%v26,%%v0,%%v27 \n\t"
"vfmindb %%v16,%%v16,%%v18,0 \n\t"
"vleg %%v16,128(%%r1,%2),0 \n\t"
"vleg %%v17,136(%%r1,%2),0 \n\t"
"vleg %%v16,144(%%r1,%2),1 \n\t"
"vleg %%v17,152(%%r1,%2),1 \n\t"
"vleg %%v18,160(%%r1,%2),0 \n\t"
"vleg %%v19,168(%%r1,%2),0 \n\t"
"vleg %%v18,176(%%r1,%2),1 \n\t"
"vleg %%v19,184(%%r1,%2),1 \n\t"
"vleg %%v20,192(%%r1,%2),0 \n\t"
"vleg %%v21,200(%%r1,%2),0 \n\t"
"vleg %%v20,208(%%r1,%2),1 \n\t"
"vleg %%v21,216(%%r1,%2),1 \n\t"
"vleg %%v22,224(%%r1,%2),0 \n\t"
"vleg %%v23,232(%%r1,%2),0 \n\t"
"vleg %%v22,240(%%r1,%2),1 \n\t"
"vleg %%v23,248(%%r1,%2),1 \n\t"
"vflpdb %%v16, %%v16 \n\t"
"vflpdb %%v17, %%v17 \n\t"
"vflpdb %%v18, %%v18 \n\t"
"vflpdb %%v19, %%v19 \n\t"
"vflpdb %%v20, %%v20 \n\t"
"vflpdb %%v21, %%v21 \n\t"
"vflpdb %%v22, %%v22 \n\t"
"vflpdb %%v23, %%v23 \n\t"
"vfadb %%v16,%%v16,%%v17 \n\t"
"vfadb %%v17,%%v18,%%v19 \n\t"
"vfadb %%v18,%%v20,%%v21 \n\t"
"vfadb %%v19,%%v22,%%v23 \n\t"
"vfchdb %%v24,%%v17,%%v16 \n\t"
"vfchdb %%v25,%%v19,%%v18 \n\t"
"vsel %%v24,%%v16,%%v17,%%v24 \n\t"
"vsel %%v25,%%v18,%%v19,%%v25 \n\t"
"vfchdb %%v26,%%v25,%%v24 \n\t"
"vsel %%v26,%%v24,%%v25,%%v26 \n\t"
"vfchdb %%v27,%%v0,%%v26 \n\t"
"vsel %%v0,%%v26,%%v0,%%v27 \n\t"
"vfmindb %%v0,%%v0,%%v16,0 \n\t"
"agfi %%r1, 256 \n\t"
"brctg %%r0, 0b \n\t"
"vrepg %%v16,%%v0,1 \n\t"
"wfchdb %%v17,%%v0,%%v16 \n\t"
"vsel %%v0,%%v16,%%v0,%%v17 \n\t"
"wfmindb %%v0,%%v0,%%v16,0 \n\t"
"ldr %0,%%f0 "
:"=f"(amin)
:"r"(n),"ZR"((const FLOAT (*)[n])x)
@ -185,11 +175,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
minf=CABS1(x,0);
inc_x2 = 2 * inc_x;
ix += inc_x2;
i++;
BLASLONG n1 = (n - 1) & -4;
while ((i - 1) < n1) {
BLASLONG n1 = n & -4;
while (i < n1) {
if (CABS1(x,ix) < minf) {
minf = CABS1(x,ix);