Added missing BLAS1 single-precision kernels {saxpy, caxpy, cdot, crot (refactored version of srot), isamax, isamin, icamax, icamin}.
Fixed idamin and icamin so that they choose the index of the first occurrence among equal minima.
This commit is contained in:
@@ -89,10 +89,10 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
|
||||
".p2align 5 \n\t"
|
||||
|
||||
"1: \n\t"
|
||||
"xvcmpgedp 2,44,45 \n\t "
|
||||
"xvcmpgedp 3,46,47 \n\t "
|
||||
"xvcmpgedp 4,48,49 \n\t "
|
||||
"xvcmpgedp 5,50,51 \n\t"
|
||||
"xvcmpgtdp 2,44,45 \n\t "
|
||||
"xvcmpgtdp 3,46,47 \n\t "
|
||||
"xvcmpgtdp 4,48,49 \n\t "
|
||||
"xvcmpgtdp 5,50,51 \n\t"
|
||||
|
||||
"xxsel 32,40,41,2 \n\t"
|
||||
"xxsel 0,44,45,2 \n\t"
|
||||
@@ -103,8 +103,8 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
|
||||
"xxsel 35,42,43,5 \n\t"
|
||||
"xxsel 47,50,51,5 \n\t"
|
||||
|
||||
"xvcmpgedp 2,0, 1 \n\t"
|
||||
"xvcmpgedp 3, 45,47 \n\t"
|
||||
"xvcmpgtdp 2,0, 1 \n\t"
|
||||
"xvcmpgtdp 3, 45,47 \n\t"
|
||||
|
||||
"addi %[ptr_tmp] ,%[ptr_tmp] , 128 \n\t"
|
||||
|
||||
@@ -125,7 +125,7 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
|
||||
"lxvd2x 47, %[i48],%[ptr_tmp] \n\t"
|
||||
|
||||
//choose smaller from first and second part
|
||||
"xvcmpgedp 4, 0,5 \n\t"
|
||||
"xvcmpgtdp 4, 0,5 \n\t"
|
||||
"xxsel 3, 0,5,4 \n\t"
|
||||
"xxsel 33,32,34,4 \n\t"
|
||||
|
||||
@@ -139,7 +139,7 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
|
||||
"lxvd2x 51,%[i112],%[ptr_tmp] \n\t"
|
||||
|
||||
//compare with previous to get vec_min_index(v6 | vs38 ) and vec_min_value (vs39)
|
||||
"xvcmpgedp 2,39, 3 \n\t"
|
||||
"xvcmpgtdp 2,39, 3 \n\t"
|
||||
"xxsel 39,39,3,2 \n\t"
|
||||
"xxsel 38,38,33,2 \n\t"
|
||||
|
||||
@@ -162,10 +162,10 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
|
||||
//<-----------jump here from first load
|
||||
"2: \n\t"
|
||||
|
||||
"xvcmpgedp 2,44,45 \n\t "
|
||||
"xvcmpgedp 3,46,47 \n\t "
|
||||
"xvcmpgedp 4,48,49 \n\t "
|
||||
"xvcmpgedp 5,50,51 \n\t"
|
||||
"xvcmpgtdp 2,44,45 \n\t "
|
||||
"xvcmpgtdp 3,46,47 \n\t "
|
||||
"xvcmpgtdp 4,48,49 \n\t "
|
||||
"xvcmpgtdp 5,50,51 \n\t"
|
||||
|
||||
"xxsel 32,40,41,2 \n\t"
|
||||
"xxsel 0,44,45,2 \n\t"
|
||||
@@ -176,8 +176,8 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
|
||||
"xxsel 35,42,43,5 \n\t"
|
||||
"xxsel 47,50,51,5 \n\t"
|
||||
|
||||
"xvcmpgedp 2,0, 1 \n\t"
|
||||
"xvcmpgedp 3, 45,47 \n\t"
|
||||
"xvcmpgtdp 2,0, 1 \n\t"
|
||||
"xvcmpgtdp 3, 45,47 \n\t"
|
||||
"xxsel 32,32,33,2 \n\t"
|
||||
"xxsel 0 ,0,1,2 \n\t"
|
||||
"xxsel 34,34,35,3 \n\t"
|
||||
@@ -194,7 +194,7 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
|
||||
"lxvd2x 47, %[i48],%[ptr_tmp] \n\t"
|
||||
|
||||
//choose smaller from first and second part
|
||||
"xvcmpgedp 4, 0,5 \n\t"
|
||||
"xvcmpgtdp 4, 0,5 \n\t"
|
||||
"xxsel 3, 0,5,4 \n\t"
|
||||
"xxsel 33,32,34,4 \n\t"
|
||||
|
||||
@@ -210,7 +210,7 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
|
||||
|
||||
|
||||
//compare with previous to get vec_min_index(v6 | vs38 ) and vec_min_value (vs39)
|
||||
"xvcmpgedp 2,39, 3 \n\t"
|
||||
"xvcmpgtdp 2,39, 3 \n\t"
|
||||
"xxsel 39,39,3,2 \n\t"
|
||||
"xxsel 38,38,33,2 \n\t"
|
||||
|
||||
@@ -238,10 +238,10 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
|
||||
|
||||
//==============================================================================
|
||||
|
||||
"xvcmpgedp 2,44,45 \n\t "
|
||||
"xvcmpgedp 3,46,47 \n\t "
|
||||
"xvcmpgedp 4,48,49 \n\t "
|
||||
"xvcmpgedp 5,50,51 \n\t"
|
||||
"xvcmpgtdp 2,44,45 \n\t "
|
||||
"xvcmpgtdp 3,46,47 \n\t "
|
||||
"xvcmpgtdp 4,48,49 \n\t "
|
||||
"xvcmpgtdp 5,50,51 \n\t"
|
||||
|
||||
"xxsel 32,40,41,2 \n\t"
|
||||
"xxsel 0,44,45,2 \n\t"
|
||||
@@ -252,8 +252,8 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
|
||||
"xxsel 35,42,43,5 \n\t"
|
||||
"xxsel 47,50,51,5 \n\t"
|
||||
|
||||
"xvcmpgedp 2,0, 1 \n\t"
|
||||
"xvcmpgedp 3, 45,47 \n\t"
|
||||
"xvcmpgtdp 2,0, 1 \n\t"
|
||||
"xvcmpgtdp 3, 45,47 \n\t"
|
||||
|
||||
|
||||
"xxsel 32,32,33,2 \n\t"
|
||||
@@ -264,14 +264,14 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
|
||||
// for the {second 8 elements} we have to add 8 to each index so that they become {from 8 to 16}
|
||||
"vaddudm 2,2,4 \n\t" // vs34=vs34 + vs36{8,8}
|
||||
//choose smaller from first and second part
|
||||
"xvcmpgedp 4, 0,5 \n\t"
|
||||
"xvcmpgtdp 4, 0,5 \n\t"
|
||||
"xxsel 3, 0,5,4 \n\t"
|
||||
"xxsel 33,32,34,4 \n\t"
|
||||
|
||||
"vaddudm 1,1,5 \n\t" // get real index for first smaller
|
||||
|
||||
//compare with previous to get vec_min_index(v6 | vs38 ) and vec_min_value (vs39)
|
||||
"xvcmpgedp 2,39, 3 \n\t"
|
||||
"xvcmpgtdp 2,39, 3 \n\t"
|
||||
"xxsel 39,39,3,2 \n\t"
|
||||
"xxsel 38,38,33,2 \n\t"
|
||||
|
||||
@@ -284,7 +284,7 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
|
||||
//cr6 bit 0 is set if all true; cr6 = 4*6 + bit_ind = 24; 0011at: CR(BI)==1; at=10 hints that it occurs rarely
|
||||
//0b001110=14
|
||||
"bc 14,24, 3f \n\t"
|
||||
"xvcmpgedp 4,39, 40 \n\t"
|
||||
"xvcmpgtdp 4,39, 40 \n\t"
|
||||
"xxsel 0,39,40,4 \n\t"
|
||||
"xxsel 1,38,32,4 \n\t"
|
||||
"stxsdx 0,0,%[ptr_minf] \n\t"
|
||||
|
||||
Reference in New Issue
Block a user