Added missing BLAS1 single-precision routines {saxpy, caxpy, cdot, crot (refactored version of srot), isamax, isamin, icamax, icamin}.

Fixed idamin and icamin so that they choose the index of the first occurrence among equal minima.
This commit is contained in:
Ubuntu
2019-01-16 15:16:21 +00:00
parent 28ca97015d
commit 8c3386be87
11 changed files with 1802 additions and 48 deletions

View File

@@ -89,10 +89,10 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
".p2align 5 \n\t"
"1: \n\t"
"xvcmpgedp 2,44,45 \n\t "
"xvcmpgedp 3,46,47 \n\t "
"xvcmpgedp 4,48,49 \n\t "
"xvcmpgedp 5,50,51 \n\t"
"xvcmpgtdp 2,44,45 \n\t "
"xvcmpgtdp 3,46,47 \n\t "
"xvcmpgtdp 4,48,49 \n\t "
"xvcmpgtdp 5,50,51 \n\t"
"xxsel 32,40,41,2 \n\t"
"xxsel 0,44,45,2 \n\t"
@@ -103,8 +103,8 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
"xxsel 35,42,43,5 \n\t"
"xxsel 47,50,51,5 \n\t"
"xvcmpgedp 2,0, 1 \n\t"
"xvcmpgedp 3, 45,47 \n\t"
"xvcmpgtdp 2,0, 1 \n\t"
"xvcmpgtdp 3, 45,47 \n\t"
"addi %[ptr_tmp] ,%[ptr_tmp] , 128 \n\t"
@@ -125,7 +125,7 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
"lxvd2x 47, %[i48],%[ptr_tmp] \n\t"
//choose smaller from first and second part
"xvcmpgedp 4, 0,5 \n\t"
"xvcmpgtdp 4, 0,5 \n\t"
"xxsel 3, 0,5,4 \n\t"
"xxsel 33,32,34,4 \n\t"
@@ -139,7 +139,7 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
"lxvd2x 51,%[i112],%[ptr_tmp] \n\t"
//compare with previous to get vec_min_index(v6 | vs38 ) and vec_min_value (vs39)
"xvcmpgedp 2,39, 3 \n\t"
"xvcmpgtdp 2,39, 3 \n\t"
"xxsel 39,39,3,2 \n\t"
"xxsel 38,38,33,2 \n\t"
@@ -162,10 +162,10 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
//<-----------jump here from first load
"2: \n\t"
"xvcmpgedp 2,44,45 \n\t "
"xvcmpgedp 3,46,47 \n\t "
"xvcmpgedp 4,48,49 \n\t "
"xvcmpgedp 5,50,51 \n\t"
"xvcmpgtdp 2,44,45 \n\t "
"xvcmpgtdp 3,46,47 \n\t "
"xvcmpgtdp 4,48,49 \n\t "
"xvcmpgtdp 5,50,51 \n\t"
"xxsel 32,40,41,2 \n\t"
"xxsel 0,44,45,2 \n\t"
@@ -176,8 +176,8 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
"xxsel 35,42,43,5 \n\t"
"xxsel 47,50,51,5 \n\t"
"xvcmpgedp 2,0, 1 \n\t"
"xvcmpgedp 3, 45,47 \n\t"
"xvcmpgtdp 2,0, 1 \n\t"
"xvcmpgtdp 3, 45,47 \n\t"
"xxsel 32,32,33,2 \n\t"
"xxsel 0 ,0,1,2 \n\t"
"xxsel 34,34,35,3 \n\t"
@@ -194,7 +194,7 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
"lxvd2x 47, %[i48],%[ptr_tmp] \n\t"
//choose smaller from first and second part
"xvcmpgedp 4, 0,5 \n\t"
"xvcmpgtdp 4, 0,5 \n\t"
"xxsel 3, 0,5,4 \n\t"
"xxsel 33,32,34,4 \n\t"
@@ -210,7 +210,7 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
//compare with previous to get vec_min_index(v6 | vs38 ) and vec_min_value (vs39)
"xvcmpgedp 2,39, 3 \n\t"
"xvcmpgtdp 2,39, 3 \n\t"
"xxsel 39,39,3,2 \n\t"
"xxsel 38,38,33,2 \n\t"
@@ -238,10 +238,10 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
//==============================================================================
"xvcmpgedp 2,44,45 \n\t "
"xvcmpgedp 3,46,47 \n\t "
"xvcmpgedp 4,48,49 \n\t "
"xvcmpgedp 5,50,51 \n\t"
"xvcmpgtdp 2,44,45 \n\t "
"xvcmpgtdp 3,46,47 \n\t "
"xvcmpgtdp 4,48,49 \n\t "
"xvcmpgtdp 5,50,51 \n\t"
"xxsel 32,40,41,2 \n\t"
"xxsel 0,44,45,2 \n\t"
@@ -252,8 +252,8 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
"xxsel 35,42,43,5 \n\t"
"xxsel 47,50,51,5 \n\t"
"xvcmpgedp 2,0, 1 \n\t"
"xvcmpgedp 3, 45,47 \n\t"
"xvcmpgtdp 2,0, 1 \n\t"
"xvcmpgtdp 3, 45,47 \n\t"
"xxsel 32,32,33,2 \n\t"
@@ -264,14 +264,14 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
// for {second 8 elements } we have to add 8 to each so that it became {from 8 to 16}
"vaddudm 2,2,4 \n\t" // vs34=vs34 + vs36{8,8}
//choose smaller from first and second part
"xvcmpgedp 4, 0,5 \n\t"
"xvcmpgtdp 4, 0,5 \n\t"
"xxsel 3, 0,5,4 \n\t"
"xxsel 33,32,34,4 \n\t"
"vaddudm 1,1,5 \n\t" // get real index for first smaller
//compare with previous to get vec_min_index(v6 | vs38 ) and vec_min_value (vs39)
"xvcmpgedp 2,39, 3 \n\t"
"xvcmpgtdp 2,39, 3 \n\t"
"xxsel 39,39,3,2 \n\t"
"xxsel 38,38,33,2 \n\t"
@@ -284,7 +284,7 @@ static BLASLONG diamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
//cr6 0 bit set if all true, cr6=4*6+bit_ind=24,0011at CR(BI)==1, at=10 hint that it occurs rarely
//0b001110=14
"bc 14,24, 3f \n\t"
"xvcmpgedp 4,39, 40 \n\t"
"xvcmpgtdp 4,39, 40 \n\t"
"xxsel 0,39,40,4 \n\t"
"xxsel 1,38,32,4 \n\t"
"stxsdx 0,0,%[ptr_minf] \n\t"