AIX changes for Power8

This commit is contained in:
Kavana Bhat
2019-08-20 06:51:35 -05:00
parent 2a43062de7
commit 3dc6b26eff
48 changed files with 9263 additions and 996 deletions

View File

@@ -54,8 +54,8 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) {
"lxvd2x 47, %[i48],%[ptr_tmp] \n\t"
"lxvd2x 48, %[i64],%[ptr_tmp] \n\t"
"lxvd2x 49, %[i80],%[ptr_tmp] \n\t"
"lxvd2x 50, %[i96],%[ptr_tmp] \n\t"
"lxvd2x 51,%[i112],%[ptr_tmp] \n\t"
"lxvd2x 6, %[i96],%[ptr_tmp] \n\t"
"lxvd2x 7,%[i112],%[ptr_tmp] \n\t"
"xxlor 40,%x[start],%x[start] \n\t" //{ 1,0} vs40 | v8
"vaddudm 9,8,%[adder] \n\t" //{3,2} vs41
@@ -65,7 +65,7 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) {
"vaddudm 11,10,%[adder] \n\t" //{7,6} vs43
"lxvdsx 39,0,%[ptr_minf] \n\t" // vs39 vec_min_value
"vaddudm 4,11, %[adder] \n\t" // {9,8} -{8;8} vs36 | v4
"xxspltd 36,36,0 \n\t"
XXSPLTD_S(36,36,0)
@@ -75,24 +75,24 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) {
"xvabsdp 47, 47 \n\t"
"xvabsdp 48, 48 \n\t"
"xvabsdp 49, 49 \n\t"
"xvabsdp 50, 50 \n\t"
"xvabsdp 51, 51 \n\t"
"xvabsdp 6, 6 \n\t"
"xvabsdp 7, 7 \n\t"
//jump first half forward
"b 2f \n\t"
"b two%= \n\t"
".p2align 5 \n\t"
"1: \n\t"
".align 5 \n\t"
"one%=: \n\t"
"xxmrghd 0,44,45 \n\t"
"xxmrgld 1,44,45 \n\t"
"xxmrghd 2,46,47 \n\t"
"xxmrgld 3,46,47 \n\t"
"xxmrghd 4,48,49 \n\t"
"xxmrgld 5,48,49 \n\t"
"xxmrghd 44,50,51 \n\t"
"xxmrgld 45,50,51 \n\t"
XXMRGHD_S(0,44,45)
XXMRGLD_S(1,44,45)
XXMRGHD_S(2,46,47)
XXMRGLD_S(3,46,47)
XXMRGHD_S(4,48,49)
XXMRGLD_S(5,48,49)
XXMRGHD_S(44,6,7)
XXMRGLD_S(45,6,7)
"xvadddp 46, 0,1 \n\t"
"xvadddp 47, 2,3 \n\t"
@@ -101,15 +101,15 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) {
"xvcmpgtdp 50,46,47 \n\t "
"xvcmpgtdp 51,48,49 \n\t "
"xvcmpgtdp 6,46,47 \n\t "
"xvcmpgtdp 7,48,49 \n\t "
"addi %[ptr_tmp] ,%[ptr_tmp] , 128 \n\t"
"xxsel 32,40,41,50 \n\t"
"xxsel 0,46,47,50 \n\t"
"xxsel 33,42,43,51 \n\t"
"xxsel 1,48,49,51 \n\t"
"xxsel 32,40,41,6 \n\t"
"xxsel 0,46,47,6 \n\t"
"xxsel 33,42,43,7 \n\t"
"xxsel 1,48,49,7 \n\t"
"lxvd2x 44, 0,%[ptr_tmp] \n\t"
"lxvd2x 45, %[i16],%[ptr_tmp] \n\t"
@@ -131,8 +131,8 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) {
"lxvd2x 48, %[i64],%[ptr_tmp] \n\t"
"lxvd2x 49, %[i80],%[ptr_tmp] \n\t"
"lxvd2x 50, %[i96],%[ptr_tmp] \n\t"
"lxvd2x 51,%[i112],%[ptr_tmp] \n\t"
"lxvd2x 6, %[i96],%[ptr_tmp] \n\t"
"lxvd2x 7,%[i112],%[ptr_tmp] \n\t"
//select with previous
"xxsel 38,38,32,4 \n\t"
"xxsel 39,39,3,4 \n\t"
@@ -146,35 +146,35 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) {
"xvabsdp 47, 47 \n\t"
"xvabsdp 48, 48 \n\t"
"xvabsdp 49, 49 \n\t"
"xvabsdp 50, 50 \n\t"
"xvabsdp 51, 51 \n\t"
"xvabsdp 6, 6 \n\t"
"xvabsdp 7, 7 \n\t"
//>>/////////////////////////////// half start
"2: \n\t"
"xxmrghd 0,44,45 \n\t"
"xxmrgld 1,44,45 \n\t"
"xxmrghd 2,46,47 \n\t"
"xxmrgld 3,46,47 \n\t"
"xxmrghd 4,48,49 \n\t"
"xxmrgld 5,48,49 \n\t"
"xxmrghd 44,50,51 \n\t"
"xxmrgld 45,50,51 \n\t"
"two%=: \n\t"
XXMRGHD_S(0,44,45)
XXMRGLD_S(1,44,45)
XXMRGHD_S(2,46,47)
XXMRGLD_S(3,46,47)
XXMRGHD_S(4,48,49)
XXMRGLD_S(5,48,49)
XXMRGHD_S(44,6,7)
XXMRGLD_S(45,6,7)
"xvadddp 46, 0,1 \n\t"
"xvadddp 47, 2,3 \n\t"
"xvadddp 48, 4,5 \n\t"
"xvadddp 49, 44,45 \n\t"
"xvcmpgtdp 50,46,47 \n\t "
"xvcmpgtdp 51,48,49 \n\t "
"xvcmpgtdp 6,46,47 \n\t "
"xvcmpgtdp 7,48,49 \n\t "
"addi %[ptr_tmp] ,%[ptr_tmp] , 128 \n\t"
"xxsel 32,40,41,50 \n\t"
"xxsel 0,46,47,50 \n\t"
"xxsel 33,42,43,51 \n\t"
"xxsel 1,48,49,51 \n\t"
"xxsel 32,40,41,6 \n\t"
"xxsel 0,46,47,6 \n\t"
"xxsel 33,42,43,7 \n\t"
"xxsel 1,48,49,7 \n\t"
"lxvd2x 44, 0,%[ptr_tmp] \n\t"
"lxvd2x 45, %[i16],%[ptr_tmp] \n\t"
@@ -196,8 +196,8 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) {
"lxvd2x 48, %[i64],%[ptr_tmp] \n\t"
"lxvd2x 49, %[i80],%[ptr_tmp] \n\t"
"lxvd2x 50, %[i96],%[ptr_tmp] \n\t"
"lxvd2x 51,%[i112],%[ptr_tmp] \n\t"
"lxvd2x 6, %[i96],%[ptr_tmp] \n\t"
"lxvd2x 7,%[i112],%[ptr_tmp] \n\t"
//select with previous
"xxsel 38,38,32,4 \n\t"
"xxsel 39,39,3,4 \n\t"
@@ -209,24 +209,24 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) {
"xvabsdp 47, 47 \n\t"
"xvabsdp 48, 48 \n\t"
"xvabsdp 49, 49 \n\t"
"xvabsdp 50, 50 \n\t"
"xvabsdp 51, 51 \n\t"
"xvabsdp 6, 6 \n\t"
"xvabsdp 7, 7 \n\t"
//decrement n
"addic. %[n], %[n], -16 \n\t"
//Loop back if >0
"bgt+ 1b \n\t"
"bgt+ one%= \n\t"
"xxmrghd 0,44,45 \n\t"
"xxmrgld 1,44,45 \n\t"
"xxmrghd 2,46,47 \n\t"
"xxmrgld 3,46,47 \n\t"
"xxmrghd 4,48,49 \n\t"
"xxmrgld 5,48,49 \n\t"
"xxmrghd 44,50,51 \n\t"
"xxmrgld 45,50,51 \n\t"
XXMRGHD_S(0,44,45)
XXMRGLD_S(1,44,45)
XXMRGHD_S(2,46,47)
XXMRGLD_S(3,46,47)
XXMRGHD_S(4,48,49)
XXMRGLD_S(5,48,49)
XXMRGHD_S(44,6,7)
XXMRGLD_S(45,6,7)
"xvadddp 46, 0,1 \n\t"
"xvadddp 47, 2,3 \n\t"
@@ -235,13 +235,13 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) {
"xvcmpgtdp 50,46,47 \n\t "
"xvcmpgtdp 51,48,49 \n\t "
"xvcmpgtdp 6,46,47 \n\t "
"xvcmpgtdp 7,48,49 \n\t "
"xxsel 32,40,41,50 \n\t"
"xxsel 0,46,47,50 \n\t"
"xxsel 33,42,43,51 \n\t"
"xxsel 1,48,49,51 \n\t"
"xxsel 32,40,41,6 \n\t"
"xxsel 0,46,47,6 \n\t"
"xxsel 33,42,43,7 \n\t"
"xxsel 1,48,49,7 \n\t"
"xvcmpgtdp 2,0,1 \n\t "
"xxsel 32,32,33,2 \n\t"
@@ -260,28 +260,28 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) {
///////extract min value and min index from vector
"xxspltd 32,38,1 \n\t"
"xxspltd 40,39,1 \n\t"
XXSPLTD_S(32,38,1)
XXSPLTD_S(40,39,1)
"xvcmpeqdp. 2, 40,39 \n\t"
//cr6 bit 0 is set if all elements compare true; BI = 4*6 + bit_ind = 24. BO = 0b011at branches if CR(BI)==1; at=0b10 hints that the branch is rarely taken
//0b001110=14
"bc 14,24, 3f \n\t"
"bc 14,24, three%= \n\t"
"xvcmpgtdp 4,39, 40 \n\t"
"xxsel 0,39,40,4 \n\t"
"xxsel 1,38,32,4 \n\t"
"stxsdx 0,0,%[ptr_minf] \n\t"
"b 4f \n\t"
"b four%= \n\t"
"3: \n\t"
"three%=: \n\t"
//if the element values are equal then choose the minimum index
"xxspltd 0,40,0 \n\t"
XXSPLTD_S(0,40,0)
"vminud 0,0,6 \n\t" //vs32 vs38
"xxlor 1,32,32 \n\t"
"stxsdx 0,0,%[ptr_minf] \n\t"
"4: \n\t"
"four%=: \n\t"
"mfvsrd %[index],1 \n\t"
: [minf] "=m"(*minf),[ptr_tmp] "+&b"(x),[index] "=r"(index), [n] "+&r"(n)
@@ -290,7 +290,7 @@ static BLASLONG ziamin_kernel_16_TUNED(BLASLONG n, FLOAT *x, FLOAT *minf) {
[i64] "b"(64), [i80] "b"(80), [i96] "b"(96), [i112] "b"(112),
[start] "v"(start), [adder] "v"(temp_add_index)
: "cc", "vs0", "vs1","vs2","vs3", "vs4","vs5","vs32", "vs33", "vs34", "vs35", "vs36",
"vs37", "vs38", "vs39", "vs40", "vs41", "vs42", "vs43", "vs44", "vs45", "vs46", "vs47", "vs48", "vs49", "vs50", "vs51"
"vs37", "vs38", "vs39", "vs40", "vs41", "vs42", "vs43", "vs44", "vs45", "vs46", "vs47", "vs48", "vs49", "vs6", "vs7"
);
return index;