Merge pull request #2338 from kavanabhat/aix_mod

Changes to build on AIX in POWER8 mode
This commit is contained in:
Martin Kroeker
2019-12-09 17:54:49 +01:00
committed by GitHub
48 changed files with 9389 additions and 955 deletions

View File

@@ -56,8 +56,8 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) {
"lxvd2x 47, %[i48],%[ptr_tmp] \n\t"
"lxvd2x 48, %[i64],%[ptr_tmp] \n\t"
"lxvd2x 49, %[i80],%[ptr_tmp] \n\t"
"lxvd2x 50, %[i96],%[ptr_tmp] \n\t"
"lxvd2x 51,%[i112],%[ptr_tmp] \n\t"
"lxvd2x 6, %[i96],%[ptr_tmp] \n\t"
"lxvd2x 7,%[i112],%[ptr_tmp] \n\t"
"xxlor 40,%x[start],%x[start] \n\t" //{ 1,0} vs40 | v8
"vaddudm 9,8,%[adder] \n\t" //{3,2} vs41
@@ -67,7 +67,7 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) {
"vaddudm 11,10,%[adder] \n\t" //{7,6} vs43
"xxlxor 39,39,39 \n\t" // vs39 vec_max_value is zero
"vaddudm 4,11, %[adder] \n\t" // {9,8} -{8;8} vs36 | v4
"xxspltd 36,36,0 \n\t"
XXSPLTD_S(36,36,0)
@@ -77,24 +77,24 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) {
"xvabsdp 47, 47 \n\t"
"xvabsdp 48, 48 \n\t"
"xvabsdp 49, 49 \n\t"
"xvabsdp 50, 50 \n\t"
"xvabsdp 51, 51 \n\t"
"xvabsdp 6, 6 \n\t"
"xvabsdp 7, 7 \n\t"
//jump first half forward
"b 2f \n\t"
"b two%= \n\t"
".p2align 5 \n\t"
"1: \n\t"
".align 5 \n\t"
"one%=: \n\t"
"xxmrghd 0,44,45 \n\t"
"xxmrgld 1,44,45 \n\t"
"xxmrghd 2,46,47 \n\t"
"xxmrgld 3,46,47 \n\t"
"xxmrghd 4,48,49 \n\t"
"xxmrgld 5,48,49 \n\t"
"xxmrghd 44,50,51 \n\t"
"xxmrgld 45,50,51 \n\t"
XXMRGHD_S(0,44,45)
XXMRGLD_S(1,44,45)
XXMRGHD_S(2,46,47)
XXMRGLD_S(3,46,47)
XXMRGHD_S(4,48,49)
XXMRGLD_S(5,48,49)
XXMRGHD_S(44,6,7)
XXMRGLD_S(45,6,7)
"xvadddp 46, 0,1 \n\t"
"xvadddp 47, 2,3 \n\t"
@@ -103,15 +103,15 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) {
"xvcmpgtdp 50,47,46 \n\t "
"xvcmpgtdp 51,49,48 \n\t "
"xvcmpgtdp 6,47,46 \n\t "
"xvcmpgtdp 7,49,48 \n\t "
"addi %[ptr_tmp] ,%[ptr_tmp] , 128 \n\t"
"xxsel 32,40,41,50 \n\t"
"xxsel 0,46,47,50 \n\t"
"xxsel 33,42,43,51 \n\t"
"xxsel 1,48,49,51 \n\t"
"xxsel 32,40,41,6 \n\t"
"xxsel 0,46,47,6 \n\t"
"xxsel 33,42,43,7 \n\t"
"xxsel 1,48,49,7 \n\t"
"lxvd2x 44, 0,%[ptr_tmp] \n\t"
"lxvd2x 45, %[i16],%[ptr_tmp] \n\t"
@@ -133,8 +133,8 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) {
"lxvd2x 48, %[i64],%[ptr_tmp] \n\t"
"lxvd2x 49, %[i80],%[ptr_tmp] \n\t"
"lxvd2x 50, %[i96],%[ptr_tmp] \n\t"
"lxvd2x 51,%[i112],%[ptr_tmp] \n\t"
"lxvd2x 6, %[i96],%[ptr_tmp] \n\t"
"lxvd2x 7,%[i112],%[ptr_tmp] \n\t"
//select with previous
"xxsel 38,38,32,4 \n\t"
"xxsel 39,39,3,4 \n\t"
@@ -148,35 +148,35 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) {
"xvabsdp 47, 47 \n\t"
"xvabsdp 48, 48 \n\t"
"xvabsdp 49, 49 \n\t"
"xvabsdp 50, 50 \n\t"
"xvabsdp 51, 51 \n\t"
"xvabsdp 6, 6 \n\t"
"xvabsdp 7, 7 \n\t"
//>>/////////////////////////////// half start
"2: \n\t"
"xxmrghd 0,44,45 \n\t"
"xxmrgld 1,44,45 \n\t"
"xxmrghd 2,46,47 \n\t"
"xxmrgld 3,46,47 \n\t"
"xxmrghd 4,48,49 \n\t"
"xxmrgld 5,48,49 \n\t"
"xxmrghd 44,50,51 \n\t"
"xxmrgld 45,50,51 \n\t"
"two%=: \n\t"
XXMRGHD_S(0,44,45)
XXMRGLD_S(1,44,45)
XXMRGHD_S(2,46,47)
XXMRGLD_S(3,46,47)
XXMRGHD_S(4,48,49)
XXMRGLD_S(5,48,49)
XXMRGHD_S(44,6,7)
XXMRGLD_S(45,6,7)
"xvadddp 46, 0,1 \n\t"
"xvadddp 47, 2,3 \n\t"
"xvadddp 48, 4,5 \n\t"
"xvadddp 49, 44,45 \n\t"
"xvcmpgtdp 50,47,46 \n\t "
"xvcmpgtdp 51,49,48 \n\t "
"xvcmpgtdp 6,47,46 \n\t "
"xvcmpgtdp 7,49,48 \n\t "
"addi %[ptr_tmp] ,%[ptr_tmp] , 128 \n\t"
"xxsel 32,40,41,50 \n\t"
"xxsel 0,46,47,50 \n\t"
"xxsel 33,42,43,51 \n\t"
"xxsel 1,48,49,51 \n\t"
"xxsel 32,40,41,6 \n\t"
"xxsel 0,46,47,6 \n\t"
"xxsel 33,42,43,7 \n\t"
"xxsel 1,48,49,7 \n\t"
"lxvd2x 44, 0,%[ptr_tmp] \n\t"
"lxvd2x 45, %[i16],%[ptr_tmp] \n\t"
@@ -198,8 +198,8 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) {
"lxvd2x 48, %[i64],%[ptr_tmp] \n\t"
"lxvd2x 49, %[i80],%[ptr_tmp] \n\t"
"lxvd2x 50, %[i96],%[ptr_tmp] \n\t"
"lxvd2x 51,%[i112],%[ptr_tmp] \n\t"
"lxvd2x 6, %[i96],%[ptr_tmp] \n\t"
"lxvd2x 7,%[i112],%[ptr_tmp] \n\t"
//select with previous
"xxsel 38,38,32,4 \n\t"
"xxsel 39,39,3,4 \n\t"
@@ -211,24 +211,24 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) {
"xvabsdp 47, 47 \n\t"
"xvabsdp 48, 48 \n\t"
"xvabsdp 49, 49 \n\t"
"xvabsdp 50, 50 \n\t"
"xvabsdp 51, 51 \n\t"
"xvabsdp 6, 6 \n\t"
"xvabsdp 7, 7 \n\t"
//decrement n
"addic. %[n], %[n], -16 \n\t"
//Loop back if >0
"bgt+ 1b \n\t"
"bgt+ one%= \n\t"
"xxmrghd 0,44,45 \n\t"
"xxmrgld 1,44,45 \n\t"
"xxmrghd 2,46,47 \n\t"
"xxmrgld 3,46,47 \n\t"
"xxmrghd 4,48,49 \n\t"
"xxmrgld 5,48,49 \n\t"
"xxmrghd 44,50,51 \n\t"
"xxmrgld 45,50,51 \n\t"
XXMRGHD_S(0,44,45)
XXMRGLD_S(1,44,45)
XXMRGHD_S(2,46,47)
XXMRGLD_S(3,46,47)
XXMRGHD_S(4,48,49)
XXMRGLD_S(5,48,49)
XXMRGHD_S(44,6,7)
XXMRGLD_S(45,6,7)
"xvadddp 46, 0,1 \n\t"
"xvadddp 47, 2,3 \n\t"
@@ -237,13 +237,13 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) {
"xvcmpgtdp 50,47,46 \n\t "
"xvcmpgtdp 51,49,48 \n\t "
"xvcmpgtdp 6,47,46 \n\t "
"xvcmpgtdp 7,49,48 \n\t "
"xxsel 32,40,41,50 \n\t"
"xxsel 0,46,47,50 \n\t"
"xxsel 33,42,43,51 \n\t"
"xxsel 1,48,49,51 \n\t"
"xxsel 32,40,41,6 \n\t"
"xxsel 0,46,47,6 \n\t"
"xxsel 33,42,43,7 \n\t"
"xxsel 1,48,49,7 \n\t"
"xvcmpgtdp 2,1,0 \n\t "
"xxsel 32,32,33,2 \n\t"
@@ -262,28 +262,28 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) {
///////extract max value and max index from vector
"xxspltd 32,38,1 \n\t"
"xxspltd 40,39,1 \n\t"
XXSPLTD_S(32,38,1)
XXSPLTD_S(40,39,1)
"xvcmpeqdp. 2, 40,39 \n\t"
//cr6 bit 0 is set if all lanes compared true; BI = 4*6 + 0 = 24. BO = 0b011at means branch if CR(BI)==1, and at=0b10 hints that the branch occurs rarely
//BO = 0b01110 = 14
"bc 14,24, 3f \n\t"
"bc 14,24, three%= \n\t"
"xvcmpgtdp 4, 40,39 \n\t"
"xxsel 0,39,40,4 \n\t"
"xxsel 1,38,32,4 \n\t"
"stxsdx 0,0,%[ptr_maxf] \n\t"
"b 4f \n\t"
"b four%= \n\t"
"3: \n\t"
"three%=: \n\t"
//if the element values are equal then choose the minimum index
"xxspltd 0,40,0 \n\t"
XXSPLTD_S(0,40,0)
"vminud 0,0,6 \n\t" //vs32 vs38
"xxlor 1,32,32 \n\t"
"stxsdx 0,0,%[ptr_maxf] \n\t"
"4: \n\t"
"four%=: \n\t"
"mfvsrd %[index],1 \n\t"
: [maxf] "=m"(*maxf),[ptr_tmp] "+&b"(x),[index] "=r"(index), [n] "+&r"(n)
@@ -292,7 +292,7 @@ static BLASLONG ziamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *maxf) {
[i64] "b"(64), [i80] "b"(80), [i96] "b"(96), [i112] "b"(112),
[start] "v"(start), [adder] "v"(temp_add_index)
: "cc", "vs0", "vs1","vs2","vs3", "vs4","vs5","vs32", "vs33", "vs34", "vs35", "vs36",
"vs37", "vs38", "vs39", "vs40", "vs41", "vs42", "vs43", "vs44", "vs45", "vs46", "vs47", "vs48", "vs49", "vs50", "vs51"
"vs37", "vs38", "vs39", "vs40", "vs41", "vs42", "vs43", "vs44", "vs45", "vs46", "vs47", "vs48", "vs49", "vs6", "vs7"
);
return index;