Updated the cgemm and ctrmm kernels for POWER8

This commit is contained in:
Werner Saar 2016-04-03 14:30:49 +02:00
parent 12540cedb5
commit d4c0330967
5 changed files with 2698 additions and 2582 deletions

View File

@ -26,7 +26,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
/************************************************************************************** /**************************************************************************************
* 2016/03/18 Werner Saar (wernsaar@googlemail.com) * 2016/04/03 Werner Saar (wernsaar@googlemail.com)
* BLASTEST : OK * BLASTEST : OK
* CTEST : OK * CTEST : OK
* TEST : OK * TEST : OK
@ -130,10 +130,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#define o0 0 #define o0 0
#define alpha_r vs30
#define alpha_i vs31
#define TBUFFER r14 #define alpha_dr vs28
#define alpha_di vs29
#define alpha_sr vs30
#define alpha_si vs31
#define NOTUSED r14
#define L r15 #define L r15
#define o12 r16 #define o12 r16
#define o4 r17 #define o4 r17
@ -271,21 +275,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cgemm_macros_8x4_power8.S" #include "cgemm_macros_8x4_power8.S"
cmpwi cr0, M, 0 cmpwi cr0, M, 0
ble .L999_H1 ble L999_H1
cmpwi cr0, N, 0 cmpwi cr0, N, 0
ble .L999_H1 ble L999_H1
cmpwi cr0, K, 0 cmpwi cr0, K, 0
ble .L999_H1 ble L999_H1
slwi LDC, LDC, ZBASE_SHIFT slwi LDC, LDC, ZBASE_SHIFT
li PRE, 256 li PRE, 384
li o4 , 4 li o4 , 4
li o8 , 8 li o8 , 8
li o12 , 12 li o12 , 12
li o16 , 16 li o16 , 16
li o32 , 32 li o32 , 32
li o48 , 48 li o48 , 48
addi TBUFFER, SP, 360
#ifdef __64BIT__ #ifdef __64BIT__
@ -294,14 +297,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
addi T1 , SP, 224 addi T1 , SP, 224
#endif #endif
lxsspx alpha_r, 0, T1 stxsspx vs1, 0, T1
lxsspx alpha_i, o8, T1 lxsspx alpha_dr, 0, T1
stxsspx vs2, o8 , T1
lxsspx alpha_di, o8, T1
addi T1, SP, 360
li T2, 0
stw T2, 0(T1)
stw T2, 4(T1)
stw T2, 8(T1)
stxsspx alpha_dr, o12, T1
lxvw4x alpha_sr, o0 , T1
addi T1, T1, 16
stw T2, 0(T1)
stw T2, 4(T1)
stw T2, 8(T1)
stxsspx alpha_di, o12, T1
lxvw4x alpha_si, o0 , T1
.align 5 .align 5
#include "cgemm_logic_8x4_power8.S" #include "cgemm_logic_8x4_power8.S"
.L999: L999:
addi r3, 0, 0 addi r3, 0, 0
lfd f14, 0(SP) lfd f14, 0(SP)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -26,7 +26,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
/************************************************************************************** /**************************************************************************************
* 2016/03/18 Werner Saar (wernsaar@googlemail.com) * 2016/04/03 Werner Saar (wernsaar@googlemail.com)
* BLASTEST : OK * BLASTEST : OK
* CTEST : OK * CTEST : OK
* TEST : OK * TEST : OK
@ -129,18 +129,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#define o0 0 #define o0 0
#define alpha_r vs30
#define alpha_i vs31
#define alpha_vr vs28
#define alpha_vi vs29
#define alpha_dr vs28
#define alpha_di vs29
#define alpha_sr vs30
#define alpha_si vs31
#define o12 r12 #define o12 r12
#define KKK r13 #define KKK r13
#define K1 r14 #define K1 r14
#define L r15 #define L r15
#define o16 r16 #define o16 r16
#define TBUFFER r17 #define NOTUSED r17
#define T2 r19 #define T2 r19
#define KK r20 #define KK r20
#define o8 r21 #define o8 r21
@ -278,21 +278,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cgemm_macros_8x4_power8.S" #include "cgemm_macros_8x4_power8.S"
cmpwi cr0, M, 0 cmpwi cr0, M, 0
ble .L999_H1 ble L999_H1
cmpwi cr0, N, 0 cmpwi cr0, N, 0
ble .L999_H1 ble L999_H1
cmpwi cr0, K, 0 cmpwi cr0, K, 0
ble .L999_H1 ble L999_H1
slwi LDC, LDC, ZBASE_SHIFT slwi LDC, LDC, ZBASE_SHIFT
li PRE, 256 li PRE, 384
li o4 , 4 li o4 , 4
li o8 , 8 li o8 , 8
li o12 , 12 li o12 , 12
li o16 , 16 li o16 , 16
li o32 , 32 li o32 , 32
li o48 , 48 li o48 , 48
addi TBUFFER, SP, 360
#ifdef __64BIT__ #ifdef __64BIT__
@ -301,14 +300,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
addi T1, SP, 224 addi T1, SP, 224
#endif #endif
lxsspx alpha_r, 0, T1 lxsspx alpha_dr, 0, T1
lxsspx alpha_i, o8, T1 lxsspx alpha_di, o8, T1
addi T1, SP, 360
li T2, 0
stw T2, 0(T1)
stw T2, 4(T1)
stw T2, 8(T1)
stxsspx alpha_dr, o12, T1
lxvw4x alpha_sr, o0 , T1
addi T1, T1, 16
stw T2, 0(T1)
stw T2, 4(T1)
stw T2, 8(T1)
stxsspx alpha_di, o12, T1
lxvw4x alpha_si, o0 , T1
.align 5 .align 5
#include "ctrmm_logic_8x4_power8.S" #include "ctrmm_logic_8x4_power8.S"
.L999: L999:
addi r3, 0, 0 addi r3, 0, 0
lfd f14, 0(SP) lfd f14, 0(SP)

File diff suppressed because it is too large Load Diff