updated cgemm- and ctrmm-kernel for POWER8
This commit is contained in:
parent
12540cedb5
commit
d4c0330967
|
@ -26,11 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2016/03/18 Werner Saar (wernsaar@googlemail.com)
|
||||
* 2016/04/03 Werner Saar (wernsaar@googlemail.com)
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
* LAPACK-TEST : OK
|
||||
* LAPACK-TEST : OK
|
||||
**************************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
|
@ -130,10 +130,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
#define o0 0
|
||||
#define alpha_r vs30
|
||||
#define alpha_i vs31
|
||||
|
||||
#define TBUFFER r14
|
||||
#define alpha_dr vs28
|
||||
#define alpha_di vs29
|
||||
#define alpha_sr vs30
|
||||
#define alpha_si vs31
|
||||
|
||||
|
||||
#define NOTUSED r14
|
||||
#define L r15
|
||||
#define o12 r16
|
||||
#define o4 r17
|
||||
|
@ -271,21 +275,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "cgemm_macros_8x4_power8.S"
|
||||
|
||||
cmpwi cr0, M, 0
|
||||
ble .L999_H1
|
||||
ble L999_H1
|
||||
cmpwi cr0, N, 0
|
||||
ble .L999_H1
|
||||
ble L999_H1
|
||||
cmpwi cr0, K, 0
|
||||
ble .L999_H1
|
||||
ble L999_H1
|
||||
|
||||
slwi LDC, LDC, ZBASE_SHIFT
|
||||
li PRE, 256
|
||||
li PRE, 384
|
||||
li o4 , 4
|
||||
li o8 , 8
|
||||
li o12 , 12
|
||||
li o16 , 16
|
||||
li o32 , 32
|
||||
li o48 , 48
|
||||
addi TBUFFER, SP, 360
|
||||
|
||||
|
||||
#ifdef __64BIT__
|
||||
|
@ -294,14 +297,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
addi T1 , SP, 224
|
||||
#endif
|
||||
|
||||
lxsspx alpha_r, 0, T1
|
||||
lxsspx alpha_i, o8, T1
|
||||
stxsspx vs1, 0, T1
|
||||
lxsspx alpha_dr, 0, T1
|
||||
stxsspx vs2, o8 , T1
|
||||
lxsspx alpha_di, o8, T1
|
||||
addi T1, SP, 360
|
||||
li T2, 0
|
||||
|
||||
stw T2, 0(T1)
|
||||
stw T2, 4(T1)
|
||||
stw T2, 8(T1)
|
||||
stxsspx alpha_dr, o12, T1
|
||||
lxvw4x alpha_sr, o0 , T1
|
||||
addi T1, T1, 16
|
||||
|
||||
stw T2, 0(T1)
|
||||
stw T2, 4(T1)
|
||||
stw T2, 8(T1)
|
||||
stxsspx alpha_di, o12, T1
|
||||
lxvw4x alpha_si, o0 , T1
|
||||
|
||||
.align 5
|
||||
|
||||
#include "cgemm_logic_8x4_power8.S"
|
||||
|
||||
.L999:
|
||||
L999:
|
||||
addi r3, 0, 0
|
||||
|
||||
lfd f14, 0(SP)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -26,11 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2016/03/18 Werner Saar (wernsaar@googlemail.com)
|
||||
* 2016/04/03 Werner Saar (wernsaar@googlemail.com)
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
* LAPACK-TEST : OK
|
||||
* LAPACK-TEST : OK
|
||||
**************************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
|
@ -129,18 +129,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
#define o0 0
|
||||
#define alpha_r vs30
|
||||
#define alpha_i vs31
|
||||
#define alpha_vr vs28
|
||||
#define alpha_vi vs29
|
||||
|
||||
#define alpha_dr vs28
|
||||
#define alpha_di vs29
|
||||
#define alpha_sr vs30
|
||||
#define alpha_si vs31
|
||||
|
||||
#define o12 r12
|
||||
#define KKK r13
|
||||
#define K1 r14
|
||||
#define L r15
|
||||
#define o16 r16
|
||||
#define TBUFFER r17
|
||||
#define NOTUSED r17
|
||||
#define T2 r19
|
||||
#define KK r20
|
||||
#define o8 r21
|
||||
|
@ -278,21 +278,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "cgemm_macros_8x4_power8.S"
|
||||
|
||||
cmpwi cr0, M, 0
|
||||
ble .L999_H1
|
||||
ble L999_H1
|
||||
cmpwi cr0, N, 0
|
||||
ble .L999_H1
|
||||
ble L999_H1
|
||||
cmpwi cr0, K, 0
|
||||
ble .L999_H1
|
||||
ble L999_H1
|
||||
|
||||
slwi LDC, LDC, ZBASE_SHIFT
|
||||
li PRE, 256
|
||||
li PRE, 384
|
||||
li o4 , 4
|
||||
li o8 , 8
|
||||
li o12 , 12
|
||||
li o16 , 16
|
||||
li o32 , 32
|
||||
li o48 , 48
|
||||
addi TBUFFER, SP, 360
|
||||
|
||||
|
||||
#ifdef __64BIT__
|
||||
|
@ -301,14 +300,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
addi T1, SP, 224
|
||||
#endif
|
||||
|
||||
lxsspx alpha_r, 0, T1
|
||||
lxsspx alpha_i, o8, T1
|
||||
lxsspx alpha_dr, 0, T1
|
||||
lxsspx alpha_di, o8, T1
|
||||
addi T1, SP, 360
|
||||
li T2, 0
|
||||
|
||||
stw T2, 0(T1)
|
||||
stw T2, 4(T1)
|
||||
stw T2, 8(T1)
|
||||
stxsspx alpha_dr, o12, T1
|
||||
lxvw4x alpha_sr, o0 , T1
|
||||
addi T1, T1, 16
|
||||
|
||||
stw T2, 0(T1)
|
||||
stw T2, 4(T1)
|
||||
stw T2, 8(T1)
|
||||
stxsspx alpha_di, o12, T1
|
||||
lxvw4x alpha_si, o0 , T1
|
||||
|
||||
.align 5
|
||||
|
||||
#include "ctrmm_logic_8x4_power8.S"
|
||||
|
||||
.L999:
|
||||
L999:
|
||||
addi r3, 0, 0
|
||||
|
||||
lfd f14, 0(SP)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue