updated sgemm- and strmm-kernel for POWER8
This commit is contained in:
parent
68a69c5b50
commit
6a9bbfc227
|
@ -26,11 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
/**************************************************************************************
|
/**************************************************************************************
|
||||||
* 2016/03/18 Werner Saar (wernsaar@googlemail.com)
|
* 2016/04/02 Werner Saar (wernsaar@googlemail.com)
|
||||||
* BLASTEST : OK
|
* BLASTEST : OK
|
||||||
* CTEST : OK
|
* CTEST : OK
|
||||||
* TEST : OK
|
* TEST : OK
|
||||||
* LAPACK-TEST : OK
|
* LAPACK-TEST : OK
|
||||||
**************************************************************************************/
|
**************************************************************************************/
|
||||||
|
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
@ -128,17 +128,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define alpha_r vs30
|
#define alpha_r vs30
|
||||||
|
#define alpha_vr vs31
|
||||||
|
|
||||||
#define o0 0
|
#define o0 0
|
||||||
|
|
||||||
#define TBUFFER r14
|
#define BBUFFER r14
|
||||||
#define o4 r15
|
#define o4 r15
|
||||||
#define o12 r16
|
#define o12 r16
|
||||||
#define o8 r17
|
#define o8 r17
|
||||||
#define L r18
|
#define L r18
|
||||||
#define T1 r19
|
#define T1 r19
|
||||||
#define KK r20
|
#define KK r20
|
||||||
#define BB r21
|
#define BBO r21
|
||||||
#define I r22
|
#define I r22
|
||||||
#define J r23
|
#define J r23
|
||||||
#define AO r24
|
#define AO r24
|
||||||
|
@ -256,11 +257,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
|
||||||
cmpwi cr0, M, 0
|
cmpwi cr0, M, 0
|
||||||
ble .L999_H1
|
ble L999_H1
|
||||||
cmpwi cr0, N, 0
|
cmpwi cr0, N, 0
|
||||||
ble .L999_H1
|
ble L999_H1
|
||||||
cmpwi cr0, K, 0
|
cmpwi cr0, K, 0
|
||||||
ble .L999_H1
|
ble L999_H1
|
||||||
|
|
||||||
li PRE, 256
|
li PRE, 256
|
||||||
li o4 , 4
|
li o4 , 4
|
||||||
|
@ -269,18 +270,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
li o16, 16
|
li o16, 16
|
||||||
li o32, 32
|
li o32, 32
|
||||||
li o48, 48
|
li o48, 48
|
||||||
addi TBUFFER, SP, 320
|
|
||||||
|
li T1, 256
|
||||||
|
slwi T1, T1, 9 // 131072
|
||||||
|
sub BBUFFER, A, T1 // temp buffer for B unrolled
|
||||||
|
|
||||||
addi T1, SP, 300
|
addi T1, SP, 300
|
||||||
stfs f1, 0(T1)
|
stxsspx f1, o0 , T1
|
||||||
|
stxsspx f1, o4 , T1
|
||||||
|
stxsspx f1, o8 , T1
|
||||||
|
stxsspx f1, o12 , T1
|
||||||
|
|
||||||
lxsspx alpha_r, 0, T1
|
lxsspx alpha_r, o0, T1
|
||||||
|
lxvw4x alpha_vr, o0, T1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include "sgemm_logic_16x8_power8.S"
|
#include "sgemm_logic_16x8_power8.S"
|
||||||
|
|
||||||
.L999:
|
L999:
|
||||||
addi r3, 0, 0
|
addi r3, 0, 0
|
||||||
|
|
||||||
lfd f14, 0(SP)
|
lfd f14, 0(SP)
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -26,11 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
/**************************************************************************************
|
/**************************************************************************************
|
||||||
* 2016/03/18 Werner Saar (wernsaar@googlemail.com)
|
* 2016/04/02 Werner Saar (wernsaar@googlemail.com)
|
||||||
* BLASTEST : OK
|
* BLASTEST : OK
|
||||||
* CTEST : OK
|
* CTEST : OK
|
||||||
* TEST : OK
|
* TEST : OK
|
||||||
* LAPACK-TEST : OK
|
* LAPACK-TEST : OK
|
||||||
**************************************************************************************/
|
**************************************************************************************/
|
||||||
|
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
@ -128,6 +128,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define alpha_r vs30
|
#define alpha_r vs30
|
||||||
|
#define alpha_vr vs31
|
||||||
|
|
||||||
#define o0 0
|
#define o0 0
|
||||||
|
|
||||||
|
@ -152,7 +153,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define PRE r30
|
#define PRE r30
|
||||||
#define T2 r31
|
#define T2 r31
|
||||||
|
|
||||||
#include "sgemm_macros_16x8_power8.S"
|
#include "strmm_macros_16x8_power8.S"
|
||||||
|
|
||||||
|
|
||||||
#ifndef NEEDPARAM
|
#ifndef NEEDPARAM
|
||||||
|
@ -264,11 +265,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
|
||||||
cmpwi cr0, M, 0
|
cmpwi cr0, M, 0
|
||||||
ble .L999_H1
|
ble L999_H1
|
||||||
cmpwi cr0, N, 0
|
cmpwi cr0, N, 0
|
||||||
ble .L999_H1
|
ble L999_H1
|
||||||
cmpwi cr0, K, 0
|
cmpwi cr0, K, 0
|
||||||
ble .L999_H1
|
ble L999_H1
|
||||||
|
|
||||||
li PRE, 256
|
li PRE, 256
|
||||||
li o4 , 4
|
li o4 , 4
|
||||||
|
@ -280,16 +281,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
addi TBUFFER, SP, 320
|
addi TBUFFER, SP, 320
|
||||||
|
|
||||||
addi T1, SP, 300
|
addi T1, SP, 300
|
||||||
stfs f1, 0(T1)
|
stxsspx f1, o0 , T1
|
||||||
|
stxsspx f1, o4 , T1
|
||||||
lxsspx alpha_r, 0, T1
|
stxsspx f1, o8 , T1
|
||||||
|
stxsspx f1, o12 , T1
|
||||||
|
|
||||||
|
lxsspx alpha_r, o0, T1
|
||||||
|
lxvw4x alpha_vr, o0, T1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include "strmm_logic_16x8_power8.S"
|
#include "strmm_logic_16x8_power8.S"
|
||||||
|
|
||||||
.L999:
|
L999:
|
||||||
addi r3, 0, 0
|
addi r3, 0, 0
|
||||||
|
|
||||||
lfd f14, 0(SP)
|
lfd f14, 0(SP)
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
8
param.h
8
param.h
|
@ -1964,7 +1964,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define SNUMOPT 16
|
#define SNUMOPT 16
|
||||||
#define DNUMOPT 8
|
#define DNUMOPT 8
|
||||||
|
|
||||||
#define GEMM_DEFAULT_OFFSET_A 384
|
#define GEMM_DEFAULT_OFFSET_A 131072
|
||||||
#define GEMM_DEFAULT_OFFSET_B 1024
|
#define GEMM_DEFAULT_OFFSET_B 1024
|
||||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||||
|
|
||||||
|
@ -1977,17 +1977,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ZGEMM_DEFAULT_UNROLL_M 8
|
#define ZGEMM_DEFAULT_UNROLL_M 8
|
||||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_P 480
|
#define SGEMM_DEFAULT_P 960
|
||||||
#define DGEMM_DEFAULT_P 480
|
#define DGEMM_DEFAULT_P 480
|
||||||
#define CGEMM_DEFAULT_P 480
|
#define CGEMM_DEFAULT_P 480
|
||||||
#define ZGEMM_DEFAULT_P 240
|
#define ZGEMM_DEFAULT_P 240
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_Q 1440
|
#define SGEMM_DEFAULT_Q 720
|
||||||
#define DGEMM_DEFAULT_Q 720
|
#define DGEMM_DEFAULT_Q 720
|
||||||
#define CGEMM_DEFAULT_Q 720
|
#define CGEMM_DEFAULT_Q 720
|
||||||
#define ZGEMM_DEFAULT_Q 360
|
#define ZGEMM_DEFAULT_Q 360
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_R 28800
|
#define SGEMM_DEFAULT_R 14400
|
||||||
#define DGEMM_DEFAULT_R 14400
|
#define DGEMM_DEFAULT_R 14400
|
||||||
#define CGEMM_DEFAULT_R 14400
|
#define CGEMM_DEFAULT_R 14400
|
||||||
#define ZGEMM_DEFAULT_R 7200
|
#define ZGEMM_DEFAULT_R 7200
|
||||||
|
|
Loading…
Reference in New Issue