Merge pull request #831 from wernsaar/develop
updated sgemm- and strmm-kernel for POWER8
This commit is contained in:
commit
99adc8b062
|
@ -26,11 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2016/03/18 Werner Saar (wernsaar@googlemail.com)
|
||||
* 2016/04/02 Werner Saar (wernsaar@googlemail.com)
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
* LAPACK-TEST : OK
|
||||
* LAPACK-TEST : OK
|
||||
**************************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
|
@ -128,17 +128,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
#define alpha_r vs30
|
||||
#define alpha_vr vs31
|
||||
|
||||
#define o0 0
|
||||
|
||||
#define TBUFFER r14
|
||||
#define BBUFFER r14
|
||||
#define o4 r15
|
||||
#define o12 r16
|
||||
#define o8 r17
|
||||
#define L r18
|
||||
#define T1 r19
|
||||
#define KK r20
|
||||
#define BB r21
|
||||
#define BBO r21
|
||||
#define I r22
|
||||
#define J r23
|
||||
#define AO r24
|
||||
|
@ -256,11 +257,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
|
||||
cmpwi cr0, M, 0
|
||||
ble .L999_H1
|
||||
ble L999_H1
|
||||
cmpwi cr0, N, 0
|
||||
ble .L999_H1
|
||||
ble L999_H1
|
||||
cmpwi cr0, K, 0
|
||||
ble .L999_H1
|
||||
ble L999_H1
|
||||
|
||||
li PRE, 256
|
||||
li o4 , 4
|
||||
|
@ -269,18 +270,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
li o16, 16
|
||||
li o32, 32
|
||||
li o48, 48
|
||||
addi TBUFFER, SP, 320
|
||||
|
||||
li T1, 256
|
||||
slwi T1, T1, 9 // 131072
|
||||
sub BBUFFER, A, T1 // temp buffer for B unrolled
|
||||
|
||||
addi T1, SP, 300
|
||||
stfs f1, 0(T1)
|
||||
stxsspx f1, o0 , T1
|
||||
stxsspx f1, o4 , T1
|
||||
stxsspx f1, o8 , T1
|
||||
stxsspx f1, o12 , T1
|
||||
|
||||
lxsspx alpha_r, 0, T1
|
||||
lxsspx alpha_r, o0, T1
|
||||
lxvw4x alpha_vr, o0, T1
|
||||
|
||||
|
||||
|
||||
#include "sgemm_logic_16x8_power8.S"
|
||||
|
||||
.L999:
|
||||
L999:
|
||||
addi r3, 0, 0
|
||||
|
||||
lfd f14, 0(SP)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -26,11 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2016/03/18 Werner Saar (wernsaar@googlemail.com)
|
||||
* 2016/04/02 Werner Saar (wernsaar@googlemail.com)
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
* LAPACK-TEST : OK
|
||||
* LAPACK-TEST : OK
|
||||
**************************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
|
@ -128,6 +128,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
#define alpha_r vs30
|
||||
#define alpha_vr vs31
|
||||
|
||||
#define o0 0
|
||||
|
||||
|
@ -152,7 +153,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define PRE r30
|
||||
#define T2 r31
|
||||
|
||||
#include "sgemm_macros_16x8_power8.S"
|
||||
#include "strmm_macros_16x8_power8.S"
|
||||
|
||||
|
||||
#ifndef NEEDPARAM
|
||||
|
@ -264,11 +265,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
|
||||
cmpwi cr0, M, 0
|
||||
ble .L999_H1
|
||||
ble L999_H1
|
||||
cmpwi cr0, N, 0
|
||||
ble .L999_H1
|
||||
ble L999_H1
|
||||
cmpwi cr0, K, 0
|
||||
ble .L999_H1
|
||||
ble L999_H1
|
||||
|
||||
li PRE, 256
|
||||
li o4 , 4
|
||||
|
@ -280,16 +281,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
addi TBUFFER, SP, 320
|
||||
|
||||
addi T1, SP, 300
|
||||
stfs f1, 0(T1)
|
||||
|
||||
lxsspx alpha_r, 0, T1
|
||||
stxsspx f1, o0 , T1
|
||||
stxsspx f1, o4 , T1
|
||||
stxsspx f1, o8 , T1
|
||||
stxsspx f1, o12 , T1
|
||||
|
||||
lxsspx alpha_r, o0, T1
|
||||
lxvw4x alpha_vr, o0, T1
|
||||
|
||||
|
||||
|
||||
#include "strmm_logic_16x8_power8.S"
|
||||
|
||||
.L999:
|
||||
L999:
|
||||
addi r3, 0, 0
|
||||
|
||||
lfd f14, 0(SP)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
8
param.h
8
param.h
|
@ -1964,7 +1964,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define SNUMOPT 16
|
||||
#define DNUMOPT 8
|
||||
|
||||
#define GEMM_DEFAULT_OFFSET_A 384
|
||||
#define GEMM_DEFAULT_OFFSET_A 131072
|
||||
#define GEMM_DEFAULT_OFFSET_B 1024
|
||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||
|
||||
|
@ -1977,17 +1977,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ZGEMM_DEFAULT_UNROLL_M 8
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||
|
||||
#define SGEMM_DEFAULT_P 480
|
||||
#define SGEMM_DEFAULT_P 960
|
||||
#define DGEMM_DEFAULT_P 480
|
||||
#define CGEMM_DEFAULT_P 480
|
||||
#define ZGEMM_DEFAULT_P 240
|
||||
|
||||
#define SGEMM_DEFAULT_Q 1440
|
||||
#define SGEMM_DEFAULT_Q 720
|
||||
#define DGEMM_DEFAULT_Q 720
|
||||
#define CGEMM_DEFAULT_Q 720
|
||||
#define ZGEMM_DEFAULT_Q 360
|
||||
|
||||
#define SGEMM_DEFAULT_R 28800
|
||||
#define SGEMM_DEFAULT_R 14400
|
||||
#define DGEMM_DEFAULT_R 14400
|
||||
#define CGEMM_DEFAULT_R 14400
|
||||
#define ZGEMM_DEFAULT_R 7200
|
||||
|
|
Loading…
Reference in New Issue