optimized dgemm for POWER8
commit 0d0c6f7d7d
parent 6abec09eb4
@@ -21,12 +21,12 @@ SGEMMOTCOPYOBJ = sgemm_otcopy.o
DGEMMKERNEL = dgemm_kernel_16x4_power8.S
DGEMMINCOPY = ../generic/gemm_ncopy_16.c
DGEMMITCOPY = dgemm_tcopy_16_power8.S
DGEMMONCOPY = gemm_ncopy_4.S
DGEMMOTCOPY = gemm_tcopy_4.S
DGEMMINCOPYOBJ = dgemm_incopy.o
DGEMMITCOPYOBJ = dgemm_itcopy.o
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
DGEMMINCOPYOBJ = dgemm_incopy.o
DGEMMITCOPYOBJ = dgemm_itcopy.o
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o

CGEMMKERNEL = cgemm_kernel_8x4_power8.S
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
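
For context on the DGEMMONCOPY/DGEMMOTCOPY entries above, which now point at the generic C packing sources: a gemm_ncopy_4-style routine packs the matrix into panels of four interleaved columns so the assembly kernel can stream them with unit stride. The sketch below only illustrates that packing pattern, assuming a column-major double-precision source; it is not the exact OpenBLAS source or calling convention.

/*
 * Illustrative only: the packing pattern of a generic 4-column ncopy routine.
 * Assumes a column-major double source with leading dimension lda; the real
 * ../generic/gemm_ncopy_4.c differs in naming and edge handling.
 */
#include <stddef.h>

void ncopy_4_sketch(size_t m, size_t n, const double *a, size_t lda, double *b)
{
    size_t j = 0;
    for (; j + 4 <= n; j += 4) {                 /* full panels of 4 columns */
        const double *c0 = a + (j + 0) * lda;
        const double *c1 = a + (j + 1) * lda;
        const double *c2 = a + (j + 2) * lda;
        const double *c3 = a + (j + 3) * lda;
        for (size_t i = 0; i < m; i++) {         /* interleave the 4 columns */
            *b++ = c0[i];
            *b++ = c1[i];
            *b++ = c2[i];
            *b++ = c3[i];
        }
    }
    for (; j < n; j++) {                         /* remaining columns */
        const double *c0 = a + j * lda;
        for (size_t i = 0; i < m; i++)
            *b++ = c0[i];
    }
}
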
@@ -131,6 +131,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#define o0 0

#define T4 r12
#define T3 r11

#define o8 r15
#define o24 r16
#define ALPHA r17
@@ -265,7 +268,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
addi ALPHA, SP, 224
#endif

li PRE, 256
li PRE, 384
li o8 , 8
li o16, 16
li o24, 24
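
The li sequence above loads small constants into general-purpose registers: o8/o16/o24 (and o32/o48 elsewhere) are byte offsets used as the index operand of lxvd2x/stxvd2x, which only support register+register addressing, and PRE is the dcbt prefetch distance, shown here as 256 and 384 bytes. A rough C analogue of prefetching a fixed distance ahead of a streamed pointer follows; the constants are illustrative and __builtin_prefetch is the GCC/Clang builtin, not part of the kernel itself.

/* Sketch of a fixed prefetch distance ahead of a streaming access, mirroring
 * "li PRE, 384" + "dcbt <ptr>, PRE".  rw=0 matches dcbt (touch for read);
 * dcbtst would be the store form.  Values are illustrative. */
#include <stddef.h>

void scale_stream(double *c, size_t n, double alpha)
{
    const size_t PRE = 384;                      /* prefetch distance in bytes */
    for (size_t i = 0; i < n; i++) {
        __builtin_prefetch((const char *)&c[i] + PRE, 0, 3);
        c[i] *= alpha;
    }
}
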

(File diff suppressed because it is too large)

@@ -431,6 +431,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

mr T1, CO
addi T2, T1, 64
add T3, T1, LDC
addi T4, T3, 64

#ifndef TRMMKERNEL
lxvd2x vs0, 0, T1
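
The four pointers set up above address one slice of the C tile: T1 is the top of the current column of C, T2 its second 64-byte half (8 doubles in), T3 the next column (one LDC away), and T4 that column's second half. A small C sketch of the same pointer arithmetic, assuming LDC is already scaled to bytes as the kernel uses it; the names are illustrative.

/* Illustrative pointer layout for the result write-back.  co points at the
 * top of the current 16x4 tile of C; ldc_bytes is the column stride in bytes. */
#include <stddef.h>

typedef struct {
    double *t1;   /* mr   T1, CO      : column j,   doubles 0..7  */
    double *t2;   /* addi T2, T1, 64  : column j,   doubles 8..15 */
    double *t3;   /* add  T3, T1, LDC : column j+1, doubles 0..7  */
    double *t4;   /* addi T4, T3, 64  : column j+1, doubles 8..15 */
} tile_ptrs;

tile_ptrs tile_pointers(double *co, size_t ldc_bytes)
{
    char *base = (char *)co;
    tile_ptrs p;
    p.t1 = (double *)base;
    p.t2 = (double *)(base + 64);
    p.t3 = (double *)(base + ldc_bytes);
    p.t4 = (double *)(base + ldc_bytes + 64);
    return p;
}
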
@@ -442,6 +444,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
lxvd2x vs5, o16, T2
lxvd2x vs6, o32, T2
lxvd2x vs7, o48, T2

lxvd2x vs8, 0, T3
lxvd2x vs9, o16, T3
lxvd2x vs10, o32, T3
lxvd2x vs11, o48, T3

lxvd2x vs12, 0, T4
lxvd2x vs13, o16, T4
lxvd2x vs14, o32, T4
lxvd2x vs15, o48, T4
#endif

#ifndef TRMMKERNEL
@@ -453,45 +465,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvmaddadp vs5, vs37, alpha_r
xvmaddadp vs6, vs38, alpha_r
xvmaddadp vs7, vs39, alpha_r
#else
xvmuldp vs0, vs32, alpha_r
xvmuldp vs1, vs33, alpha_r
xvmuldp vs2, vs34, alpha_r
xvmuldp vs3, vs35, alpha_r
xvmuldp vs4, vs36, alpha_r
xvmuldp vs5, vs37, alpha_r
xvmuldp vs6, vs38, alpha_r
xvmuldp vs7, vs39, alpha_r
#endif

stxvd2x vs0, 0, T1
stxvd2x vs1, o16, T1
stxvd2x vs2, o32, T1
stxvd2x vs3, o48, T1

dcbt T1, PRE

stxvd2x vs4, 0, T2
stxvd2x vs5, o16, T2
stxvd2x vs6, o32, T2
stxvd2x vs7, o48, T2

add T1, T1, LDC
add T2, T2, LDC

#ifndef TRMMKERNEL
lxvd2x vs8, 0, T1
lxvd2x vs9, o16, T1
lxvd2x vs10, o32, T1
lxvd2x vs11, o48, T1

lxvd2x vs12, 0, T2
lxvd2x vs13, o16, T2
lxvd2x vs14, o32, T2
lxvd2x vs15, o48, T2
#endif

#ifndef TRMMKERNEL
xvmaddadp vs8, vs40, alpha_r
xvmaddadp vs9, vs41, alpha_r
xvmaddadp vs10, vs42, alpha_r
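
The two preprocessor branches above are the result update for one tile: the plain GEMM build (#ifndef TRMMKERNEL) loads the existing C values and uses xvmaddadp, i.e. C = C + alpha*AB, while the TRMM build overwrites with xvmuldp, i.e. C = alpha*AB. In scalar C the whole write-back amounts to roughly the following sketch, assuming column-major C; acc stands in for the vs32..vs63 accumulators.

/* Scalar sketch of the tile write-back.  acc[i][j] plays the role of the
 * vs32..vs63 accumulators holding the A*B partial products for a 16x4 tile. */
#include <stddef.h>

void save_tile(double *c, size_t ldc, const double acc[16][4],
               double alpha, int trmmkernel)
{
    for (size_t j = 0; j < 4; j++)
        for (size_t i = 0; i < 16; i++) {
            if (trmmkernel)
                c[i + j * ldc] = alpha * acc[i][j];    /* xvmuldp   path */
            else
                c[i + j * ldc] += alpha * acc[i][j];   /* xvmaddadp path */
        }
}
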
@@ -501,6 +474,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvmaddadp vs14, vs46, alpha_r
xvmaddadp vs15, vs47, alpha_r
#else
xvmuldp vs0, vs32, alpha_r
xvmuldp vs1, vs33, alpha_r
xvmuldp vs2, vs34, alpha_r
xvmuldp vs3, vs35, alpha_r
xvmuldp vs4, vs36, alpha_r
xvmuldp vs5, vs37, alpha_r
xvmuldp vs6, vs38, alpha_r
xvmuldp vs7, vs39, alpha_r
xvmuldp vs8, vs40, alpha_r
xvmuldp vs9, vs41, alpha_r
xvmuldp vs10, vs42, alpha_r
@@ -511,20 +492,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvmuldp vs15, vs47, alpha_r
#endif

stxvd2x vs8, 0, T1
stxvd2x vs9, o16, T1
stxvd2x vs10, o32, T1
stxvd2x vs11, o48, T1
stxvd2x vs0, 0, T1
stxvd2x vs1, o16, T1
stxvd2x vs2, o32, T1
stxvd2x vs3, o48, T1

dcbt T1, PRE
stxvd2x vs4, 0, T2
stxvd2x vs5, o16, T2
stxvd2x vs6, o32, T2
stxvd2x vs7, o48, T2

stxvd2x vs12, 0, T2
stxvd2x vs13, o16, T2
stxvd2x vs14, o32, T2
stxvd2x vs15, o48, T2
stxvd2x vs8, 0, T3
stxvd2x vs9, o16, T3
stxvd2x vs10, o32, T3
stxvd2x vs11, o48, T3

add T1, T1, LDC
add T2, T2, LDC
stxvd2x vs12, 0, T4
stxvd2x vs13, o16, T4
stxvd2x vs14, o32, T4
stxvd2x vs15, o48, T4

slwi T4, LDC, 1
add T1, T1, T4
add T3, T3, T4
addi T2, T1, 64
addi T4, T3, 64

#ifndef TRMMKERNEL
lxvd2x vs0, 0, T1
@@ -536,6 +528,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
lxvd2x vs5, o16, T2
lxvd2x vs6, o32, T2
lxvd2x vs7, o48, T2

lxvd2x vs8, 0, T3
lxvd2x vs9, o16, T3
lxvd2x vs10, o32, T3
lxvd2x vs11, o48, T3

lxvd2x vs12, 0, T4
lxvd2x vs13, o16, T4
lxvd2x vs14, o32, T4
lxvd2x vs15, o48, T4
#endif

#ifndef TRMMKERNEL
@@ -547,45 +549,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvmaddadp vs5, vs53, alpha_r
xvmaddadp vs6, vs54, alpha_r
xvmaddadp vs7, vs55, alpha_r
#else
xvmuldp vs0, vs48, alpha_r
xvmuldp vs1, vs49, alpha_r
xvmuldp vs2, vs50, alpha_r
xvmuldp vs3, vs51, alpha_r
xvmuldp vs4, vs52, alpha_r
xvmuldp vs5, vs53, alpha_r
xvmuldp vs6, vs54, alpha_r
xvmuldp vs7, vs55, alpha_r
#endif

stxvd2x vs0, 0, T1
stxvd2x vs1, o16, T1
stxvd2x vs2, o32, T1
stxvd2x vs3, o48, T1

dcbt T1, PRE

stxvd2x vs4, 0, T2
stxvd2x vs5, o16, T2
stxvd2x vs6, o32, T2
stxvd2x vs7, o48, T2

add T1, T1, LDC
add T2, T2, LDC

#ifndef TRMMKERNEL
lxvd2x vs8, 0, T1
lxvd2x vs9, o16, T1
lxvd2x vs10, o32, T1
lxvd2x vs11, o48, T1

lxvd2x vs12, 0, T2
lxvd2x vs13, o16, T2
lxvd2x vs14, o32, T2
lxvd2x vs15, o48, T2
#endif

#ifndef TRMMKERNEL
xvmaddadp vs8, vs56, alpha_r
xvmaddadp vs9, vs57, alpha_r
xvmaddadp vs10, vs58, alpha_r
@@ -595,6 +558,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvmaddadp vs14, vs62, alpha_r
xvmaddadp vs15, vs63, alpha_r
#else
xvmuldp vs0, vs48, alpha_r
xvmuldp vs1, vs49, alpha_r
xvmuldp vs2, vs50, alpha_r
xvmuldp vs3, vs51, alpha_r
xvmuldp vs4, vs52, alpha_r
xvmuldp vs5, vs53, alpha_r
xvmuldp vs6, vs54, alpha_r
xvmuldp vs7, vs55, alpha_r
xvmuldp vs8, vs56, alpha_r
xvmuldp vs9, vs57, alpha_r
xvmuldp vs10, vs58, alpha_r
@@ -605,17 +576,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvmuldp vs15, vs63, alpha_r
#endif

stxvd2x vs8, 0, T1
stxvd2x vs9, o16, T1
stxvd2x vs10, o32, T1
stxvd2x vs11, o48, T1
stxvd2x vs0, 0, T1
stxvd2x vs1, o16, T1
stxvd2x vs2, o32, T1
stxvd2x vs3, o48, T1

dcbt T1, PRE
stxvd2x vs4, 0, T2
stxvd2x vs5, o16, T2
stxvd2x vs6, o32, T2
stxvd2x vs7, o48, T2

stxvd2x vs12, 0, T2
stxvd2x vs13, o16, T2
stxvd2x vs14, o32, T2
stxvd2x vs15, o48, T2
stxvd2x vs8, 0, T3
stxvd2x vs9, o16, T3
stxvd2x vs10, o32, T3
stxvd2x vs11, o48, T3

stxvd2x vs12, 0, T4
stxvd2x vs13, o16, T4
stxvd2x vs14, o32, T4
stxvd2x vs15, o48, T4

addi CO, CO, 128
@@ -170,7 +170,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add B2, B2, B
add B1, B1, B

li PREA, 768
li PREA, 256
addi PREB, M16, 128

li o8, 8
@@ -57,16 +57,20 @@ DCOPYT_L4_BEGIN:

DCOPYT_L4x16_LOOP:

/*
addi T1, PREB, 128
addi T2, PREB, 256
*/
dcbt A0, PREA
dcbt A1, PREA
dcbt A2, PREA
dcbt A3, PREA
/*
dcbtst BO, M16
dcbtst BO, PREB
dcbtst BO, T1
dcbtst BO, T2
*/
COPY_4x16

add BO, BO, M16
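
The loop above streams one 4x16 block per iteration: the four dcbt instructions touch the source rows A0..A3 a PREA bytes ahead, COPY_4x16 moves the block into the packed buffer, and BO advances by M16; the dcbtst prefetches of the destination are left commented out. A loose C analogue of one iteration follows; the element order produced by COPY_4x16 is not reproduced here, and PREA and the row pointers are illustrative.

/* Loose C analogue of one DCOPYT_L4x16_LOOP iteration: prefetch the four
 * source rows PREA bytes ahead, then copy a 4x16 block of doubles into the
 * packed buffer.  The real COPY_4x16 macro is hand-scheduled VSX code and
 * may lay the elements out differently. */
#include <stddef.h>

double *copy_4x16_sketch(const double *a0, const double *a1,
                         const double *a2, const double *a3,
                         double *bo, size_t prea)
{
    __builtin_prefetch((const char *)a0 + prea, 0, 3);   /* dcbt A0, PREA */
    __builtin_prefetch((const char *)a1 + prea, 0, 3);   /* dcbt A1, PREA */
    __builtin_prefetch((const char *)a2 + prea, 0, 3);   /* dcbt A2, PREA */
    __builtin_prefetch((const char *)a3 + prea, 0, 3);   /* dcbt A3, PREA */

    for (int i = 0; i < 16; i++) *bo++ = a0[i];
    for (int i = 0; i < 16; i++) *bo++ = a1[i];
    for (int i = 0; i < 16; i++) *bo++ = a2[i];
    for (int i = 0; i < 16; i++) *bo++ = a3[i];
    return bo;                                           /* add BO, BO, M16 */
}
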
@@ -152,7 +152,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define PRE r30
#define T2 r31

#include "dgemm_macros_16x4_power8.S"
#include "dtrmm_macros_16x4_power8.S"


#ifndef NEEDPARAM

(File diff suppressed because it is too large)

param.h
@@ -410,7 +410,100 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#endif

#if defined(STEAMROLLER) || defined(EXCAVATOR)
#ifdef STEAMROLLER
#define SNUMOPT 8
#define DNUMOPT 4

#define GEMM_DEFAULT_OFFSET_A 64
#define GEMM_DEFAULT_OFFSET_B 832
#define GEMM_DEFAULT_ALIGN 0x0fffUL


#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1

#ifdef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1
#else
#define SGEMM_DEFAULT_UNROLL_N 2
#define DGEMM_DEFAULT_UNROLL_N 2
#define SGEMM_DEFAULT_UNROLL_M 16
#define DGEMM_DEFAULT_UNROLL_M 8
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_M 2
#define XGEMM_DEFAULT_UNROLL_M 1
#define CGEMM3M_DEFAULT_UNROLL_N 4
#define CGEMM3M_DEFAULT_UNROLL_M 8
#define ZGEMM3M_DEFAULT_UNROLL_N 4
#define ZGEMM3M_DEFAULT_UNROLL_M 4
#define GEMV_UNROLL 8
#endif

#if defined(ARCH_X86_64)
#define SGEMM_DEFAULT_P 768
#define DGEMM_DEFAULT_P 576
#define ZGEMM_DEFAULT_P 288
#define CGEMM_DEFAULT_P 576
#else
#define SGEMM_DEFAULT_P 448
#define DGEMM_DEFAULT_P 480
#define ZGEMM_DEFAULT_P 112
#define CGEMM_DEFAULT_P 224
#endif
#define QGEMM_DEFAULT_P 112
#define XGEMM_DEFAULT_P 56

#if defined(ARCH_X86_64)
#define SGEMM_DEFAULT_Q 192
#define DGEMM_DEFAULT_Q 160
#define ZGEMM_DEFAULT_Q 160
#define CGEMM_DEFAULT_Q 160
#else
#define SGEMM_DEFAULT_Q 224
#define DGEMM_DEFAULT_Q 224
#define ZGEMM_DEFAULT_Q 224
#define CGEMM_DEFAULT_Q 224
#endif
#define QGEMM_DEFAULT_Q 224
#define XGEMM_DEFAULT_Q 224

#define CGEMM3M_DEFAULT_P 448
#define ZGEMM3M_DEFAULT_P 224
#define XGEMM3M_DEFAULT_P 112
#define CGEMM3M_DEFAULT_Q 224
#define ZGEMM3M_DEFAULT_Q 224
#define XGEMM3M_DEFAULT_Q 224
#define CGEMM3M_DEFAULT_R 12288
#define ZGEMM3M_DEFAULT_R 12288
#define XGEMM3M_DEFAULT_R 12288

#define SGEMM_DEFAULT_R 12288
#define QGEMM_DEFAULT_R qgemm_r
#define DGEMM_DEFAULT_R 12288
#define CGEMM_DEFAULT_R cgemm_r
#define ZGEMM_DEFAULT_R zgemm_r
#define XGEMM_DEFAULT_R xgemm_r

#define SYMV_P 16
#define HAVE_EXCLUSIVE_CACHE

#define GEMM_THREAD gemm_thread_mn

#endif


#ifdef EXCAVATOR
#define SNUMOPT 8
#define DNUMOPT 4
@@ -1885,12 +1978,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_UNROLL_N 2

#define SGEMM_DEFAULT_P 1280
#define DGEMM_DEFAULT_P 640
#define DGEMM_DEFAULT_P 768
#define CGEMM_DEFAULT_P 640
#define ZGEMM_DEFAULT_P 320

#define SGEMM_DEFAULT_Q 640
#define DGEMM_DEFAULT_Q 640
#define DGEMM_DEFAULT_Q 768
#define CGEMM_DEFAULT_Q 640
#define ZGEMM_DEFAULT_Q 640
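
DGEMM_DEFAULT_P and DGEMM_DEFAULT_Q are the cache-blocking sizes the level-3 driver uses to cut the m and k dimensions before packing panels for the 16x4 micro-kernel; the hunk above swaps the values 640 and 768 for this target. A naive blocked DGEMM in C shows the role these two parameters play (a sketch, assuming column-major storage and C := C + alpha*A*B; the real driver additionally packs A/B panels and blocks n).

/* Naive illustration of P/Q blocking: k is cut into depth-Q slabs and m into
 * height-P slabs so the slab a kernel sweep touches fits in cache.  The 768
 * values mirror the new defaults; everything else is a simplification. */
#include <stddef.h>

#define DGEMM_P 768   /* block of rows of A              */
#define DGEMM_Q 768   /* block of the k (depth) dimension */

void dgemm_blocked(size_t m, size_t n, size_t k, double alpha,
                   const double *A, size_t lda,
                   const double *B, size_t ldb,
                   double *C, size_t ldc)
{
    for (size_t ks = 0; ks < k; ks += DGEMM_Q) {
        size_t ke = ks + DGEMM_Q < k ? ks + DGEMM_Q : k;
        for (size_t ms = 0; ms < m; ms += DGEMM_P) {
            size_t me = ms + DGEMM_P < m ? ms + DGEMM_P : m;
            for (size_t j = 0; j < n; j++)
                for (size_t kk = ks; kk < ke; kk++) {
                    double bkj = alpha * B[kk + j * ldb];
                    for (size_t i = ms; i < me; i++)
                        C[i + j * ldc] += A[i + kk * lda] * bkj;
                }
        }
    }
}
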