optimized dgemm for POWER8
This commit is contained in:
parent
6abec09eb4
commit
0d0c6f7d7d
|
@ -21,12 +21,12 @@ SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||||
DGEMMKERNEL = dgemm_kernel_16x4_power8.S
|
DGEMMKERNEL = dgemm_kernel_16x4_power8.S
|
||||||
DGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
DGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
||||||
DGEMMITCOPY = dgemm_tcopy_16_power8.S
|
DGEMMITCOPY = dgemm_tcopy_16_power8.S
|
||||||
DGEMMONCOPY = gemm_ncopy_4.S
|
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||||
DGEMMOTCOPY = gemm_tcopy_4.S
|
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||||
DGEMMINCOPYOBJ = dgemm_incopy.o
|
DGEMMINCOPYOBJ = dgemm_incopy.o
|
||||||
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||||
|
|
||||||
CGEMMKERNEL = cgemm_kernel_8x4_power8.S
|
CGEMMKERNEL = cgemm_kernel_8x4_power8.S
|
||||||
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
|
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
|
||||||
|
|
|
@ -131,6 +131,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define o0 0
|
#define o0 0
|
||||||
|
|
||||||
|
#define T4 r12
|
||||||
|
#define T3 r11
|
||||||
|
|
||||||
#define o8 r15
|
#define o8 r15
|
||||||
#define o24 r16
|
#define o24 r16
|
||||||
#define ALPHA r17
|
#define ALPHA r17
|
||||||
|
@ -265,7 +268,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
addi ALPHA, SP, 224
|
addi ALPHA, SP, 224
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
li PRE, 256
|
li PRE, 384
|
||||||
li o8 , 8
|
li o8 , 8
|
||||||
li o16, 16
|
li o16, 16
|
||||||
li o24, 24
|
li o24, 24
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -431,6 +431,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
mr T1, CO
|
mr T1, CO
|
||||||
addi T2, T1, 64
|
addi T2, T1, 64
|
||||||
|
add T3, T1, LDC
|
||||||
|
addi T4, T3, 64
|
||||||
|
|
||||||
#ifndef TRMMKERNEL
|
#ifndef TRMMKERNEL
|
||||||
lxvd2x vs0, 0, T1
|
lxvd2x vs0, 0, T1
|
||||||
|
@ -442,6 +444,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
lxvd2x vs5, o16, T2
|
lxvd2x vs5, o16, T2
|
||||||
lxvd2x vs6, o32, T2
|
lxvd2x vs6, o32, T2
|
||||||
lxvd2x vs7, o48, T2
|
lxvd2x vs7, o48, T2
|
||||||
|
|
||||||
|
lxvd2x vs8, 0, T3
|
||||||
|
lxvd2x vs9, o16, T3
|
||||||
|
lxvd2x vs10, o32, T3
|
||||||
|
lxvd2x vs11, o48, T3
|
||||||
|
|
||||||
|
lxvd2x vs12, 0, T4
|
||||||
|
lxvd2x vs13, o16, T4
|
||||||
|
lxvd2x vs14, o32, T4
|
||||||
|
lxvd2x vs15, o48, T4
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef TRMMKERNEL
|
#ifndef TRMMKERNEL
|
||||||
|
@ -453,45 +465,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
xvmaddadp vs5, vs37, alpha_r
|
xvmaddadp vs5, vs37, alpha_r
|
||||||
xvmaddadp vs6, vs38, alpha_r
|
xvmaddadp vs6, vs38, alpha_r
|
||||||
xvmaddadp vs7, vs39, alpha_r
|
xvmaddadp vs7, vs39, alpha_r
|
||||||
#else
|
|
||||||
xvmuldp vs0, vs32, alpha_r
|
|
||||||
xvmuldp vs1, vs33, alpha_r
|
|
||||||
xvmuldp vs2, vs34, alpha_r
|
|
||||||
xvmuldp vs3, vs35, alpha_r
|
|
||||||
xvmuldp vs4, vs36, alpha_r
|
|
||||||
xvmuldp vs5, vs37, alpha_r
|
|
||||||
xvmuldp vs6, vs38, alpha_r
|
|
||||||
xvmuldp vs7, vs39, alpha_r
|
|
||||||
#endif
|
|
||||||
|
|
||||||
stxvd2x vs0, 0, T1
|
|
||||||
stxvd2x vs1, o16, T1
|
|
||||||
stxvd2x vs2, o32, T1
|
|
||||||
stxvd2x vs3, o48, T1
|
|
||||||
|
|
||||||
dcbt T1, PRE
|
|
||||||
|
|
||||||
stxvd2x vs4, 0, T2
|
|
||||||
stxvd2x vs5, o16, T2
|
|
||||||
stxvd2x vs6, o32, T2
|
|
||||||
stxvd2x vs7, o48, T2
|
|
||||||
|
|
||||||
add T1, T1, LDC
|
|
||||||
add T2, T2, LDC
|
|
||||||
|
|
||||||
#ifndef TRMMKERNEL
|
|
||||||
lxvd2x vs8, 0, T1
|
|
||||||
lxvd2x vs9, o16, T1
|
|
||||||
lxvd2x vs10, o32, T1
|
|
||||||
lxvd2x vs11, o48, T1
|
|
||||||
|
|
||||||
lxvd2x vs12, 0, T2
|
|
||||||
lxvd2x vs13, o16, T2
|
|
||||||
lxvd2x vs14, o32, T2
|
|
||||||
lxvd2x vs15, o48, T2
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef TRMMKERNEL
|
|
||||||
xvmaddadp vs8, vs40, alpha_r
|
xvmaddadp vs8, vs40, alpha_r
|
||||||
xvmaddadp vs9, vs41, alpha_r
|
xvmaddadp vs9, vs41, alpha_r
|
||||||
xvmaddadp vs10, vs42, alpha_r
|
xvmaddadp vs10, vs42, alpha_r
|
||||||
|
@ -501,6 +474,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
xvmaddadp vs14, vs46, alpha_r
|
xvmaddadp vs14, vs46, alpha_r
|
||||||
xvmaddadp vs15, vs47, alpha_r
|
xvmaddadp vs15, vs47, alpha_r
|
||||||
#else
|
#else
|
||||||
|
xvmuldp vs0, vs32, alpha_r
|
||||||
|
xvmuldp vs1, vs33, alpha_r
|
||||||
|
xvmuldp vs2, vs34, alpha_r
|
||||||
|
xvmuldp vs3, vs35, alpha_r
|
||||||
|
xvmuldp vs4, vs36, alpha_r
|
||||||
|
xvmuldp vs5, vs37, alpha_r
|
||||||
|
xvmuldp vs6, vs38, alpha_r
|
||||||
|
xvmuldp vs7, vs39, alpha_r
|
||||||
xvmuldp vs8, vs40, alpha_r
|
xvmuldp vs8, vs40, alpha_r
|
||||||
xvmuldp vs9, vs41, alpha_r
|
xvmuldp vs9, vs41, alpha_r
|
||||||
xvmuldp vs10, vs42, alpha_r
|
xvmuldp vs10, vs42, alpha_r
|
||||||
|
@ -511,20 +492,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
xvmuldp vs15, vs47, alpha_r
|
xvmuldp vs15, vs47, alpha_r
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
stxvd2x vs8, 0, T1
|
stxvd2x vs0, 0, T1
|
||||||
stxvd2x vs9, o16, T1
|
stxvd2x vs1, o16, T1
|
||||||
stxvd2x vs10, o32, T1
|
stxvd2x vs2, o32, T1
|
||||||
stxvd2x vs11, o48, T1
|
stxvd2x vs3, o48, T1
|
||||||
|
|
||||||
dcbt T1, PRE
|
stxvd2x vs4, 0, T2
|
||||||
|
stxvd2x vs5, o16, T2
|
||||||
|
stxvd2x vs6, o32, T2
|
||||||
|
stxvd2x vs7, o48, T2
|
||||||
|
|
||||||
stxvd2x vs12, 0, T2
|
stxvd2x vs8, 0, T3
|
||||||
stxvd2x vs13, o16, T2
|
stxvd2x vs9, o16, T3
|
||||||
stxvd2x vs14, o32, T2
|
stxvd2x vs10, o32, T3
|
||||||
stxvd2x vs15, o48, T2
|
stxvd2x vs11, o48, T3
|
||||||
|
|
||||||
add T1, T1, LDC
|
stxvd2x vs12, 0, T4
|
||||||
add T2, T2, LDC
|
stxvd2x vs13, o16, T4
|
||||||
|
stxvd2x vs14, o32, T4
|
||||||
|
stxvd2x vs15, o48, T4
|
||||||
|
|
||||||
|
slwi T4, LDC, 1
|
||||||
|
add T1, T1, T4
|
||||||
|
add T3, T3, T4
|
||||||
|
addi T2, T1, 64
|
||||||
|
addi T4, T3, 64
|
||||||
|
|
||||||
#ifndef TRMMKERNEL
|
#ifndef TRMMKERNEL
|
||||||
lxvd2x vs0, 0, T1
|
lxvd2x vs0, 0, T1
|
||||||
|
@ -536,6 +528,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
lxvd2x vs5, o16, T2
|
lxvd2x vs5, o16, T2
|
||||||
lxvd2x vs6, o32, T2
|
lxvd2x vs6, o32, T2
|
||||||
lxvd2x vs7, o48, T2
|
lxvd2x vs7, o48, T2
|
||||||
|
|
||||||
|
lxvd2x vs8, 0, T3
|
||||||
|
lxvd2x vs9, o16, T3
|
||||||
|
lxvd2x vs10, o32, T3
|
||||||
|
lxvd2x vs11, o48, T3
|
||||||
|
|
||||||
|
lxvd2x vs12, 0, T4
|
||||||
|
lxvd2x vs13, o16, T4
|
||||||
|
lxvd2x vs14, o32, T4
|
||||||
|
lxvd2x vs15, o48, T4
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef TRMMKERNEL
|
#ifndef TRMMKERNEL
|
||||||
|
@ -547,45 +549,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
xvmaddadp vs5, vs53, alpha_r
|
xvmaddadp vs5, vs53, alpha_r
|
||||||
xvmaddadp vs6, vs54, alpha_r
|
xvmaddadp vs6, vs54, alpha_r
|
||||||
xvmaddadp vs7, vs55, alpha_r
|
xvmaddadp vs7, vs55, alpha_r
|
||||||
#else
|
|
||||||
xvmuldp vs0, vs48, alpha_r
|
|
||||||
xvmuldp vs1, vs49, alpha_r
|
|
||||||
xvmuldp vs2, vs50, alpha_r
|
|
||||||
xvmuldp vs3, vs51, alpha_r
|
|
||||||
xvmuldp vs4, vs52, alpha_r
|
|
||||||
xvmuldp vs5, vs53, alpha_r
|
|
||||||
xvmuldp vs6, vs54, alpha_r
|
|
||||||
xvmuldp vs7, vs55, alpha_r
|
|
||||||
#endif
|
|
||||||
|
|
||||||
stxvd2x vs0, 0, T1
|
|
||||||
stxvd2x vs1, o16, T1
|
|
||||||
stxvd2x vs2, o32, T1
|
|
||||||
stxvd2x vs3, o48, T1
|
|
||||||
|
|
||||||
dcbt T1, PRE
|
|
||||||
|
|
||||||
stxvd2x vs4, 0, T2
|
|
||||||
stxvd2x vs5, o16, T2
|
|
||||||
stxvd2x vs6, o32, T2
|
|
||||||
stxvd2x vs7, o48, T2
|
|
||||||
|
|
||||||
add T1, T1, LDC
|
|
||||||
add T2, T2, LDC
|
|
||||||
|
|
||||||
#ifndef TRMMKERNEL
|
|
||||||
lxvd2x vs8, 0, T1
|
|
||||||
lxvd2x vs9, o16, T1
|
|
||||||
lxvd2x vs10, o32, T1
|
|
||||||
lxvd2x vs11, o48, T1
|
|
||||||
|
|
||||||
lxvd2x vs12, 0, T2
|
|
||||||
lxvd2x vs13, o16, T2
|
|
||||||
lxvd2x vs14, o32, T2
|
|
||||||
lxvd2x vs15, o48, T2
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef TRMMKERNEL
|
|
||||||
xvmaddadp vs8, vs56, alpha_r
|
xvmaddadp vs8, vs56, alpha_r
|
||||||
xvmaddadp vs9, vs57, alpha_r
|
xvmaddadp vs9, vs57, alpha_r
|
||||||
xvmaddadp vs10, vs58, alpha_r
|
xvmaddadp vs10, vs58, alpha_r
|
||||||
|
@ -595,6 +558,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
xvmaddadp vs14, vs62, alpha_r
|
xvmaddadp vs14, vs62, alpha_r
|
||||||
xvmaddadp vs15, vs63, alpha_r
|
xvmaddadp vs15, vs63, alpha_r
|
||||||
#else
|
#else
|
||||||
|
xvmuldp vs0, vs48, alpha_r
|
||||||
|
xvmuldp vs1, vs49, alpha_r
|
||||||
|
xvmuldp vs2, vs50, alpha_r
|
||||||
|
xvmuldp vs3, vs51, alpha_r
|
||||||
|
xvmuldp vs4, vs52, alpha_r
|
||||||
|
xvmuldp vs5, vs53, alpha_r
|
||||||
|
xvmuldp vs6, vs54, alpha_r
|
||||||
|
xvmuldp vs7, vs55, alpha_r
|
||||||
xvmuldp vs8, vs56, alpha_r
|
xvmuldp vs8, vs56, alpha_r
|
||||||
xvmuldp vs9, vs57, alpha_r
|
xvmuldp vs9, vs57, alpha_r
|
||||||
xvmuldp vs10, vs58, alpha_r
|
xvmuldp vs10, vs58, alpha_r
|
||||||
|
@ -605,17 +576,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
xvmuldp vs15, vs63, alpha_r
|
xvmuldp vs15, vs63, alpha_r
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
stxvd2x vs8, 0, T1
|
stxvd2x vs0, 0, T1
|
||||||
stxvd2x vs9, o16, T1
|
stxvd2x vs1, o16, T1
|
||||||
stxvd2x vs10, o32, T1
|
stxvd2x vs2, o32, T1
|
||||||
stxvd2x vs11, o48, T1
|
stxvd2x vs3, o48, T1
|
||||||
|
|
||||||
dcbt T1, PRE
|
stxvd2x vs4, 0, T2
|
||||||
|
stxvd2x vs5, o16, T2
|
||||||
|
stxvd2x vs6, o32, T2
|
||||||
|
stxvd2x vs7, o48, T2
|
||||||
|
|
||||||
stxvd2x vs12, 0, T2
|
stxvd2x vs8, 0, T3
|
||||||
stxvd2x vs13, o16, T2
|
stxvd2x vs9, o16, T3
|
||||||
stxvd2x vs14, o32, T2
|
stxvd2x vs10, o32, T3
|
||||||
stxvd2x vs15, o48, T2
|
stxvd2x vs11, o48, T3
|
||||||
|
|
||||||
|
stxvd2x vs12, 0, T4
|
||||||
|
stxvd2x vs13, o16, T4
|
||||||
|
stxvd2x vs14, o32, T4
|
||||||
|
stxvd2x vs15, o48, T4
|
||||||
|
|
||||||
addi CO, CO, 128
|
addi CO, CO, 128
|
||||||
|
|
||||||
|
|
|
@ -170,7 +170,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add B2, B2, B
|
add B2, B2, B
|
||||||
add B1, B1, B
|
add B1, B1, B
|
||||||
|
|
||||||
li PREA, 768
|
li PREA, 256
|
||||||
addi PREB, M16, 128
|
addi PREB, M16, 128
|
||||||
|
|
||||||
li o8, 8
|
li o8, 8
|
||||||
|
|
|
@ -57,16 +57,20 @@ DCOPYT_L4_BEGIN:
|
||||||
|
|
||||||
DCOPYT_L4x16_LOOP:
|
DCOPYT_L4x16_LOOP:
|
||||||
|
|
||||||
|
/*
|
||||||
addi T1, PREB, 128
|
addi T1, PREB, 128
|
||||||
addi T2, PREB, 256
|
addi T2, PREB, 256
|
||||||
|
*/
|
||||||
dcbt A0, PREA
|
dcbt A0, PREA
|
||||||
dcbt A1, PREA
|
dcbt A1, PREA
|
||||||
dcbt A2, PREA
|
dcbt A2, PREA
|
||||||
dcbt A3, PREA
|
dcbt A3, PREA
|
||||||
|
/*
|
||||||
dcbtst BO, M16
|
dcbtst BO, M16
|
||||||
dcbtst BO, PREB
|
dcbtst BO, PREB
|
||||||
dcbtst BO, T1
|
dcbtst BO, T1
|
||||||
dcbtst BO, T2
|
dcbtst BO, T2
|
||||||
|
*/
|
||||||
COPY_4x16
|
COPY_4x16
|
||||||
|
|
||||||
add BO, BO, M16
|
add BO, BO, M16
|
||||||
|
|
|
@ -152,7 +152,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define PRE r30
|
#define PRE r30
|
||||||
#define T2 r31
|
#define T2 r31
|
||||||
|
|
||||||
#include "dgemm_macros_16x4_power8.S"
|
#include "dtrmm_macros_16x4_power8.S"
|
||||||
|
|
||||||
|
|
||||||
#ifndef NEEDPARAM
|
#ifndef NEEDPARAM
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
99
param.h
99
param.h
|
@ -410,7 +410,100 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(STEAMROLLER) || defined(EXCAVATOR)
|
#ifdef STEAMROLLER
|
||||||
|
#define SNUMOPT 8
|
||||||
|
#define DNUMOPT 4
|
||||||
|
|
||||||
|
#define GEMM_DEFAULT_OFFSET_A 64
|
||||||
|
#define GEMM_DEFAULT_OFFSET_B 832
|
||||||
|
#define GEMM_DEFAULT_ALIGN 0x0fffUL
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define QGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
#define XGEMM_DEFAULT_UNROLL_N 1
|
||||||
|
|
||||||
|
#ifdef ARCH_X86
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define ZGEMM_DEFAULT_UNROLL_M 1
|
||||||
|
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||||
|
#else
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_M 8
|
||||||
|
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define CGEMM_DEFAULT_UNROLL_M 4
|
||||||
|
#define ZGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||||
|
#define CGEMM3M_DEFAULT_UNROLL_N 4
|
||||||
|
#define CGEMM3M_DEFAULT_UNROLL_M 8
|
||||||
|
#define ZGEMM3M_DEFAULT_UNROLL_N 4
|
||||||
|
#define ZGEMM3M_DEFAULT_UNROLL_M 4
|
||||||
|
#define GEMV_UNROLL 8
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(ARCH_X86_64)
|
||||||
|
#define SGEMM_DEFAULT_P 768
|
||||||
|
#define DGEMM_DEFAULT_P 576
|
||||||
|
#define ZGEMM_DEFAULT_P 288
|
||||||
|
#define CGEMM_DEFAULT_P 576
|
||||||
|
#else
|
||||||
|
#define SGEMM_DEFAULT_P 448
|
||||||
|
#define DGEMM_DEFAULT_P 480
|
||||||
|
#define ZGEMM_DEFAULT_P 112
|
||||||
|
#define CGEMM_DEFAULT_P 224
|
||||||
|
#endif
|
||||||
|
#define QGEMM_DEFAULT_P 112
|
||||||
|
#define XGEMM_DEFAULT_P 56
|
||||||
|
|
||||||
|
#if defined(ARCH_X86_64)
|
||||||
|
#define SGEMM_DEFAULT_Q 192
|
||||||
|
#define DGEMM_DEFAULT_Q 160
|
||||||
|
#define ZGEMM_DEFAULT_Q 160
|
||||||
|
#define CGEMM_DEFAULT_Q 160
|
||||||
|
#else
|
||||||
|
#define SGEMM_DEFAULT_Q 224
|
||||||
|
#define DGEMM_DEFAULT_Q 224
|
||||||
|
#define ZGEMM_DEFAULT_Q 224
|
||||||
|
#define CGEMM_DEFAULT_Q 224
|
||||||
|
#endif
|
||||||
|
#define QGEMM_DEFAULT_Q 224
|
||||||
|
#define XGEMM_DEFAULT_Q 224
|
||||||
|
|
||||||
|
#define CGEMM3M_DEFAULT_P 448
|
||||||
|
#define ZGEMM3M_DEFAULT_P 224
|
||||||
|
#define XGEMM3M_DEFAULT_P 112
|
||||||
|
#define CGEMM3M_DEFAULT_Q 224
|
||||||
|
#define ZGEMM3M_DEFAULT_Q 224
|
||||||
|
#define XGEMM3M_DEFAULT_Q 224
|
||||||
|
#define CGEMM3M_DEFAULT_R 12288
|
||||||
|
#define ZGEMM3M_DEFAULT_R 12288
|
||||||
|
#define XGEMM3M_DEFAULT_R 12288
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_R 12288
|
||||||
|
#define QGEMM_DEFAULT_R qgemm_r
|
||||||
|
#define DGEMM_DEFAULT_R 12288
|
||||||
|
#define CGEMM_DEFAULT_R cgemm_r
|
||||||
|
#define ZGEMM_DEFAULT_R zgemm_r
|
||||||
|
#define XGEMM_DEFAULT_R xgemm_r
|
||||||
|
|
||||||
|
#define SYMV_P 16
|
||||||
|
#define HAVE_EXCLUSIVE_CACHE
|
||||||
|
|
||||||
|
#define GEMM_THREAD gemm_thread_mn
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef EXCAVATOR
|
||||||
#define SNUMOPT 8
|
#define SNUMOPT 8
|
||||||
#define DNUMOPT 4
|
#define DNUMOPT 4
|
||||||
|
|
||||||
|
@ -1885,12 +1978,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_P 1280
|
#define SGEMM_DEFAULT_P 1280
|
||||||
#define DGEMM_DEFAULT_P 640
|
#define DGEMM_DEFAULT_P 768
|
||||||
#define CGEMM_DEFAULT_P 640
|
#define CGEMM_DEFAULT_P 640
|
||||||
#define ZGEMM_DEFAULT_P 320
|
#define ZGEMM_DEFAULT_P 320
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_Q 640
|
#define SGEMM_DEFAULT_Q 640
|
||||||
#define DGEMM_DEFAULT_Q 640
|
#define DGEMM_DEFAULT_Q 768
|
||||||
#define CGEMM_DEFAULT_Q 640
|
#define CGEMM_DEFAULT_Q 640
|
||||||
#define ZGEMM_DEFAULT_Q 640
|
#define ZGEMM_DEFAULT_Q 640
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue