New dgemm kernel for BULLDOZER: dgemm_kernel_8x2_bulldozer.S
This commit is contained in:
parent
731220f870
commit
25491e42f9
|
@ -332,7 +332,20 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
#else
|
#else
|
||||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
||||||
min_jj = min_j + js - jjs;
|
min_jj = min_j + js - jjs;
|
||||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
|
||||||
|
#if defined(BULLDOZER) && defined(ARCH_X86_64) && defined(DOUBLE) && !defined(COMPLEX)
|
||||||
|
if (min_jj >= 12*GEMM_UNROLL_N) min_jj = 12*GEMM_UNROLL_N;
|
||||||
|
else
|
||||||
|
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||||
|
else
|
||||||
|
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
|
||||||
|
else
|
||||||
|
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||||
|
#else
|
||||||
|
|
||||||
|
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
|
|
||||||
|
|
|
@ -360,7 +360,19 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
|
|
||||||
for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){
|
for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){
|
||||||
min_jj = MIN(n_to, xxx + div_n) - jjs;
|
min_jj = MIN(n_to, xxx + div_n) - jjs;
|
||||||
|
|
||||||
|
#if defined(BULLDOZER) && defined(ARCH_X86_64) && defined(DOUBLE) && !defined(COMPLEX)
|
||||||
|
if (min_jj >= 12*GEMM_UNROLL_N) min_jj = 12*GEMM_UNROLL_N;
|
||||||
|
else
|
||||||
|
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||||
|
else
|
||||||
|
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
|
||||||
|
else
|
||||||
|
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||||
|
#else
|
||||||
|
|
||||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||||
|
#endif
|
||||||
|
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
|
|
||||||
|
|
|
@ -10,13 +10,13 @@ SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMKERNEL = dgemm_kernel_4x4_bulldozer.S
|
DGEMMKERNEL = dgemm_kernel_8x2_bulldozer.S
|
||||||
DGEMMINCOPY =
|
DGEMMINCOPY = ../generic/gemm_ncopy_8.c
|
||||||
DGEMMITCOPY =
|
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
|
||||||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
DGEMMINCOPYOBJ =
|
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMITCOPYOBJ =
|
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMKERNEL = zgemm_kernel_4x2_barcelona.S
|
CGEMMKERNEL = zgemm_kernel_4x2_barcelona.S
|
||||||
|
@ -38,25 +38,27 @@ ZGEMMITCOPYOBJ =
|
||||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
STRSMKERNEL_LN = trsm_kernel_LN_8x4_sse.S
|
|
||||||
STRSMKERNEL_LT = trsm_kernel_LT_8x4_sse.S
|
|
||||||
STRSMKERNEL_RN = trsm_kernel_LT_8x4_sse.S
|
|
||||||
STRSMKERNEL_RT = trsm_kernel_RT_8x4_sse.S
|
|
||||||
|
|
||||||
DTRSMKERNEL_LN = trsm_kernel_LN_4x4_barcelona.S
|
|
||||||
DTRSMKERNEL_LT = trsm_kernel_LT_4x4_barcelona.S
|
|
||||||
DTRSMKERNEL_RN = trsm_kernel_LT_4x4_barcelona.S
|
|
||||||
DTRSMKERNEL_RT = trsm_kernel_RT_4x4_barcelona.S
|
|
||||||
|
|
||||||
CTRSMKERNEL_LN = ztrsm_kernel_LN_4x2_sse.S
|
|
||||||
CTRSMKERNEL_LT = ztrsm_kernel_LT_4x2_sse.S
|
|
||||||
CTRSMKERNEL_RN = ztrsm_kernel_LT_4x2_sse.S
|
|
||||||
CTRSMKERNEL_RT = ztrsm_kernel_RT_4x2_sse.S
|
|
||||||
|
|
||||||
ZTRSMKERNEL_LN = ztrsm_kernel_LN_2x2_sse2.S
|
|
||||||
ZTRSMKERNEL_LT = ztrsm_kernel_LT_2x2_sse2.S
|
|
||||||
ZTRSMKERNEL_RN = ztrsm_kernel_LT_2x2_sse2.S
|
|
||||||
ZTRSMKERNEL_RT = ztrsm_kernel_RT_2x2_sse2.S
|
|
||||||
|
|
||||||
CGEMM3MKERNEL = zgemm3m_kernel_8x4_barcelona.S
|
CGEMM3MKERNEL = zgemm3m_kernel_8x4_barcelona.S
|
||||||
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_barcelona.S
|
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_barcelona.S
|
||||||
|
|
||||||
|
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
24
param.h
24
param.h
|
@ -153,7 +153,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define GEMM_DEFAULT_ALIGN 0x0fffUL
|
#define GEMM_DEFAULT_ALIGN 0x0fffUL
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
#if defined(BULLDOZER) && defined(ARCH_X86_64) && !defined(COMPLEX)
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
#else
|
||||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
#endif
|
||||||
#define QGEMM_DEFAULT_UNROLL_N 2
|
#define QGEMM_DEFAULT_UNROLL_N 2
|
||||||
#define CGEMM_DEFAULT_UNROLL_N 2
|
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
@ -161,14 +165,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#ifdef ARCH_X86
|
#ifdef ARCH_X86
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||||
#define CGEMM_DEFAULT_UNROLL_M 2
|
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||||
#define ZGEMM_DEFAULT_UNROLL_M 1
|
#define ZGEMM_DEFAULT_UNROLL_M 1
|
||||||
#define XGEMM_DEFAULT_UNROLL_M 1
|
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||||
#else
|
#else
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 8
|
#define SGEMM_DEFAULT_UNROLL_M 8
|
||||||
|
#if defined(BULLDOZER) && !defined(COMPLEX)
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_M 8
|
||||||
|
#else
|
||||||
#define DGEMM_DEFAULT_UNROLL_M 4
|
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||||
|
#endif
|
||||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||||
#define CGEMM_DEFAULT_UNROLL_M 4
|
#define CGEMM_DEFAULT_UNROLL_M 4
|
||||||
#define ZGEMM_DEFAULT_UNROLL_M 2
|
#define ZGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
@ -193,26 +201,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_P 448
|
#define SGEMM_DEFAULT_P 448
|
||||||
|
#if defined(BULLDOZER) && defined(ARCH_X86_64) && !defined(COMPLEX)
|
||||||
#if defined(BULLDOZER) && defined(ARCH_X86_64)
|
#define DGEMM_DEFAULT_P 384
|
||||||
#define DGEMM_DEFAULT_P 248
|
|
||||||
#else
|
#else
|
||||||
#define DGEMM_DEFAULT_P 224
|
#define DGEMM_DEFAULT_P 224
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define QGEMM_DEFAULT_P 112
|
#define QGEMM_DEFAULT_P 112
|
||||||
#define CGEMM_DEFAULT_P 224
|
#define CGEMM_DEFAULT_P 224
|
||||||
#define ZGEMM_DEFAULT_P 112
|
#define ZGEMM_DEFAULT_P 112
|
||||||
#define XGEMM_DEFAULT_P 56
|
#define XGEMM_DEFAULT_P 56
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_Q 224
|
#define SGEMM_DEFAULT_Q 224
|
||||||
|
#if defined(BULLDOZER) && defined(ARCH_X86_64) && !defined(COMPLEX)
|
||||||
#if defined(BULLDOZER) && defined(ARCH_X86_64)
|
#define DGEMM_DEFAULT_Q 168
|
||||||
#define DGEMM_DEFAULT_Q 248
|
|
||||||
#else
|
#else
|
||||||
#define DGEMM_DEFAULT_Q 224
|
#define DGEMM_DEFAULT_Q 224
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define QGEMM_DEFAULT_Q 224
|
#define QGEMM_DEFAULT_Q 224
|
||||||
#define CGEMM_DEFAULT_Q 224
|
#define CGEMM_DEFAULT_Q 224
|
||||||
#define ZGEMM_DEFAULT_Q 224
|
#define ZGEMM_DEFAULT_Q 224
|
||||||
|
@ -230,7 +234,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define SYMV_P 16
|
#define SYMV_P 16
|
||||||
#define HAVE_EXCLUSIVE_CACHE
|
#define HAVE_EXCLUSIVE_CACHE
|
||||||
|
|
||||||
#define GEMM_THREAD gemm_thread_mn
|
#define GEMM_THREAD gemm_thread_m
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue