added dgemm_kernel for Piledriver
This commit is contained in:
parent
6c4a7d0828
commit
2840d56aeb
7
common.h
7
common.h
|
@ -310,6 +310,13 @@ typedef int blasint;
|
|||
#define YIELDING SwitchToThread()
|
||||
#endif
|
||||
|
||||
/***************************************************
|
||||
A few no-operations are enough
|
||||
***************************************************/
|
||||
#ifdef PILEDRIVER
|
||||
/*
 * Piledriver busy-wait hint: execute eight NOP instructions rather than
 * calling into the OS scheduler.
 *
 * The expansion deliberately has NO trailing semicolon, matching the other
 * YIELDING definitions (SwitchToThread(), sched_yield()); callers write
 * `YIELDING;`.  A semicolon inside the macro would expand to two statements
 * and break an unbraced `if (...) YIELDING; else ...`.
 *
 * The #undef discards any definition made earlier for the platform so this
 * one replaces it without an incompatible-redefinition diagnostic.
 */
#undef YIELDING
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n")
|
||||
#endif
|
||||
|
||||
#ifndef YIELDING
|
||||
#define YIELDING sched_yield()
|
||||
#endif
|
||||
|
|
|
@ -333,7 +333,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
|||
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
||||
min_jj = min_j + js - jjs;
|
||||
|
||||
#if defined(BULLDOZER) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX)
|
||||
#if ( defined(BULLDOZER) || defined(PILEDRIVER) ) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX)
|
||||
if (min_jj >= 12*GEMM_UNROLL_N) min_jj = 12*GEMM_UNROLL_N;
|
||||
else
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
|
|
|
@ -367,7 +367,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
|||
for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){
|
||||
min_jj = MIN(n_to, xxx + div_n) - jjs;
|
||||
|
||||
#if defined(BULLDOZER) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX)
|
||||
#if ( defined(BULLDOZER) || defined(PILEDRIVER) ) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX)
|
||||
if (min_jj >= 12*GEMM_UNROLL_N) min_jj = 12*GEMM_UNROLL_N;
|
||||
else
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
|
|
|
@ -17,11 +17,11 @@ SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
|||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_6x4_piledriver.S
|
||||
DGEMMINCOPY = ../generic/gemm_ncopy_6.c
|
||||
DGEMMITCOPY = ../generic/gemm_tcopy_6.c
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||
DGEMMKERNEL = dgemm_kernel_8x2_piledriver.S
|
||||
DGEMMINCOPY = dgemm_ncopy_8_bulldozer.S
|
||||
DGEMMITCOPY = dgemm_tcopy_8_bulldozer.S
|
||||
DGEMMONCOPY = gemm_ncopy_2_bulldozer.S
|
||||
DGEMMOTCOPY = gemm_tcopy_2_bulldozer.S
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
10
param.h
10
param.h
|
@ -330,9 +330,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||
#else
|
||||
#define SGEMM_DEFAULT_UNROLL_N 2
|
||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||
#define DGEMM_DEFAULT_UNROLL_N 2
|
||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||
#define DGEMM_DEFAULT_UNROLL_M 6
|
||||
#define DGEMM_DEFAULT_UNROLL_M 8
|
||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_M 4
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 2
|
||||
|
@ -347,7 +347,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#if defined(ARCH_X86_64)
|
||||
#define SGEMM_DEFAULT_P 768
|
||||
#define DGEMM_DEFAULT_P 480
|
||||
#define DGEMM_DEFAULT_P 384
|
||||
#else
|
||||
#define SGEMM_DEFAULT_P 448
|
||||
#define DGEMM_DEFAULT_P 480
|
||||
|
@ -359,7 +359,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#if defined(ARCH_X86_64)
|
||||
#define SGEMM_DEFAULT_Q 168
|
||||
#define DGEMM_DEFAULT_Q 128
|
||||
#define DGEMM_DEFAULT_Q 168
|
||||
#else
|
||||
#define SGEMM_DEFAULT_Q 224
|
||||
#define DGEMM_DEFAULT_Q 224
|
||||
|
@ -371,7 +371,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define SGEMM_DEFAULT_R sgemm_r
|
||||
#define QGEMM_DEFAULT_R qgemm_r
|
||||
#define DGEMM_DEFAULT_R dgemm_r
|
||||
#define DGEMM_DEFAULT_R 12288
|
||||
#define CGEMM_DEFAULT_R cgemm_r
|
||||
#define ZGEMM_DEFAULT_R zgemm_r
|
||||
#define XGEMM_DEFAULT_R xgemm_r
|
||||
|
|
Loading…
Reference in New Issue