Optimized dgemm_kernel for HASWELL
This commit is contained in:
parent
f6b50057e2
commit
fe8c5666f9
|
@ -333,7 +333,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
|||
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
||||
min_jj = min_j + js - jjs;
|
||||
|
||||
#if defined(BULLDOZER) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX)
|
||||
#if defined(HASWELL) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX)
|
||||
if (min_jj >= 12*GEMM_UNROLL_N) min_jj = 12*GEMM_UNROLL_N;
|
||||
else
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
|
|
|
@ -367,7 +367,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
|||
for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){
|
||||
min_jj = MIN(n_to, xxx + div_n) - jjs;
|
||||
|
||||
#if defined(BULLDOZER) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX)
|
||||
#if defined(HASWELL) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX)
|
||||
if (min_jj >= 12*GEMM_UNROLL_N) min_jj = 12*GEMM_UNROLL_N;
|
||||
else
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
59
param.h
59
param.h
|
@ -1164,6 +1164,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define SWITCH_RATIO 4
|
||||
|
||||
#ifdef ARCH_X86
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||
|
@ -1177,44 +1178,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||
#define XGEMM_DEFAULT_UNROLL_N 1
|
||||
|
||||
#else
|
||||
#define SGEMM_DEFAULT_UNROLL_M 8
|
||||
#define DGEMM_DEFAULT_UNROLL_M 8
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||
#define DGEMM_DEFAULT_UNROLL_M 16
|
||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_M 8
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 4
|
||||
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_N 8
|
||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
#define DGEMM_DEFAULT_UNROLL_N 2
|
||||
#define QGEMM_DEFAULT_UNROLL_N 2
|
||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
||||
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||
#define XGEMM_DEFAULT_UNROLL_N 1
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_X86
|
||||
|
||||
#define SGEMM_DEFAULT_P 512
|
||||
#define SGEMM_DEFAULT_R sgemm_r
|
||||
//#define SGEMM_DEFAULT_R 1024
|
||||
|
||||
#define DGEMM_DEFAULT_P 512
|
||||
#define DGEMM_DEFAULT_R dgemm_r
|
||||
//#define DGEMM_DEFAULT_R 1024
|
||||
|
||||
#define QGEMM_DEFAULT_P 504
|
||||
#define QGEMM_DEFAULT_R qgemm_r
|
||||
|
||||
#define CGEMM_DEFAULT_P 128
|
||||
//#define CGEMM_DEFAULT_R cgemm_r
|
||||
#define CGEMM_DEFAULT_R 1024
|
||||
|
||||
#define ZGEMM_DEFAULT_P 512
|
||||
#define ZGEMM_DEFAULT_R zgemm_r
|
||||
//#define ZGEMM_DEFAULT_R 1024
|
||||
|
||||
#define XGEMM_DEFAULT_P 252
|
||||
#define XGEMM_DEFAULT_R xgemm_r
|
||||
|
||||
#define SGEMM_DEFAULT_Q 256
|
||||
#define DGEMM_DEFAULT_Q 256
|
||||
#define QGEMM_DEFAULT_Q 128
|
||||
|
@ -1222,7 +1218,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ZGEMM_DEFAULT_Q 192
|
||||
#define XGEMM_DEFAULT_Q 128
|
||||
|
||||
#define GETRF_FACTOR 0.72
|
||||
#else
|
||||
|
||||
#define SGEMM_DEFAULT_P 768
|
||||
#define DGEMM_DEFAULT_P 192
|
||||
#define CGEMM_DEFAULT_P 384
|
||||
#define ZGEMM_DEFAULT_P 192
|
||||
|
||||
#define SGEMM_DEFAULT_Q 168
|
||||
#define DGEMM_DEFAULT_Q 128
|
||||
#define CGEMM_DEFAULT_Q 168
|
||||
#define ZGEMM_DEFAULT_Q 168
|
||||
|
||||
#define SGEMM_DEFAULT_R sgemm_r
|
||||
#define DGEMM_DEFAULT_R dgemm_r
|
||||
#define CGEMM_DEFAULT_R cgemm_r
|
||||
#define ZGEMM_DEFAULT_R zgemm_r
|
||||
|
||||
#define QGEMM_DEFAULT_Q 128
|
||||
#define QGEMM_DEFAULT_P 504
|
||||
#define QGEMM_DEFAULT_R qgemm_r
|
||||
#define XGEMM_DEFAULT_P 252
|
||||
#define XGEMM_DEFAULT_R xgemm_r
|
||||
#define XGEMM_DEFAULT_Q 128
|
||||
|
||||
#define CGEMM3M_DEFAULT_UNROLL_N 4
|
||||
#define CGEMM3M_DEFAULT_UNROLL_M 8
|
||||
#define ZGEMM3M_DEFAULT_UNROLL_N 2
|
||||
#define ZGEMM3M_DEFAULT_UNROLL_M 8
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
|
Loading…
Reference in New Issue