added highly optimized dgemm_kernel for HASWELL

This commit is contained in:
wernsaar
2013-10-28 10:23:47 +01:00
parent fe8c5666f9
commit a77c71eaf5
5 changed files with 3514 additions and 58 deletions

View File

@@ -334,13 +334,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_jj = min_j + js - jjs;
#if defined(HASWELL) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX)
if (min_jj >= 12*GEMM_UNROLL_N) min_jj = 12*GEMM_UNROLL_N;
else
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
else
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
else
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
#else
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;

View File

@@ -368,8 +368,6 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_jj = MIN(n_to, xxx + div_n) - jjs;
#if defined(HASWELL) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX)
if (min_jj >= 12*GEMM_UNROLL_N) min_jj = 12*GEMM_UNROLL_N;
else
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
else
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;