optimized dgemm_kernel for HASWELL

This commit is contained in:
wernsaar 2013-10-20 16:52:26 +02:00
parent f6b50057e2
commit fe8c5666f9
4 changed files with 333 additions and 504 deletions

View File

@ -333,7 +333,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
for(jjs = js; jjs < js + min_j; jjs += min_jj){ for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs; min_jj = min_j + js - jjs;
#if defined(BULLDOZER) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX) #if defined(HASWELL) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX)
if (min_jj >= 12*GEMM_UNROLL_N) min_jj = 12*GEMM_UNROLL_N; if (min_jj >= 12*GEMM_UNROLL_N) min_jj = 12*GEMM_UNROLL_N;
else else
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;

View File

@ -367,7 +367,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){ for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){
min_jj = MIN(n_to, xxx + div_n) - jjs; min_jj = MIN(n_to, xxx + div_n) - jjs;
#if defined(BULLDOZER) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX) #if defined(HASWELL) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX)
if (min_jj >= 12*GEMM_UNROLL_N) min_jj = 12*GEMM_UNROLL_N; if (min_jj >= 12*GEMM_UNROLL_N) min_jj = 12*GEMM_UNROLL_N;
else else
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;

File diff suppressed because it is too large Load Diff

59
param.h
View File

@ -1164,6 +1164,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SWITCH_RATIO 4 #define SWITCH_RATIO 4
#ifdef ARCH_X86 #ifdef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2 #define DGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_M 2 #define QGEMM_DEFAULT_UNROLL_M 2
@ -1177,44 +1178,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CGEMM_DEFAULT_UNROLL_N 2 #define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2 #define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1 #define XGEMM_DEFAULT_UNROLL_N 1
#else #else
#define SGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_M 8 #define SGEMM_DEFAULT_UNROLL_M 16
#define DGEMM_DEFAULT_UNROLL_M 16
#define QGEMM_DEFAULT_UNROLL_M 2 #define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 8 #define CGEMM_DEFAULT_UNROLL_M 8
#define ZGEMM_DEFAULT_UNROLL_M 4 #define ZGEMM_DEFAULT_UNROLL_M 4
#define XGEMM_DEFAULT_UNROLL_M 1 #define XGEMM_DEFAULT_UNROLL_M 1
#define SGEMM_DEFAULT_UNROLL_N 8 #define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4 #define DGEMM_DEFAULT_UNROLL_N 2
#define QGEMM_DEFAULT_UNROLL_N 2 #define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 4 #define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 4 #define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1 #define XGEMM_DEFAULT_UNROLL_N 1
#endif #endif
#ifdef ARCH_X86
#define SGEMM_DEFAULT_P 512 #define SGEMM_DEFAULT_P 512
#define SGEMM_DEFAULT_R sgemm_r #define SGEMM_DEFAULT_R sgemm_r
//#define SGEMM_DEFAULT_R 1024
#define DGEMM_DEFAULT_P 512 #define DGEMM_DEFAULT_P 512
#define DGEMM_DEFAULT_R dgemm_r #define DGEMM_DEFAULT_R dgemm_r
//#define DGEMM_DEFAULT_R 1024
#define QGEMM_DEFAULT_P 504 #define QGEMM_DEFAULT_P 504
#define QGEMM_DEFAULT_R qgemm_r #define QGEMM_DEFAULT_R qgemm_r
#define CGEMM_DEFAULT_P 128 #define CGEMM_DEFAULT_P 128
//#define CGEMM_DEFAULT_R cgemm_r
#define CGEMM_DEFAULT_R 1024 #define CGEMM_DEFAULT_R 1024
#define ZGEMM_DEFAULT_P 512 #define ZGEMM_DEFAULT_P 512
#define ZGEMM_DEFAULT_R zgemm_r #define ZGEMM_DEFAULT_R zgemm_r
//#define ZGEMM_DEFAULT_R 1024
#define XGEMM_DEFAULT_P 252 #define XGEMM_DEFAULT_P 252
#define XGEMM_DEFAULT_R xgemm_r #define XGEMM_DEFAULT_R xgemm_r
#define SGEMM_DEFAULT_Q 256 #define SGEMM_DEFAULT_Q 256
#define DGEMM_DEFAULT_Q 256 #define DGEMM_DEFAULT_Q 256
#define QGEMM_DEFAULT_Q 128 #define QGEMM_DEFAULT_Q 128
@ -1222,7 +1218,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_Q 192 #define ZGEMM_DEFAULT_Q 192
#define XGEMM_DEFAULT_Q 128 #define XGEMM_DEFAULT_Q 128
#define GETRF_FACTOR 0.72 #else
#define SGEMM_DEFAULT_P 768
#define DGEMM_DEFAULT_P 192
#define CGEMM_DEFAULT_P 384
#define ZGEMM_DEFAULT_P 192
#define SGEMM_DEFAULT_Q 168
#define DGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q 168
#define ZGEMM_DEFAULT_Q 168
#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_R dgemm_r
#define CGEMM_DEFAULT_R cgemm_r
#define ZGEMM_DEFAULT_R zgemm_r
#define QGEMM_DEFAULT_Q 128
#define QGEMM_DEFAULT_P 504
#define QGEMM_DEFAULT_R qgemm_r
#define XGEMM_DEFAULT_P 252
#define XGEMM_DEFAULT_R xgemm_r
#define XGEMM_DEFAULT_Q 128
#define CGEMM3M_DEFAULT_UNROLL_N 4
#define CGEMM3M_DEFAULT_UNROLL_M 8
#define ZGEMM3M_DEFAULT_UNROLL_N 2
#define ZGEMM3M_DEFAULT_UNROLL_M 8
#endif
#endif #endif