Create an AVX512-enabled version of DGEMM

This patch adds dgemm_kernel_4x8_skylakex.c, which:
* is dgemm_kernel_4x8_haswell.s converted to C + intrinsics
* adds 8x8 support
* implements the 8x8 kernel using AVX512

Performance is a work in progress, but already shows a 10% - 20%
increase for a wide range of matrix sizes.
This commit is contained in:
Arjan van de Ven 2018-10-03 14:45:25 +00:00
parent 544b069e85
commit 45fe8cb0c5
2 changed files with 1293 additions and 11 deletions

View File

@@ -2,18 +2,12 @@ include $(KERNELDIR)/KERNEL.HASWELL
SGEMMKERNEL = sgemm_kernel_16x4_skylakex.S SGEMMKERNEL = sgemm_kernel_16x4_skylakex.S
DGEMMKERNEL = dgemm_kernel_4x8_skylakex.c
#DTRMMKERNEL = ../generic/trmmkernel_16x2.c DGEMMINCOPY = ../generic/gemm_ncopy_8.c
#DGEMMKERNEL = dgemm_kernel_16x2_skylakex.S DGEMMITCOPY = ../generic/gemm_tcopy_8.c
#DGEMMINCOPY = ../generic/gemm_ncopy_16.c DGEMMONCOPY = ../generic/gemm_ncopy_8.c
#DGEMMITCOPY = ../generic/gemm_tcopy_16.c DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
#DGEMMONCOPY = ../generic/gemm_ncopy_2.c
#DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
#DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
#DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
#DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
#DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
SGEMM_BETA = ../generic/gemm_beta.c SGEMM_BETA = ../generic/gemm_beta.c
DGEMM_BETA = ../generic/gemm_beta.c DGEMM_BETA = ../generic/gemm_beta.c

File diff suppressed because it is too large Load Diff