Create an AVX512-enabled version of DGEMM
This patch adds dgemm_kernel_4x8_skylakex.c, which is:
* dgemm_kernel_4x8_haswell.s converted to C + intrinsics
* 8x8 support added
* 8x8 kernel implemented using AVX512

Performance is a work in progress, but already shows a 10% - 20% increase for a wide range of matrix sizes.
This commit is contained in:
parent
544b069e85
commit
45fe8cb0c5
|
@ -2,18 +2,12 @@ include $(KERNELDIR)/KERNEL.HASWELL
|
||||||
|
|
||||||
SGEMMKERNEL = sgemm_kernel_16x4_skylakex.S
|
SGEMMKERNEL = sgemm_kernel_16x4_skylakex.S
|
||||||
|
|
||||||
|
DGEMMKERNEL = dgemm_kernel_4x8_skylakex.c
|
||||||
|
|
||||||
#DTRMMKERNEL = ../generic/trmmkernel_16x2.c
|
DGEMMINCOPY = ../generic/gemm_ncopy_8.c
|
||||||
#DGEMMKERNEL = dgemm_kernel_16x2_skylakex.S
|
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
|
||||||
#DGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||||
#DGEMMITCOPY = ../generic/gemm_tcopy_16.c
|
DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
||||||
#DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
|
||||||
#DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
|
||||||
#DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
#DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
#DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
#DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
|
|
||||||
|
|
||||||
SGEMM_BETA = ../generic/gemm_beta.c
|
SGEMM_BETA = ../generic/gemm_beta.c
|
||||||
DGEMM_BETA = ../generic/gemm_beta.c
|
DGEMM_BETA = ../generic/gemm_beta.c
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue