Use AVX512 also for DGEMM
This required switching to the generic gemm_beta code (which is faster anyway on SKX) for both DGEMM and SGEMM. Performance for the not-retuned version is in the 30% range.
This commit is contained in:
parent
ef626c6824
commit
89372e0993
|
@ -2,3 +2,18 @@ include $(KERNELDIR)/KERNEL.HASWELL
|
|||
|
||||
SGEMMKERNEL = sgemm_kernel_16x4_skylakex.S
|
||||
|
||||
|
||||
DTRMMKERNEL = ../generic/trmmkernel_16x2.c
|
||||
DGEMMKERNEL = dgemm_kernel_16x2_skylakex.S
|
||||
DGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
||||
DGEMMITCOPY = ../generic/gemm_tcopy_16.c
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
||||
SGEMM_BETA = ../generic/gemm_beta.c
|
||||
DGEMM_BETA = ../generic/gemm_beta.c
|
File diff suppressed because it is too large
Load Diff
|
@ -159,7 +159,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vmovups -16 * SIZE(AO), %zmm0
|
||||
vbroadcastss -4 * SIZE(BO), %zmm2
|
||||
vbroadcastss -3 * SIZE(BO), %zmm3
|
||||
prefetcht0 A_PR1(AO)
|
||||
# prefetcht0 A_PR1(AO)
|
||||
|
||||
VFMADD231PS_( %zmm4,%zmm2,%zmm0 )
|
||||
VFMADD231PS_( %zmm6,%zmm3,%zmm0 )
|
||||
|
@ -183,7 +183,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vmovups -16 * SIZE(AO), %zmm0
|
||||
vbroadcastss -4 * SIZE(BO), %zmm2
|
||||
vbroadcastss -3 * SIZE(BO), %zmm3
|
||||
prefetcht0 A_PR1(AO)
|
||||
|
||||
VFMADD231PS_( %zmm4,%zmm2,%zmm0 )
|
||||
VFMADD231PS_( %zmm6,%zmm3,%zmm0 )
|
||||
|
|
Loading…
Reference in New Issue