s390x: Use new sgemm kernel also for DGEMM and DTRMM on Z14
Also apply our new GEMM kernel implementation, written in C with vector intrinsics, to DGEMM and DTRMM on Z14 and newer (i.e., architectures with FP32 SIMD instructions). As a result, we gain around 10% in performance on z15, in addition to improving maintainability.

Signed-off-by: Marius Hillenbrand <mhillen@linux.ibm.com>
This commit is contained in:
parent
bdd795ed03
commit
89fe17f20e
|
@ -87,7 +87,7 @@ CGEMVTKERNEL = cgemv_t_4.c
|
||||||
ZGEMVTKERNEL = zgemv_t_4.c
|
ZGEMVTKERNEL = zgemv_t_4.c
|
||||||
|
|
||||||
STRMMKERNEL = gemm_vec.c
|
STRMMKERNEL = gemm_vec.c
|
||||||
DTRMMKERNEL = trmm8x4V.S
|
DTRMMKERNEL = gemm_vec.c
|
||||||
CTRMMKERNEL = ctrmm4x4V.S
|
CTRMMKERNEL = ctrmm4x4V.S
|
||||||
ZTRMMKERNEL = ztrmm4x4V.S
|
ZTRMMKERNEL = ztrmm4x4V.S
|
||||||
|
|
||||||
|
@ -103,7 +103,7 @@ SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
DGEMMKERNEL = gemm8x4V.S
|
DGEMMKERNEL = gemm_vec.c
|
||||||
DGEMMINCOPY = ../generic/gemm_ncopy_8.c
|
DGEMMINCOPY = ../generic/gemm_ncopy_8.c
|
||||||
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
|
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
|
||||||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||||
|
|
Loading…
Reference in New Issue