From 9fb54ee1dafdc7b90689608d3a72e272e81ecad5 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Tue, 18 Feb 2020 11:52:06 +0100 Subject: [PATCH] Restore ZEN SGEMM speed after #2361. I partially reverted the changes in #2361 and measured the following speedup with: ./xsl3blastst -R gemm -N 2048 2048 1 -a 5 1 1 1 1 1 AMD Ryzen 7 2700X (Zen+): 61400 to 63300 MFlops AMD EPYC 7742 (Zen v2): 91400 to 94500 MFlops These numbers reflect single-threaded performance. --- kernel/x86_64/KERNEL.ZEN | 4 ++-- param.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/x86_64/KERNEL.ZEN b/kernel/x86_64/KERNEL.ZEN index 7cec2e5ed..ed14a52da 100644 --- a/kernel/x86_64/KERNEL.ZEN +++ b/kernel/x86_64/KERNEL.ZEN @@ -30,8 +30,8 @@ DAXPYKERNEL = daxpy.c CAXPYKERNEL = caxpy.c ZAXPYKERNEL = zaxpy.c -STRMMKERNEL = sgemm_kernel_8x4_haswell.c -SGEMMKERNEL = sgemm_kernel_8x4_haswell.c +STRMMKERNEL = sgemm_kernel_16x4_haswell.S +SGEMMKERNEL = sgemm_kernel_16x4_haswell.S SGEMMINCOPY = ../generic/gemm_ncopy_8.c SGEMMITCOPY = ../generic/gemm_tcopy_8.c SGEMMONCOPY = ../generic/gemm_ncopy_4.c diff --git a/param.h b/param.h index e6ab93aa5..3c495af05 100644 --- a/param.h +++ b/param.h @@ -666,7 +666,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else -#define SGEMM_DEFAULT_P 320 +#define SGEMM_DEFAULT_P 768 #define DGEMM_DEFAULT_P 512 #define CGEMM_DEFAULT_P 256 #define ZGEMM_DEFAULT_P 192