From 095f4e6964ba150b1293747d842a60294836be45 Mon Sep 17 00:00:00 2001 From: Marius Hillenbrand Date: Tue, 1 Sep 2020 15:09:32 +0200 Subject: [PATCH] s390x: allow clang to emit fused multiply-adds (replicates gcc's default behavior) gcc's default setting for floating-point expression contraction is "fast", which allows the compiler to emit fused multiply adds instead of separate multiplies and adds (amongst others). Fused multiply-adds, which assembly kernels typically apply, also bring a significant performance advantage to the C implementation for matrix-matrix multiplication on s390x. To enable that performance advantage for builds with clang, add -ffp-contract=fast to the compiler options. Signed-off-by: Marius Hillenbrand --- Makefile.zarch | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Makefile.zarch b/Makefile.zarch index be1e34f6d..b841d9b4d 100644 --- a/Makefile.zarch +++ b/Makefile.zarch @@ -8,3 +8,9 @@ ifeq ($(CORE), Z14) CCOMMON_OPT += -march=z14 -mzvector -O3 FCOMMON_OPT += -march=z14 -mzvector endif + +# Enable floating-point expression contraction for clang, since it is the +# default for gcc +ifeq ($(C_COMPILER), CLANG) +CCOMMON_OPT += -ffp-contract=fast +endif