From 8d2df7d066dbe6988502b352a4594cc78f9d89c7 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 13 Oct 2020 00:14:29 +0200 Subject: [PATCH 1/8] Revert special handling of Windows xNRM2 and enable C+intrinsics kernel for SSUM/DSUM --- kernel/x86_64/KERNEL | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/kernel/x86_64/KERNEL b/kernel/x86_64/KERNEL index d75196974..cb98fd89a 100644 --- a/kernel/x86_64/KERNEL +++ b/kernel/x86_64/KERNEL @@ -259,12 +259,8 @@ SNRM2KERNEL = nrm2_sse.S endif ifndef DNRM2KERNEL -ifeq ($(OSNAME),WINNT) -DNRM2KERNEL = ../arm/nrm2.c -else DNRM2KERNEL = nrm2.S endif -endif ifndef QNRM2KERNEL QNRM2KERNEL = nrm2.S @@ -275,12 +271,8 @@ CNRM2KERNEL = znrm2_sse.S endif ifndef ZNRM2KERNEL -ifeq ($(OSNAME),WINNT) -ZNRM2KERNEL = ../arm/znrm2.c -else ZNRM2KERNEL = znrm2.S endif -endif ifndef XNRM2KERNEL XNRM2KERNEL = znrm2.S @@ -486,3 +478,6 @@ XTRSMKERNEL_RN = xtrsm_kernel_LT_1x1.S XTRSMKERNEL_RT = xtrsm_kernel_LT_1x1.S XGEMM3MKERNEL = xgemm3m_kernel_2x2.S + +SSUMKERNEL = ../arm/sum.c +DSUMKERNEL = ../arm/sum.c From 6999086a2bc4be5796a5d091f491af3b32970a71 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 13 Oct 2020 10:32:19 +0200 Subject: [PATCH 2/8] whitelist SANDYBRIDGE for SSE3 --- kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/Makefile b/kernel/Makefile index 290fb2afe..6745a79dd 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -41,7 +41,7 @@ ifdef NO_AVX2 endif ifdef TARGET_CORE - ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO NEHALEM BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3)) + ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE NEHALEM BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3)) override CFLAGS += -msse3 endif ifeq ($(TARGET_CORE), COOPERLAKE) From 0eacbca85fa30657f749f7818e081952b9fb49f4 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 13 Oct 2020 11:42:39 +0200 Subject: [PATCH 3/8] Add Haswell and Zen to temporary sse3 whitelist --- kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/Makefile b/kernel/Makefile index 6745a79dd..e567485a6 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -41,7 +41,7 @@ ifdef NO_AVX2 endif ifdef TARGET_CORE - ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE NEHALEM BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3)) + ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE HASWELL NEHALEM ZEN BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3)) override CFLAGS += -msse3 endif ifeq ($(TARGET_CORE), COOPERLAKE) From fecedc9c699527dfdb208bde4634374eca1ebbce Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 13 Oct 2020 11:55:41 +0200 Subject: [PATCH 4/8] Add -mssse3 --- kernel/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/Makefile b/kernel/Makefile index e567485a6..c95c15f56 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -8,6 +8,9 @@ include $(TOPDIR)/Makefile.system ifdef HAVE_SSE3 CFLAGS += -msse3 endif +ifdef HAVE_SSSE3 +CFLAGS += -mssse3 +endif ifeq ($(C_COMPILER), GCC) GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9) @@ -42,7 +45,7 @@ endif ifdef TARGET_CORE ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE HASWELL NEHALEM ZEN BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3)) - override CFLAGS += -msse3 + override CFLAGS += -msse3 -mssse3 endif ifeq ($(TARGET_CORE), COOPERLAKE) override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) From 5f60a32cacc4e168202c7f8729d97b11e861e0c3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 13 Oct 2020 11:57:04 +0200 Subject: [PATCH 5/8] Add -mssse3 if supported by the hardware --- Makefile.x86_64 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile.x86_64 b/Makefile.x86_64 index e793a1c2f..f055828a9 100644 --- a/Makefile.x86_64 +++ b/Makefile.x86_64 @@ -12,6 +12,10 @@ ifdef HAVE_SSE3 ifndef DYNAMIC_ARCH CCOMMON_OPT += -msse3 FCOMMON_OPT += -msse3 +ifdef HAVE_SSSE3 +CCOMMON_OPT += -mssse3 +FCOMMON_OPT += -mssse3 +endif endif endif From 9e3cff5cf2cf841e9a7a73b70b4465c87ac45643 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 13 Oct 2020 14:41:25 +0200 Subject: [PATCH 6/8] Expressly enable -mavx2 on Zen, SkylakeX and Cooperlake as well --- Makefile.x86_64 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.x86_64 b/Makefile.x86_64 index f055828a9..9e75dc91c 100644 --- a/Makefile.x86_64 +++ b/Makefile.x86_64 @@ -64,7 +64,7 @@ endif endif endif -ifeq ($(CORE), HASWELL) +ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE) ifndef DYNAMIC_ARCH ifndef NO_AVX2 ifeq ($(C_COMPILER), GCC) From 137ae618dba8ddf2ee899cb2a7854b34f1100ed3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 13 Oct 2020 15:02:17 +0200 Subject: [PATCH 7/8] Fix typo --- Makefile.x86_64 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.x86_64 b/Makefile.x86_64 index 9e75dc91c..8a3fc4eae 100644 --- a/Makefile.x86_64 +++ b/Makefile.x86_64 @@ -64,7 +64,7 @@ endif endif endif -ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE) +ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE)) ifndef DYNAMIC_ARCH ifndef NO_AVX2 ifeq ($(C_COMPILER), GCC) From 75e3a92df6b4100c05d034c85a6076678b5cc6af Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 14 Oct 2020 01:01:58 +0200 Subject: [PATCH 8/8] Add express -mavx and -msse options (and fix a stray = for cooperlake) --- cmake/cc.cmake | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/cmake/cc.cmake b/cmake/cc.cmake index c490dd9ab..9f5cc1bf7 100644 --- a/cmake/cc.cmake +++ b/cmake/cc.cmake @@ -109,10 +109,25 @@ if (${CORE} STREQUAL "COOPERLAKE") if (NOT NO_AVX512) execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1) - set (CCOMMON_OPT = "${CCOMMON_OPT} -march=cooperlake") + set (CCOMMON_OPT "${CCOMMON_OPT} -march=cooperlake") else () set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512") endif() endif () endif () endif () + +if (NOT DYNAMIC_ARCH) + if (HAVE_AVX2) + set (CCOMMON_OPT "${CCOMMON_OPT} -mavx2") + endif () + if (HAVE_AVX) + set (CCOMMON_OPT "${CCOMMON_OPT} -mavx") + endif () + if (HAVE_SSE3) + set (CCOMMON_OPT "${CCOMMON_OPT} -msse3") + endif () + if (HAVE_SSSE3) + set (CCOMMON_OPT "${CCOMMON_OPT} -mssse3") + endif () +endif()