From 10379fc83baced749a2e4f881daa923d9361df26 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 15 Oct 2020 19:05:37 +0200 Subject: [PATCH 1/8] Use ifdef instead of if --- kernel/setparam-ref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index 72fbf32bf..849a4194a 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -1164,7 +1164,7 @@ static void init_parameter(void) { TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q; #endif -#if (CORE_KATMAI) || (CORE_COPPERMINE) || (CORE_BANIAS) || (CORE_YONAH) || (CORE_ATHLON) +#if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON) #ifdef DEBUG fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n"); From ac8af9cec6e9c391f9047992c15454db8ada1821 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 15 Oct 2020 19:06:45 +0200 Subject: [PATCH 2/8] Add -msse where supported, apparently required for older gcc --- Makefile.x86 | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Makefile.x86 b/Makefile.x86 index a6196d365..330690935 100644 --- a/Makefile.x86 +++ b/Makefile.x86 @@ -54,3 +54,19 @@ LIBATLAS = -L$(ATLASPATH)/32 -lcblas -lf77blas -latlas -lm else LIBATLAS = -L$(ATLASPATH)/32 -lptf77blas -lptatlas -lpthread -lm endif + +ifdef HAVE_SSE3 +ifndef DYNAMIC_ARCH +CCOMMON_OPT += -msse3 +FCOMMON_OPT += -msse3 +ifdef HAVE_SSSE3 +CCOMMON_OPT += -mssse3 +FCOMMON_OPT += -mssse3 +endif +ifdef HAVE_SSE4_1 +CCOMMON_OPT += -msse4.1 +FCOMMON_OPT += -msse4.1 +endif +endif +endif + From c339c40c01c11046bd9886a00f16deb9a6d675a2 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 15 Oct 2020 19:08:12 +0200 Subject: [PATCH 3/8] Silence a redefinition warning --- kernel/x86_64/iamax_sse.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/x86_64/iamax_sse.S b/kernel/x86_64/iamax_sse.S index 4f62b9be2..14c7f43ec 100644 --- a/kernel/x86_64/iamax_sse.S +++ b/kernel/x86_64/iamax_sse.S @@ -51,6 +51,8 @@ #define MAXPS maxps #define MAXSS maxss #ifdef USE_MIN +#undef MAXPS +#undef MAXSS #define MAXPS minps #define MAXSS minss #endif From dc6cefd2f588c27847f2c4b5a8ad42cbf6331299 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 15 Oct 2020 20:16:15 +0200 Subject: [PATCH 4/8] Expressly enable -msse for 32bit DYNAMIC_ARCH kernels --- kernel/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/Makefile b/kernel/Makefile index abe2e08d6..65e2a0ad6 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -46,6 +46,9 @@ endif ifdef TARGET_CORE ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE HASWELL NEHALEM ZEN BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3)) override CFLAGS += -msse3 -mssse3 -msse4.1 +endif + ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),KATMAI COPPERMINE NEHALEM BARCELONA CORE2 PRESCOTT NORTHWOOD ATHLON)) + override CFLAGS += -msse endif ifeq ($(TARGET_CORE), COOPERLAKE) override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) From f071d1207ab2d25247bf6ba02a2f16bf02273a5b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 15 Oct 2020 22:10:32 +0200 Subject: [PATCH 5/8] add sse2 --- kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/Makefile b/kernel/Makefile index 65e2a0ad6..495f3609f 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -48,7 +48,7 @@ ifdef TARGET_CORE override CFLAGS += -msse3 -mssse3 -msse4.1 endif ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),KATMAI COPPERMINE NEHALEM BARCELONA CORE2 PRESCOTT NORTHWOOD ATHLON)) - override CFLAGS += -msse + override CFLAGS += -msse -msse2 endif ifeq ($(TARGET_CORE), COOPERLAKE) override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) From df706670430ef39aeb0a423e367560e452909139 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 16 Oct 2020 09:55:48 +0200 Subject: [PATCH 6/8] fix core list for sse/sse2 --- kernel/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/Makefile b/kernel/Makefile index 495f3609f..43318d475 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -45,9 +45,9 @@ endif ifdef TARGET_CORE ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE HASWELL NEHALEM ZEN BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3)) - override CFLAGS += -msse3 -mssse3 -msse4.1 + override CFLAGS += -msse -msse2 -msse3 -mssse3 -msse4.1 endif - ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),KATMAI COPPERMINE NEHALEM BARCELONA CORE2 PRESCOTT NORTHWOOD ATHLON)) + ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),KATMAI COPPERMINE BANIAS NORTHWOOD ATHLON OPTERON)) override CFLAGS += -msse -msse2 endif ifeq ($(TARGET_CORE), COOPERLAKE) From 786c0a3ce80b4a3598d7a534470aa5f6b7e6b01c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 16 Oct 2020 10:41:53 +0200 Subject: [PATCH 7/8] Add sse options for use of intrinics with older compilers --- cmake/cc.cmake | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cmake/cc.cmake b/cmake/cc.cmake index 9f5cc1bf7..2f4d1c6d7 100644 --- a/cmake/cc.cmake +++ b/cmake/cc.cmake @@ -124,10 +124,19 @@ if (NOT DYNAMIC_ARCH) if (HAVE_AVX) set (CCOMMON_OPT "${CCOMMON_OPT} -mavx") endif () + if (HAVE_SSE) + set (CCOMMON_OPT "${CCOMMON_OPT} -msse") + endif () + if (HAVE_SSE2) + set (CCOMMON_OPT "${CCOMMON_OPT} -msse2") + endif () if (HAVE_SSE3) set (CCOMMON_OPT "${CCOMMON_OPT} -msse3") endif () if (HAVE_SSSE3) set (CCOMMON_OPT "${CCOMMON_OPT} -mssse3") endif () + if (HAVE_SSE4_1) + set (CCOMMON_OPT "${CCOMMON_OPT} -msse4.1") + endif () endif() From f64243ff57d79c6bd23d39c49648adfddbe018a4 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 16 Oct 2020 10:47:06 +0200 Subject: [PATCH 8/8] Add compiler options for sse/sse2/ssse3/sse4.1 --- cmake/system.cmake | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cmake/system.cmake b/cmake/system.cmake index b34d4a9a5..4cc46236d 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -70,9 +70,21 @@ if (DEFINED TARGET) set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") endif() endif() + if (DEFINED HAVE_SSE) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse") + endif() + if (DEFINED HAVE_SSE2) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2") + endif() if (DEFINED HAVE_SSE3) set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3") endif() + if (DEFINED HAVE_SSSE3) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3") + endif() + if (DEFINED HAVE_SSE4_1) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1") + endif() endif() if (DEFINED TARGET)