From 438a8e5624ef1adfe98f989655ca398866143458 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 7 Nov 2020 20:26:12 +0100 Subject: [PATCH 1/8] Fix placement of getarch call and spurious cpu property accumulation in DYNAMIC_ARCH builds --- cmake/prebuild.cmake | 45 ++++++---------- cmake/system.cmake | 124 ++++++++++++++++++++----------------------- 2 files changed, 73 insertions(+), 96 deletions(-) diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index b1b4c501a..da7686c33 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -139,36 +139,6 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS set(CGEMM3M_UNROLL_N 4) set(ZGEMM3M_UNROLL_M 4) set(ZGEMM3M_UNROLL_N 4) - elseif ("${TCORE}" STREQUAL "BARCELONA") - file(APPEND ${TARGET_CONF_TEMP} - "#define HAVE_SSE3\n") - elseif ("${TCORE}" STREQUAL "STEAMROLLER") - file(APPEND ${TARGET_CONF_TEMP} - "#define HAVE_SSE3\n") - elseif ("${TCORE}" STREQUAL "EXCAVATOR") - file(APPEND ${TARGET_CONF_TEMP} - "#define HAVE_SSE3\n") - elseif ("${TCORE}" STREQUAL "NEHALEM") - file(APPEND ${TARGET_CONF_TEMP} - "#define HAVE_SSE3\n") - elseif ("${TCORE}" STREQUAL "PRESCOTT") - file(APPEND ${TARGET_CONF_TEMP} - "#define HAVE_SSE3\n") - elseif ("${TCORE}" STREQUAL "SANDYBRIDGE") - file(APPEND ${TARGET_CONF_TEMP} - "#define HAVE_AVX\n") - elseif ("${TCORE}" STREQUAL "HASWELL") - file(APPEND ${TARGET_CONF_TEMP} - "#define HAVE_AVX2\n") - elseif ("${TCORE}" STREQUAL "ZEN") - file(APPEND ${TARGET_CONF_TEMP} - "#define HAVE_AVX2\n") - elseif ("${TCORE}" STREQUAL "SKYLAKEX") - file(APPEND ${TARGET_CONF_TEMP} - "#define HAVE_AVX512\n") - elseif ("${TCORE}" STREQUAL "COOPERLAKE") - file(APPEND ${TARGET_CONF_TEMP} - "#define HAVE_AVX512\n") elseif ("${TCORE}" STREQUAL "ARMV7") file(APPEND ${TARGET_CONF_TEMP} "#define L1_DATA_SIZE\t65536\n" @@ -586,6 +556,21 @@ else(NOT CMAKE_CROSSCOMPILING) MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}") endif () endif () + unset (HAVE_AVX2) + unset (HAVE_AVX) + unset (HAVE_FMA3) + unset (HAVE_MMX) + unset (HAVE_SSE) + unset (HAVE_SSE2) + unset (HAVE_SSE3) + unset (HAVE_SSSE3) + unset (HAVE_SSE4A) + unset (HAVE_SSE4_1) + unset (HAVE_SSE4_2) + unset (HAVE_NEON) + unset (HAVE_VFP) + unset (HAVE_VFPV3) + unset (HAVE_VFPV4) message(STATUS "Running getarch") # use the cmake binary w/ the -E param to run a shell command in a cross-platform way diff --git a/cmake/system.cmake b/cmake/system.cmake index 48d206b12..66e95c6d3 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -44,74 +44,9 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32) endif () endif () -if (DEFINED TARGET) - if (${TARGET} STREQUAL "COOPERLAKE" AND NOT NO_AVX512) -# if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") - execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) - if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1) - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake") - else() - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") - endif() -# elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG") -# set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") -# endif() - endif() - if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512) - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") - endif() - if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2) - if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") - execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) - if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7) - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2") - endif() - elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG") - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse -msse3 -mavx2") - endif() - endif() - if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2) - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2") - endif() - if (${TARGET} STREQUAL "ZEN" AND NOT NO_AVX2) - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2") - endif() - if (${TARGET} STREQUAL "SANDYBRIDGE" AND NOT NO_AVX) - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx") - endif() - if (${TARGET} STREQUAL "BARCELONA" OR ${TARGET} STREQUAL "STEAMROLLER" OR ${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "EXCAVATOR") - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3") - endif() - if (${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "BOBCAT" OR ${TARGET} STREQUAL "OPTERON_SSE3") - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3") - endif() - if (${TARGET} STREQUAL "PRESCOTT" OR ${TARGET} STREQUAL "NANO") - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3") - endif() - if (${TARGET} STREQUAL "NEHALEM" OR ${TARGET} STREQUAL "ATOM") - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3") - endif() - if (${TARGET} STREQUAL "CORE2" OR ${TARGET} STREQUAL "PENRYN" OR ${TARGET} STREQUAL "DUNNINGTON") - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3") - endif() - if (DEFINED HAVE_SSE) - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse") - endif() - if (DEFINED HAVE_SSE2) - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2") - endif() - if (DEFINED HAVE_SSE3) - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3") - endif() - if (DEFINED HAVE_SSSE3) - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3") - endif() - if (DEFINED HAVE_SSE4_1) - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1") - endif() -endif() if (DEFINED TARGET) + message(STATUS "-- -- -- -- -- -- -- -- -- -- -- -- --") message(STATUS "Targeting the ${TARGET} architecture.") set(GETARCH_FLAGS "-DFORCE_${TARGET}") endif () @@ -211,6 +146,63 @@ else() endif () include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake") +if (DEFINED TARGET) + if (${TARGET} STREQUAL COOPERLAKE AND NOT NO_AVX512) +# if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") + execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) + if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake") + else() + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") + endif() +# elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG") +# set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") +# endif() + endif() + if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") + endif() + if (${TARGET} STREQUAL HASWELL AND NOT NO_AVX2) + if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") + execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) + if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") + endif() + elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG") + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") + endif() + endif() + if (DEFINED HAVE_AVX) + if (NOT NO_AVX) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx") + endif() + endif() + if (DEFINED HAVE_AVX2) + if (NOT NO_AVX2) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") + endif() + endif() + if (DEFINED HAVE_FMA3) + if (NOT NO_AVX2) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mfma") + endif() + endif() + if (DEFINED HAVE_SSE) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse") + endif() + if (DEFINED HAVE_SSE2) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2") + endif() + if (DEFINED HAVE_SSE3) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3") + endif() + if (DEFINED HAVE_SSSE3) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3") + endif() + if (DEFINED HAVE_SSE4_1) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1") + endif() +endif() if (DEFINED BINARY) message(STATUS "Compiling a ${BINARY}-bit binary.") endif () From a29338aaa6b364ce99ea30785d1227bd327ce3c7 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 7 Nov 2020 20:27:42 +0100 Subject: [PATCH 2/8] Remove extraneous quotes that caused a cmake policy warning --- cmake/cc.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/cc.cmake b/cmake/cc.cmake index 2f4d1c6d7..b963940d6 100644 --- a/cmake/cc.cmake +++ b/cmake/cc.cmake @@ -96,7 +96,7 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "SUN") endif () endif () -if (${CORE} STREQUAL "SKYLAKEX") +if (${CORE} STREQUAL SKYLAKEX) if (NOT DYNAMIC_ARCH) if (NOT NO_AVX512) set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512") @@ -104,7 +104,7 @@ if (${CORE} STREQUAL "SKYLAKEX") endif () endif () -if (${CORE} STREQUAL "COOPERLAKE") +if (${CORE} STREQUAL COOPERLAKE) if (NOT DYNAMIC_ARCH) if (NOT NO_AVX512) execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) From ccb9731c7b41b601412b00b73f6da98613d66b7f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 7 Nov 2020 20:30:15 +0100 Subject: [PATCH 3/8] Fix propagation of cpu properties to compiler options --- Makefile.x86_64 | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Makefile.x86_64 b/Makefile.x86_64 index 49a9a0a23..43bfc9ecd 100644 --- a/Makefile.x86_64 +++ b/Makefile.x86_64 @@ -9,9 +9,9 @@ endif endif ifdef HAVE_SSE3 -ifndef DYNAMIC_ARCH CCOMMON_OPT += -msse3 FCOMMON_OPT += -msse3 +endif ifdef HAVE_SSSE3 CCOMMON_OPT += -mssse3 FCOMMON_OPT += -mssse3 @@ -20,7 +20,17 @@ ifdef HAVE_SSE4_1 CCOMMON_OPT += -msse4.1 FCOMMON_OPT += -msse4.1 endif +ifdef HAVE_AVX +CCOMMON_OPT += -mavx +FCOMMON_OPT += -mavx endif +ifdef HAVE_AVX2 +CCOMMON_OPT += -mavx2 +FCOMMON_OPT += -mavx2 +endif +ifdef HAVE_FMA3 +CCOMMON_OPT += -mfma +FCOMMON_OPT += -mfma endif ifeq ($(CORE), SKYLAKEX) @@ -66,8 +76,7 @@ endif endif endif -ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE)) -ifndef DYNAMIC_ARCH +ifdef HAVE_AVX2 ifndef NO_AVX2 ifeq ($(C_COMPILER), GCC) # AVX2 support was added in 4.7.0 @@ -96,7 +105,6 @@ endif endif endif endif -endif From a04f532edfe65a7e4cf4dfb2dc34d363e2eba065 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 7 Nov 2020 20:37:03 +0100 Subject: [PATCH 4/8] Reset cpu property flags between build cycles in DYNAMIC_ARCH mode --- Makefile.system | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Makefile.system b/Makefile.system index ca302a98a..dc7ed3f3a 100644 --- a/Makefile.system +++ b/Makefile.system @@ -252,6 +252,22 @@ DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" ifndef TARGET_CORE include $(TOPDIR)/Makefile.conf else +undefine HAVE_NEON +undefine HAVE_VFP +undefine HAVE_VFPV3 +undefine HAVE_VFPV4 +undefine HAVE_MMX +undefine HAVE_SSE +undefine HAVE_SSE2 +undefine HAVE_SSE3 +undefine HAVE_SSSE3 +undefine HAVE_SSE4_1 +undefine HAVE_SSE4_2 +undefine HAVE_SSE4A +undefine HAVE_SSE5 +undefine HAVE_AVX +undefine HAVE_AVX2 +undefine HAVE_FMA3 include $(TOPDIR)/Makefile_kernel.conf endif @@ -1522,6 +1538,8 @@ export HAVE_SSE4_2 export HAVE_SSE4A export HAVE_SSE5 export HAVE_AVX +export HAVE_AVX2 +export HAVE_FMA3 export HAVE_VFP export HAVE_VFPV3 export HAVE_VFPV4 From b976a0bf4095fd8b9e80ae3cf0e0f6eab200219e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 7 Nov 2020 20:39:56 +0100 Subject: [PATCH 5/8] Remove previous workaround for compiler flags related to cpu capabilities in x86_64 DYNAMIC_ARCH builds --- kernel/Makefile | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/kernel/Makefile b/kernel/Makefile index e811ed43d..fb1d5d39a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -5,13 +5,6 @@ endif TOPDIR = .. include $(TOPDIR)/Makefile.system -ifdef HAVE_SSE3 -CFLAGS += -msse3 -endif -ifdef HAVE_SSSE3 -CFLAGS += -mssse3 -endif - ifeq ($(ARCH), power) ifeq ($(C_COMPILER), CLANG) override CFLAGS += -fno-integrated-as @@ -38,12 +31,6 @@ ifdef NO_AVX2 endif ifdef TARGET_CORE - ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE HASWELL NEHALEM ZEN BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3)) - override CFLAGS += -msse -msse2 -msse3 -mssse3 -msse4.1 -endif - ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),KATMAI COPPERMINE BANIAS NORTHWOOD ATHLON OPTERON)) - override CFLAGS += -msse -msse2 -endif ifeq ($(TARGET_CORE), COOPERLAKE) override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) ifeq ($(GCCVERSIONGTEQ10), 1) From f4b7ba12b71f97b6e5f8cec462635b9334c62a72 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 7 Nov 2020 23:37:21 +0100 Subject: [PATCH 6/8] Update Makefile.system --- Makefile.system | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile.system b/Makefile.system index dc7ed3f3a..258a84262 100644 --- a/Makefile.system +++ b/Makefile.system @@ -252,7 +252,9 @@ DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" ifndef TARGET_CORE include $(TOPDIR)/Makefile.conf else +ifdef HAVE_NEON undefine HAVE_NEON +endif undefine HAVE_VFP undefine HAVE_VFPV3 undefine HAVE_VFPV4 From f6a57d8f63ed0f1fa4823d27daafc2cb3a6dc96b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 8 Nov 2020 00:01:36 +0100 Subject: [PATCH 7/8] Update Makefile.system --- Makefile.system | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Makefile.system b/Makefile.system index 258a84262..da2d452b2 100644 --- a/Makefile.system +++ b/Makefile.system @@ -255,9 +255,15 @@ else ifdef HAVE_NEON undefine HAVE_NEON endif +ifdef HAVE_VFP undefine HAVE_VFP +endif +ifdef HAVE_VFPV3 undefine HAVE_VFPV3 +endif +ifdef HAVE_VFPV4 undefine HAVE_VFPV4 +endif undefine HAVE_MMX undefine HAVE_SSE undefine HAVE_SSE2 From 1c4cfdc13937765dd9bd0ef8b846ba027ec086b3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 8 Nov 2020 00:12:55 +0100 Subject: [PATCH 8/8] Stay compatible with old gmake that did not support undefine --- Makefile.system | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/Makefile.system b/Makefile.system index da2d452b2..aae7ba503 100644 --- a/Makefile.system +++ b/Makefile.system @@ -6,7 +6,7 @@ INCLUDED = 1 ifndef TOPDIR -TOPDIR = . +TOPDIR = . endif # If ARCH is not set, we use the host system's architecture for getarch compile options. @@ -252,30 +252,22 @@ DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" ifndef TARGET_CORE include $(TOPDIR)/Makefile.conf else -ifdef HAVE_NEON -undefine HAVE_NEON -endif -ifdef HAVE_VFP -undefine HAVE_VFP -endif -ifdef HAVE_VFPV3 -undefine HAVE_VFPV3 -endif -ifdef HAVE_VFPV4 -undefine HAVE_VFPV4 -endif -undefine HAVE_MMX -undefine HAVE_SSE -undefine HAVE_SSE2 -undefine HAVE_SSE3 -undefine HAVE_SSSE3 -undefine HAVE_SSE4_1 -undefine HAVE_SSE4_2 -undefine HAVE_SSE4A -undefine HAVE_SSE5 -undefine HAVE_AVX -undefine HAVE_AVX2 -undefine HAVE_FMA3 +HAVE_NEON= +HAVE_VFP= +HAVE_VFPV3= +HAVE_VFPV4= +HAVE_MMX= +HAVE_SSE= +HAVE_SSE2= +HAVE_SSE3= +HAVE_SSSE3= +HAVE_SSE4_1= +HAVE_SSE4_2= +HAVE_SSE4A= +HAVE_SSE5= +HAVE_AVX= +HAVE_AVX2= +HAVE_FMA3= include $(TOPDIR)/Makefile_kernel.conf endif