Merge pull request #2978 from martin-frbg/fixdynfeatures

Fix handling of cpu capability flags in DYNAMIC_ARCH builds
This commit is contained in:
Martin Kroeker 2020-11-08 10:19:17 +01:00 committed by GitHub
commit d2faa1be4e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 106 additions and 116 deletions

View File

@ -6,7 +6,7 @@
INCLUDED = 1
ifndef TOPDIR
TOPDIR = .
TOPDIR = .
endif
# If ARCH is not set, we use the host system's architecture for getarch compile options.
@ -252,6 +252,22 @@ DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)"
# Choose which generated configuration fragment to read: Makefile.conf when
# no TARGET_CORE is set, Makefile_kernel.conf otherwise.
ifndef TARGET_CORE
include $(TOPDIR)/Makefile.conf
else
# Assign an empty value to every CPU capability variable before reading
# Makefile_kernel.conf. GNU make treats a variable with an empty value as
# "not defined" for ifdef, so only the variables that the included file
# assigns a non-empty value to will pass later ifdef tests.
# NOTE(review): presumably this prevents capability flags set earlier (for
# another core) from leaking into the per-TARGET_CORE kernel build - confirm.
HAVE_NEON=
HAVE_VFP=
HAVE_VFPV3=
HAVE_VFPV4=
HAVE_MMX=
HAVE_SSE=
HAVE_SSE2=
HAVE_SSE3=
HAVE_SSSE3=
HAVE_SSE4_1=
HAVE_SSE4_2=
HAVE_SSE4A=
HAVE_SSE5=
HAVE_AVX=
HAVE_AVX2=
HAVE_FMA3=
include $(TOPDIR)/Makefile_kernel.conf
endif
@ -1522,6 +1538,8 @@ export HAVE_SSE4_2
export HAVE_SSE4A
export HAVE_SSE5
export HAVE_AVX
export HAVE_AVX2
export HAVE_FMA3
export HAVE_VFP
export HAVE_VFPV3
export HAVE_VFPV4

View File

@ -9,9 +9,9 @@ endif
endif
ifdef HAVE_SSE3
ifndef DYNAMIC_ARCH
CCOMMON_OPT += -msse3
FCOMMON_OPT += -msse3
endif
ifdef HAVE_SSSE3
CCOMMON_OPT += -mssse3
FCOMMON_OPT += -mssse3
@ -20,7 +20,17 @@ ifdef HAVE_SSE4_1
CCOMMON_OPT += -msse4.1
FCOMMON_OPT += -msse4.1
endif
# Append the matching -m option to both CCOMMON_OPT and FCOMMON_OPT for each
# CPU capability variable that is set (non-empty).
# NOTE(review): unlike the AVX2 handling further down in this file, these
# three blocks do not consult NO_AVX / NO_AVX2 - confirm that is intended.
ifdef HAVE_AVX
CCOMMON_OPT += -mavx
FCOMMON_OPT += -mavx
endif
ifdef HAVE_AVX2
CCOMMON_OPT += -mavx2
FCOMMON_OPT += -mavx2
endif
# HAVE_FMA3 maps to the compiler option -mfma.
ifdef HAVE_FMA3
CCOMMON_OPT += -mfma
FCOMMON_OPT += -mfma
endif
ifeq ($(CORE), SKYLAKEX)
@ -66,8 +76,7 @@ endif
endif
endif
ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE))
ifndef DYNAMIC_ARCH
ifdef HAVE_AVX2
ifndef NO_AVX2
ifeq ($(C_COMPILER), GCC)
# AVX2 support was added in 4.7.0
@ -96,7 +105,6 @@ endif
endif
endif
endif
endif

View File

@ -96,7 +96,7 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "SUN")
endif ()
endif ()
if (${CORE} STREQUAL "SKYLAKEX")
if (${CORE} STREQUAL SKYLAKEX)
if (NOT DYNAMIC_ARCH)
if (NOT NO_AVX512)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
@ -104,7 +104,7 @@ if (${CORE} STREQUAL "SKYLAKEX")
endif ()
endif ()
if (${CORE} STREQUAL "COOPERLAKE")
if (${CORE} STREQUAL COOPERLAKE)
if (NOT DYNAMIC_ARCH)
if (NOT NO_AVX512)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)

View File

@ -139,36 +139,6 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
set(CGEMM3M_UNROLL_N 4)
set(ZGEMM3M_UNROLL_M 4)
set(ZGEMM3M_UNROLL_N 4)
elseif ("${TCORE}" STREQUAL "BARCELONA")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_SSE3\n")
elseif ("${TCORE}" STREQUAL "STEAMROLLER")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_SSE3\n")
elseif ("${TCORE}" STREQUAL "EXCAVATOR")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_SSE3\n")
elseif ("${TCORE}" STREQUAL "NEHALEM")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_SSE3\n")
elseif ("${TCORE}" STREQUAL "PRESCOTT")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_SSE3\n")
elseif ("${TCORE}" STREQUAL "SANDYBRIDGE")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_AVX\n")
elseif ("${TCORE}" STREQUAL "HASWELL")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_AVX2\n")
elseif ("${TCORE}" STREQUAL "ZEN")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_AVX2\n")
elseif ("${TCORE}" STREQUAL "SKYLAKEX")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_AVX512\n")
elseif ("${TCORE}" STREQUAL "COOPERLAKE")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_AVX512\n")
elseif ("${TCORE}" STREQUAL "ARMV7")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_DATA_SIZE\t65536\n"
@ -586,6 +556,21 @@ else(NOT CMAKE_CROSSCOMPILING)
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
endif ()
endif ()
# Clear every CPU capability variable (normal scope only) before getarch is
# run, so that only capabilities reported afterwards remain defined.
foreach (cpu_feature_flag
         HAVE_AVX2 HAVE_AVX HAVE_FMA3
         HAVE_MMX HAVE_SSE HAVE_SSE2 HAVE_SSE3 HAVE_SSSE3
         HAVE_SSE4A HAVE_SSE4_1 HAVE_SSE4_2
         HAVE_NEON HAVE_VFP HAVE_VFPV3 HAVE_VFPV4)
  unset (${cpu_feature_flag})
endforeach ()
# Do not leave the loop variable behind in the caller's scope.
unset (cpu_feature_flag)
message(STATUS "Running getarch")
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way

View File

@ -44,74 +44,9 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
endif ()
endif ()
# Append per-TARGET instruction set options to KERNEL_DEFINITIONS.
# NOTE(review): this copy sits before the include of cmake/prebuild.cmake that
# appears further down, so the HAVE_* checks at the end may run before those
# variables have been populated - confirm against the full file. An updated
# copy of this block appears after that include.
if (DEFINED TARGET)
if (${TARGET} STREQUAL "COOPERLAKE" AND NOT NO_AVX512)
# if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
# The version query below runs for whatever C compiler is configured; its
# result selects -march=cooperlake for 10.1 or newer, skylake-avx512 otherwise.
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
else()
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
endif()
# elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
# set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
# endif()
endif()
if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
endif()
# NOTE(review): HASWELL is handled twice - once gated on the compiler here and
# once unconditionally right after - so the options can be appended twice.
if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2)
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2")
endif()
# NOTE(review): CMake documents the compiler ID as "Clang"/"AppleClang";
# STREQUAL is case-sensitive, so this "CLANG" branch looks unreachable.
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse -msse3 -mavx2")
endif()
endif()
if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2")
endif()
if (${TARGET} STREQUAL "ZEN" AND NOT NO_AVX2)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2")
endif()
if (${TARGET} STREQUAL "SANDYBRIDGE" AND NOT NO_AVX)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx")
endif()
# Every remaining named target below only adds -msse3.
if (${TARGET} STREQUAL "BARCELONA" OR ${TARGET} STREQUAL "STEAMROLLER" OR ${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "EXCAVATOR")
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
endif()
if (${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "BOBCAT" OR ${TARGET} STREQUAL "OPTERON_SSE3")
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
endif()
if (${TARGET} STREQUAL "PRESCOTT" OR ${TARGET} STREQUAL "NANO")
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
endif()
if (${TARGET} STREQUAL "NEHALEM" OR ${TARGET} STREQUAL "ATOM")
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
endif()
if (${TARGET} STREQUAL "CORE2" OR ${TARGET} STREQUAL "PENRYN" OR ${TARGET} STREQUAL "DUNNINGTON")
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
endif()
# Capability-driven options: one -m option per defined HAVE_* variable.
if (DEFINED HAVE_SSE)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse")
endif()
if (DEFINED HAVE_SSE2)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2")
endif()
if (DEFINED HAVE_SSE3)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
endif()
if (DEFINED HAVE_SSSE3)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3")
endif()
if (DEFINED HAVE_SSE4_1)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1")
endif()
endif()
# When a TARGET was requested, pass it to getarch as a FORCE_<TARGET> define
# and announce the choice in the configure log.
if (DEFINED TARGET)
  set(GETARCH_FLAGS "-DFORCE_${TARGET}")
  message(STATUS "-- -- -- -- -- -- -- -- -- -- -- -- --")
  message(STATUS "Targeting the ${TARGET} architecture.")
endif ()
@ -211,6 +146,63 @@ else()
endif ()
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
# Translate the requested TARGET and the HAVE_* CPU capability variables into
# compiler options appended to KERNEL_DEFINITIONS.
# NOTE(review): the HAVE_* variables are presumably populated by the
# cmake/prebuild.cmake include immediately above - confirm.
if (DEFINED TARGET)
  # COOPERLAKE: use -march=cooperlake when the compiler reports version 10.1 or
  # newer, otherwise fall back to -march=skylake-avx512.
  if (${TARGET} STREQUAL COOPERLAKE AND NOT NO_AVX512)
    # The version is queried for whatever C compiler is configured (there is no
    # compiler ID check here). The trailing newline is stripped so the result
    # is a clean version string.
    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
                    OUTPUT_VARIABLE GCC_VERSION
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
    # Quoted so that an empty result (query failed) selects the fallback branch
    # instead of producing a malformed if() expression.
    if ("${GCC_VERSION}" VERSION_GREATER 10.1 OR "${GCC_VERSION}" VERSION_EQUAL 10.1)
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
    else()
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
    endif()
  endif()

  if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
  endif()

  # HASWELL: add -mavx2 for GCC 4.7 or newer, and for Clang.
  if (${TARGET} STREQUAL HASWELL AND NOT NO_AVX2)
    if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
      execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
                      OUTPUT_VARIABLE GCC_VERSION
                      OUTPUT_STRIP_TRAILING_WHITESPACE)
      if ("${GCC_VERSION}" VERSION_GREATER 4.7 OR "${GCC_VERSION}" VERSION_EQUAL 4.7)
        set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
      endif()
    # CMake reports Clang as "Clang" (or "AppleClang"); the previous comparison
    # against the all-caps string "CLANG" could never succeed.
    elseif (${CMAKE_C_COMPILER_ID} MATCHES "Clang")
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
    endif()
  endif()

  # Capability-driven options: one -m option per defined HAVE_* variable.
  # The AVX family can additionally be vetoed through NO_AVX / NO_AVX2.
  if (DEFINED HAVE_AVX)
    if (NOT NO_AVX)
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx")
    endif()
  endif()
  if (DEFINED HAVE_AVX2)
    if (NOT NO_AVX2)
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
    endif()
  endif()
  # FMA3 is switched off together with AVX2; no separate NO_FMA3 option is
  # consulted here.
  if (DEFINED HAVE_FMA3)
    if (NOT NO_AVX2)
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mfma")
    endif()
  endif()
  if (DEFINED HAVE_SSE)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse")
  endif()
  if (DEFINED HAVE_SSE2)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2")
  endif()
  if (DEFINED HAVE_SSE3)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
  endif()
  if (DEFINED HAVE_SSSE3)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3")
  endif()
  if (DEFINED HAVE_SSE4_1)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1")
  endif()
endif()
# Log the requested bit width when BINARY has been specified.
if (DEFINED BINARY)
message(STATUS "Compiling a ${BINARY}-bit binary.")
endif ()

View File

@ -5,13 +5,6 @@ endif
TOPDIR = ..
include $(TOPDIR)/Makefile.system
ifdef HAVE_SSE3
CFLAGS += -msse3
endif
ifdef HAVE_SSSE3
CFLAGS += -mssse3
endif
ifeq ($(ARCH), power)
ifeq ($(C_COMPILER), CLANG)
override CFLAGS += -fno-integrated-as
@ -38,12 +31,6 @@ ifdef NO_AVX2
endif
ifdef TARGET_CORE
ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE HASWELL NEHALEM ZEN BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3))
override CFLAGS += -msse -msse2 -msse3 -mssse3 -msse4.1
endif
ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),KATMAI COPPERMINE BANIAS NORTHWOOD ATHLON OPTERON))
override CFLAGS += -msse -msse2
endif
ifeq ($(TARGET_CORE), COOPERLAKE)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
ifeq ($(GCCVERSIONGTEQ10), 1)