Merge pull request #2978 from martin-frbg/fixdynfeatures
Fix handling of CPU capability flags in DYNAMIC_ARCH builds
This commit is contained in:
commit
d2faa1be4e
|
@ -252,6 +252,22 @@ DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)"
|
|||
ifndef TARGET_CORE
|
||||
include $(TOPDIR)/Makefile.conf
|
||||
else
|
||||
HAVE_NEON=
|
||||
HAVE_VFP=
|
||||
HAVE_VFPV3=
|
||||
HAVE_VFPV4=
|
||||
HAVE_MMX=
|
||||
HAVE_SSE=
|
||||
HAVE_SSE2=
|
||||
HAVE_SSE3=
|
||||
HAVE_SSSE3=
|
||||
HAVE_SSE4_1=
|
||||
HAVE_SSE4_2=
|
||||
HAVE_SSE4A=
|
||||
HAVE_SSE5=
|
||||
HAVE_AVX=
|
||||
HAVE_AVX2=
|
||||
HAVE_FMA3=
|
||||
include $(TOPDIR)/Makefile_kernel.conf
|
||||
endif
|
||||
|
||||
|
@ -1522,6 +1538,8 @@ export HAVE_SSE4_2
|
|||
export HAVE_SSE4A
|
||||
export HAVE_SSE5
|
||||
export HAVE_AVX
|
||||
export HAVE_AVX2
|
||||
export HAVE_FMA3
|
||||
export HAVE_VFP
|
||||
export HAVE_VFPV3
|
||||
export HAVE_VFPV4
|
||||
|
|
|
@ -9,9 +9,9 @@ endif
|
|||
endif
|
||||
|
||||
ifdef HAVE_SSE3
|
||||
ifndef DYNAMIC_ARCH
|
||||
CCOMMON_OPT += -msse3
|
||||
FCOMMON_OPT += -msse3
|
||||
endif
|
||||
ifdef HAVE_SSSE3
|
||||
CCOMMON_OPT += -mssse3
|
||||
FCOMMON_OPT += -mssse3
|
||||
|
@ -20,7 +20,17 @@ ifdef HAVE_SSE4_1
|
|||
CCOMMON_OPT += -msse4.1
|
||||
FCOMMON_OPT += -msse4.1
|
||||
endif
|
||||
ifdef HAVE_AVX
|
||||
CCOMMON_OPT += -mavx
|
||||
FCOMMON_OPT += -mavx
|
||||
endif
|
||||
ifdef HAVE_AVX2
|
||||
CCOMMON_OPT += -mavx2
|
||||
FCOMMON_OPT += -mavx2
|
||||
endif
|
||||
ifdef HAVE_FMA3
|
||||
CCOMMON_OPT += -mfma
|
||||
FCOMMON_OPT += -mfma
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), SKYLAKEX)
|
||||
|
@ -66,8 +76,7 @@ endif
|
|||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE))
|
||||
ifndef DYNAMIC_ARCH
|
||||
ifdef HAVE_AVX2
|
||||
ifndef NO_AVX2
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# AVX2 support was added in 4.7.0
|
||||
|
@ -96,7 +105,6 @@ endif
|
|||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -96,7 +96,7 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "SUN")
|
|||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL "SKYLAKEX")
|
||||
if (${CORE} STREQUAL SKYLAKEX)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
if (NOT NO_AVX512)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
|
||||
|
@ -104,7 +104,7 @@ if (${CORE} STREQUAL "SKYLAKEX")
|
|||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL "COOPERLAKE")
|
||||
if (${CORE} STREQUAL COOPERLAKE)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
if (NOT NO_AVX512)
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
|
|
|
@ -139,36 +139,6 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
|
|||
set(CGEMM3M_UNROLL_N 4)
|
||||
set(ZGEMM3M_UNROLL_M 4)
|
||||
set(ZGEMM3M_UNROLL_N 4)
|
||||
elseif ("${TCORE}" STREQUAL "BARCELONA")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define HAVE_SSE3\n")
|
||||
elseif ("${TCORE}" STREQUAL "STEAMROLLER")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define HAVE_SSE3\n")
|
||||
elseif ("${TCORE}" STREQUAL "EXCAVATOR")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define HAVE_SSE3\n")
|
||||
elseif ("${TCORE}" STREQUAL "NEHALEM")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define HAVE_SSE3\n")
|
||||
elseif ("${TCORE}" STREQUAL "PRESCOTT")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define HAVE_SSE3\n")
|
||||
elseif ("${TCORE}" STREQUAL "SANDYBRIDGE")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define HAVE_AVX\n")
|
||||
elseif ("${TCORE}" STREQUAL "HASWELL")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define HAVE_AVX2\n")
|
||||
elseif ("${TCORE}" STREQUAL "ZEN")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define HAVE_AVX2\n")
|
||||
elseif ("${TCORE}" STREQUAL "SKYLAKEX")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define HAVE_AVX512\n")
|
||||
elseif ("${TCORE}" STREQUAL "COOPERLAKE")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define HAVE_AVX512\n")
|
||||
elseif ("${TCORE}" STREQUAL "ARMV7")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_DATA_SIZE\t65536\n"
|
||||
|
@ -586,6 +556,21 @@ else(NOT CMAKE_CROSSCOMPILING)
|
|||
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
|
||||
endif ()
|
||||
endif ()
|
||||
unset (HAVE_AVX2)
|
||||
unset (HAVE_AVX)
|
||||
unset (HAVE_FMA3)
|
||||
unset (HAVE_MMX)
|
||||
unset (HAVE_SSE)
|
||||
unset (HAVE_SSE2)
|
||||
unset (HAVE_SSE3)
|
||||
unset (HAVE_SSSE3)
|
||||
unset (HAVE_SSE4A)
|
||||
unset (HAVE_SSE4_1)
|
||||
unset (HAVE_SSE4_2)
|
||||
unset (HAVE_NEON)
|
||||
unset (HAVE_VFP)
|
||||
unset (HAVE_VFPV3)
|
||||
unset (HAVE_VFPV4)
|
||||
message(STATUS "Running getarch")
|
||||
|
||||
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
|
||||
|
|
|
@ -44,74 +44,9 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
|
|||
endif ()
|
||||
endif ()
|
||||
|
||||
if (DEFINED TARGET)
|
||||
if (${TARGET} STREQUAL "COOPERLAKE" AND NOT NO_AVX512)
|
||||
# if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
|
||||
else()
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||
endif()
|
||||
# elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
|
||||
# set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
||||
# endif()
|
||||
endif()
|
||||
if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||
endif()
|
||||
if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2)
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2")
|
||||
endif()
|
||||
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse -msse3 -mavx2")
|
||||
endif()
|
||||
endif()
|
||||
if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2")
|
||||
endif()
|
||||
if (${TARGET} STREQUAL "ZEN" AND NOT NO_AVX2)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2")
|
||||
endif()
|
||||
if (${TARGET} STREQUAL "SANDYBRIDGE" AND NOT NO_AVX)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx")
|
||||
endif()
|
||||
if (${TARGET} STREQUAL "BARCELONA" OR ${TARGET} STREQUAL "STEAMROLLER" OR ${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "EXCAVATOR")
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
|
||||
endif()
|
||||
if (${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "BOBCAT" OR ${TARGET} STREQUAL "OPTERON_SSE3")
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
|
||||
endif()
|
||||
if (${TARGET} STREQUAL "PRESCOTT" OR ${TARGET} STREQUAL "NANO")
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
|
||||
endif()
|
||||
if (${TARGET} STREQUAL "NEHALEM" OR ${TARGET} STREQUAL "ATOM")
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
|
||||
endif()
|
||||
if (${TARGET} STREQUAL "CORE2" OR ${TARGET} STREQUAL "PENRYN" OR ${TARGET} STREQUAL "DUNNINGTON")
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
|
||||
endif()
|
||||
if (DEFINED HAVE_SSE)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse")
|
||||
endif()
|
||||
if (DEFINED HAVE_SSE2)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2")
|
||||
endif()
|
||||
if (DEFINED HAVE_SSE3)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
|
||||
endif()
|
||||
if (DEFINED HAVE_SSSE3)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3")
|
||||
endif()
|
||||
if (DEFINED HAVE_SSE4_1)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (DEFINED TARGET)
|
||||
message(STATUS "-- -- -- -- -- -- -- -- -- -- -- -- --")
|
||||
message(STATUS "Targeting the ${TARGET} architecture.")
|
||||
set(GETARCH_FLAGS "-DFORCE_${TARGET}")
|
||||
endif ()
|
||||
|
@ -211,6 +146,63 @@ else()
|
|||
endif ()
|
||||
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
|
||||
if (DEFINED TARGET)
|
||||
if (${TARGET} STREQUAL COOPERLAKE AND NOT NO_AVX512)
|
||||
# if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
|
||||
else()
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||
endif()
|
||||
# elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
|
||||
# set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
||||
# endif()
|
||||
endif()
|
||||
if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||
endif()
|
||||
if (${TARGET} STREQUAL HASWELL AND NOT NO_AVX2)
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
||||
endif()
|
||||
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
||||
endif()
|
||||
endif()
|
||||
if (DEFINED HAVE_AVX)
|
||||
if (NOT NO_AVX)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx")
|
||||
endif()
|
||||
endif()
|
||||
if (DEFINED HAVE_AVX2)
|
||||
if (NOT NO_AVX2)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
||||
endif()
|
||||
endif()
|
||||
if (DEFINED HAVE_FMA3)
|
||||
if (NOT NO_AVX2)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mfma")
|
||||
endif()
|
||||
endif()
|
||||
if (DEFINED HAVE_SSE)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse")
|
||||
endif()
|
||||
if (DEFINED HAVE_SSE2)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2")
|
||||
endif()
|
||||
if (DEFINED HAVE_SSE3)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
|
||||
endif()
|
||||
if (DEFINED HAVE_SSSE3)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3")
|
||||
endif()
|
||||
if (DEFINED HAVE_SSE4_1)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1")
|
||||
endif()
|
||||
endif()
|
||||
if (DEFINED BINARY)
|
||||
message(STATUS "Compiling a ${BINARY}-bit binary.")
|
||||
endif ()
|
||||
|
|
|
@ -5,13 +5,6 @@ endif
|
|||
TOPDIR = ..
|
||||
include $(TOPDIR)/Makefile.system
|
||||
|
||||
ifdef HAVE_SSE3
|
||||
CFLAGS += -msse3
|
||||
endif
|
||||
ifdef HAVE_SSSE3
|
||||
CFLAGS += -mssse3
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), power)
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
override CFLAGS += -fno-integrated-as
|
||||
|
@ -38,12 +31,6 @@ ifdef NO_AVX2
|
|||
endif
|
||||
|
||||
ifdef TARGET_CORE
|
||||
ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE HASWELL NEHALEM ZEN BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3))
|
||||
override CFLAGS += -msse -msse2 -msse3 -mssse3 -msse4.1
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),KATMAI COPPERMINE BANIAS NORTHWOOD ATHLON OPTERON))
|
||||
override CFLAGS += -msse -msse2
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), COOPERLAKE)
|
||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
|
||||
ifeq ($(GCCVERSIONGTEQ10), 1)
|
||||
|
|
Loading…
Reference in New Issue