Merge pull request #4203 from martin-frbg/issue4201
Add support for building arm64 SVE kernels with the NVIDIA HPC compiler
This commit is contained in:
commit
be57c595aa
3
c_check
3
c_check
|
@ -283,6 +283,9 @@ if [ "$architecture" = "arm64" ]; then
|
||||||
no_sve=0
|
no_sve=0
|
||||||
{
|
{
|
||||||
$compiler_name $flags $args >/dev/null 2>&1
|
$compiler_name $flags $args >/dev/null 2>&1
|
||||||
|
} || {
|
||||||
|
args=" -Msve_intrinsics -c -o $tmpf.o $tmpf"
|
||||||
|
$compiler_name $flags $args >/dev/null 2>&1
|
||||||
} || {
|
} || {
|
||||||
no_sve=1
|
no_sve=1
|
||||||
}
|
}
|
||||||
|
|
|
@ -180,22 +180,30 @@ endif ()
|
||||||
|
|
||||||
if (${CORE} STREQUAL NEOVERSEN2)
|
if (${CORE} STREQUAL NEOVERSEN2)
|
||||||
if (NOT DYNAMIC_ARCH)
|
if (NOT DYNAMIC_ARCH)
|
||||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
|
||||||
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
|
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
|
||||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
|
|
||||||
else ()
|
else ()
|
||||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
|
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||||
endif()
|
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
|
||||||
|
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
|
||||||
|
else ()
|
||||||
|
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
|
||||||
|
endif()
|
||||||
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (${CORE} STREQUAL NEOVERSEV1)
|
if (${CORE} STREQUAL NEOVERSEV1)
|
||||||
if (NOT DYNAMIC_ARCH)
|
if (NOT DYNAMIC_ARCH)
|
||||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
|
||||||
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
|
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1")
|
||||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve -mtune=neoverse-v1")
|
|
||||||
else ()
|
else ()
|
||||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
|
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||||
|
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
|
||||||
|
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve -mtune=neoverse-v1")
|
||||||
|
else ()
|
||||||
|
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
@ -213,7 +221,11 @@ endif ()
|
||||||
|
|
||||||
if (${CORE} STREQUAL ARMV8SVE)
|
if (${CORE} STREQUAL ARMV8SVE)
|
||||||
if (NOT DYNAMIC_ARCH)
|
if (NOT DYNAMIC_ARCH)
|
||||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
|
||||||
|
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8-a+sve")
|
||||||
|
else ()
|
||||||
|
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
|
||||||
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
|
|
@ -282,23 +282,35 @@ if (DEFINED TARGET)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (${TARGET} STREQUAL NEOVERSEV1)
|
if (${TARGET} STREQUAL NEOVERSEV1)
|
||||||
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1")
|
||||||
|
else ()
|
||||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||||
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
|
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve -mtune=neoverse-v1")
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve -mtune=neoverse-v1")
|
||||||
else ()
|
else ()
|
||||||
message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support Neoverse V1.")
|
message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support Neoverse V1.")
|
||||||
endif()
|
endif()
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
if (${TARGET} STREQUAL NEOVERSEN2)
|
if (${TARGET} STREQUAL NEOVERSEN2)
|
||||||
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
|
||||||
|
else ()
|
||||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||||
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
|
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
|
||||||
else ()
|
else ()
|
||||||
message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support Neoverse N2.")
|
message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support Neoverse N2.")
|
||||||
endif()
|
endif()
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
if (${TARGET} STREQUAL ARMV8SVE)
|
if (${TARGET} STREQUAL ARMV8SVE)
|
||||||
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.2-a+sve")
|
||||||
|
else ()
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve")
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve")
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
endif()
|
endif()
|
||||||
|
|
|
@ -77,6 +77,12 @@ else ifeq ($(TARGET_CORE), ZEN)
|
||||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT)
|
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT)
|
||||||
else ifeq ($(TARGET_CORE), LOONGSON3R4)
|
else ifeq ($(TARGET_CORE), LOONGSON3R4)
|
||||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(MSA_FLAGS)
|
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(MSA_FLAGS)
|
||||||
|
else ifneq ($(filter NEOVERSEN2 NEOVERSEV1, $(TARGET_CORE)),)
|
||||||
|
ifeq ($(C_COMPILER), PGI)
|
||||||
|
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -Msve_intrinsics
|
||||||
|
else
|
||||||
|
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
|
||||||
|
endif
|
||||||
else
|
else
|
||||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
|
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
|
||||||
endif
|
endif
|
||||||
|
|
Loading…
Reference in New Issue