Merge pull request #4203 from martin-frbg/issue4201

Add support for building arm64 SVE kernels with the NVIDIA HPC compiler
This commit is contained in:
Martin Kroeker 2023-08-25 22:55:38 +02:00 committed by GitHub
commit be57c595aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 43 additions and 10 deletions

View File

@ -283,6 +283,9 @@ if [ "$architecture" = "arm64" ]; then
no_sve=0 no_sve=0
{ {
$compiler_name $flags $args >/dev/null 2>&1 $compiler_name $flags $args >/dev/null 2>&1
} || {
args=" -Msve_intrinsics -c -o $tmpf.o $tmpf"
$compiler_name $flags $args >/dev/null 2>&1
} || { } || {
no_sve=1 no_sve=1
} }

View File

@ -180,6 +180,9 @@ endif ()
if (${CORE} STREQUAL NEOVERSEN2) if (${CORE} STREQUAL NEOVERSEN2)
if (NOT DYNAMIC_ARCH) if (NOT DYNAMIC_ARCH)
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
else ()
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4) if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2") set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
@ -188,9 +191,13 @@ if (${CORE} STREQUAL NEOVERSEN2)
endif() endif()
endif () endif ()
endif () endif ()
endif ()
if (${CORE} STREQUAL NEOVERSEV1) if (${CORE} STREQUAL NEOVERSEV1)
if (NOT DYNAMIC_ARCH) if (NOT DYNAMIC_ARCH)
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1")
else ()
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4) if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve -mtune=neoverse-v1") set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve -mtune=neoverse-v1")
@ -199,6 +206,7 @@ if (${CORE} STREQUAL NEOVERSEV1)
endif() endif()
endif() endif()
endif () endif ()
endif ()
if (${CORE} STREQUAL NEOVERSEN1) if (${CORE} STREQUAL NEOVERSEN1)
if (NOT DYNAMIC_ARCH) if (NOT DYNAMIC_ARCH)
@ -213,9 +221,13 @@ endif ()
if (${CORE} STREQUAL ARMV8SVE) if (${CORE} STREQUAL ARMV8SVE)
if (NOT DYNAMIC_ARCH) if (NOT DYNAMIC_ARCH)
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8-a+sve")
else ()
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve") set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
endif () endif ()
endif () endif ()
endif ()
if (${CORE} STREQUAL CORTEXA510) if (${CORE} STREQUAL CORTEXA510)
if (NOT DYNAMIC_ARCH) if (NOT DYNAMIC_ARCH)

View File

@ -282,6 +282,9 @@ if (DEFINED TARGET)
endif() endif()
if (${TARGET} STREQUAL NEOVERSEV1) if (${TARGET} STREQUAL NEOVERSEV1)
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1")
else ()
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4) if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve -mtune=neoverse-v1") set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve -mtune=neoverse-v1")
@ -289,7 +292,11 @@ if (${TARGET} STREQUAL NEOVERSEV1)
message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support Neoverse V1.") message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support Neoverse V1.")
endif() endif()
endif() endif()
endif()
if (${TARGET} STREQUAL NEOVERSEN2) if (${TARGET} STREQUAL NEOVERSEN2)
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
else ()
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4) if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2") set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
@ -297,9 +304,14 @@ if (${TARGET} STREQUAL NEOVERSEV1)
message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support Neoverse N2.") message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support Neoverse N2.")
endif() endif()
endif() endif()
endif()
if (${TARGET} STREQUAL ARMV8SVE) if (${TARGET} STREQUAL ARMV8SVE)
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.2-a+sve")
else ()
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve") set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve")
endif() endif()
endif()
endif() endif()

View File

@ -77,6 +77,12 @@ else ifeq ($(TARGET_CORE), ZEN)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT) override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT)
else ifeq ($(TARGET_CORE), LOONGSON3R4) else ifeq ($(TARGET_CORE), LOONGSON3R4)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(MSA_FLAGS) override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(MSA_FLAGS)
else ifneq ($(filter NEOVERSEN2 NEOVERSEV1, $(TARGET_CORE)),)
ifeq ($(C_COMPILER), PGI)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -Msve_intrinsics
else
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
endif
else else
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
endif endif