Merge pull request #4203 from martin-frbg/issue4201
Add support for building arm64 SVE kernels with the NVIDIA HPC compiler
This commit is contained in:
commit
be57c595aa
3
c_check
3
c_check
|
@ -283,6 +283,9 @@ if [ "$architecture" = "arm64" ]; then
|
|||
no_sve=0
|
||||
{
|
||||
$compiler_name $flags $args >/dev/null 2>&1
|
||||
} || {
|
||||
args=" -Msve_intrinsics -c -o $tmpf.o $tmpf"
|
||||
$compiler_name $flags $args >/dev/null 2>&1
|
||||
} || {
|
||||
no_sve=1
|
||||
}
|
||||
|
|
|
@ -180,6 +180,9 @@ endif ()
|
|||
|
||||
if (${CORE} STREQUAL NEOVERSEN2)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
|
||||
else ()
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
|
||||
|
@ -187,16 +190,21 @@ if (${CORE} STREQUAL NEOVERSEN2)
|
|||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
|
||||
endif()
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL NEOVERSEV1)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1")
|
||||
else ()
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve -mtune=neoverse-v1")
|
||||
else ()
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
|
||||
endif()
|
||||
endif()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
|
@ -213,8 +221,12 @@ endif ()
|
|||
|
||||
if (${CORE} STREQUAL ARMV8SVE)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8-a+sve")
|
||||
else ()
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL CORTEXA510)
|
||||
|
|
|
@ -282,6 +282,9 @@ if (DEFINED TARGET)
|
|||
endif()
|
||||
|
||||
if (${TARGET} STREQUAL NEOVERSEV1)
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1")
|
||||
else ()
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve -mtune=neoverse-v1")
|
||||
|
@ -289,7 +292,11 @@ if (${TARGET} STREQUAL NEOVERSEV1)
|
|||
message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support Neoverse V1.")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
if (${TARGET} STREQUAL NEOVERSEN2)
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve-intrinsics -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
|
||||
else ()
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
|
||||
|
@ -297,9 +304,14 @@ if (${TARGET} STREQUAL NEOVERSEV1)
|
|||
message(FATAL_ERROR "Compiler $${CMAKE_C_COMPILER} {GCC_VERSION} does not support Neoverse N2.")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
if (${TARGET} STREQUAL ARMV8SVE)
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve-intrinsics -march=armv8.2-a+sve")
|
||||
else ()
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
endif()
|
||||
|
||||
|
|
|
@ -77,6 +77,12 @@ else ifeq ($(TARGET_CORE), ZEN)
|
|||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT)
|
||||
else ifeq ($(TARGET_CORE), LOONGSON3R4)
|
||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(MSA_FLAGS)
|
||||
else ifneq ($(filter NEOVERSEN2 NEOVERSEV1, $(TARGET_CORE)),)
|
||||
ifeq ($(C_COMPILER), PGI)
|
||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -Msve_intrinsics
|
||||
else
|
||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
|
||||
endif
|
||||
else
|
||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
|
||||
endif
|
||||
|
|
Loading…
Reference in New Issue