From e9f1b2d26f8c68c2bd1f108565645d72f55b7180 Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Fri, 25 Aug 2023 16:45:56 +0200
Subject: [PATCH 1/4] Expand the SVE compatibility check for the NVIDIA HPC compiler

---
Review note: the new block is a second compile attempt that runs only when the
first one fails; no_sve=1 is still set only when every attempt has failed.

 c_check | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/c_check b/c_check
index 7ee183163..4d12c1674 100755
--- a/c_check
+++ b/c_check
@@ -283,6 +283,9 @@ if [ "$architecture" = "arm64" ]; then
     no_sve=0
     {
         $compiler_name $flags $args >/dev/null 2>&1
+    } || {
+        args=" -Msve_intrinsics -c -o $tmpf.o $tmpf"
+        $compiler_name $flags $args >/dev/null 2>&1
     } || {
         no_sve=1
     }

From 8794544b4322d478608fbd742200b2d6aea12294 Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Fri, 25 Aug 2023 16:47:32 +0200
Subject: [PATCH 2/4] Add support for compiling the Neoverse SVE kernels with the NVIDIA HPC compiler

---
Review note: with this patch alone, a NEOVERSEN2/NEOVERSEV1 build whose
C_COMPILER is not PGI gets no -DBUILD_KERNEL/-DTABLE_NAME flags from this
chain; patch 4/4 adds the missing else branch.

 kernel/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/Makefile b/kernel/Makefile
index 795f25eec..4bcb571a4 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -77,6 +77,10 @@ else ifeq ($(TARGET_CORE), ZEN)
   override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT)
 else ifeq ($(TARGET_CORE), LOONGSON3R4)
   override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(MSA_FLAGS)
+else ifneq ($(filter NEOVERSEN2 NEOVERSEV1, $(TARGET_CORE)),)
+  ifeq ($(C_COMPILER), PGI)
+    override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -Msve_intrinsics
+  endif
 else
   override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
 endif

From 49689fbef7b929f0382322b0d21217837ae9b375 Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Fri, 25 Aug 2023 17:11:04 +0200
Subject: [PATCH 3/4] Add support for compiling SVE kernels with the NVIDIA HPC compiler

---
Review note: every PGI branch below spells the flag -Msve_intrinsics
(underscore), the same spelling the c_check probe in patch 1/4 compiles with.
The NEOVERSEN2 and ARMV8SVE branches in cmake/system.cmake previously used
-Msve-intrinsics (hyphen). The post-image blob id on the cmake/system.cmake
index line was not regenerated for that spelling fix.

 cmake/cc.cmake     | 32 ++++++++++++++++++++++----------
 cmake/system.cmake | 12 ++++++++++++
 2 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/cmake/cc.cmake b/cmake/cc.cmake
index aeaa76710..7b4ef8947 100644
--- a/cmake/cc.cmake
+++ b/cmake/cc.cmake
@@ -180,22 +180,30 @@ endif ()
 
 if (${CORE} STREQUAL NEOVERSEN2)
   if (NOT DYNAMIC_ARCH)
-    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
-    if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
-      set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
+    if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
+      set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
     else ()
-      set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
-    endif()
+      execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
+      if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
+        set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
+      else ()
+        set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
+      endif()
+    endif ()
   endif ()
 endif ()
 
 if (${CORE} STREQUAL NEOVERSEV1)
   if (NOT DYNAMIC_ARCH)
-    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
-    if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
-      set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve -mtune=neoverse-v1")
+    if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
+      set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1")
     else ()
-      set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
+      execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
+      if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
+        set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve -mtune=neoverse-v1")
+      else ()
+        set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
+      endif()
     endif()
   endif ()
 endif ()
@@ -213,7 +221,11 @@ endif ()
 
 if (${CORE} STREQUAL ARMV8SVE)
   if (NOT DYNAMIC_ARCH)
-    set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
+    if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
+      set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8-a+sve")
+    else ()
+      set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
+    endif ()
   endif ()
 endif ()
 
diff --git a/cmake/system.cmake b/cmake/system.cmake
index 414193ec8..bc87f7b44 100644
--- a/cmake/system.cmake
+++ b/cmake/system.cmake
@@ -282,23 +282,35 @@ if (DEFINED TARGET)
   endif()
 
   if (${TARGET} STREQUAL NEOVERSEV1)
+    if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
+      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1")
+    else ()
     execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
     if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
       set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve -mtune=neoverse-v1")
     else ()
       message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support Neoverse V1.")
     endif()
+    endif()
   endif()
   if (${TARGET} STREQUAL NEOVERSEN2)
+    if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
+      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
+    else ()
     execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
     if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
       set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
     else ()
       message(FATAL_ERROR "Compiler $${CMAKE_C_COMPILER} {GCC_VERSION} does not support Neoverse N2.")
     endif()
+    endif()
   endif()
   if (${TARGET} STREQUAL ARMV8SVE)
+    if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
+      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.2-a+sve")
+    else ()
     set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve")
+    endif()
   endif()
 
 endif()

From 7a6203ffa15d962bcfde5c23f0f6ff03c1a7b60f Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Fri, 25 Aug 2023 18:25:51 +0200
Subject: [PATCH 4/4] restore default Neoverse SVE build instructions for non-NVIDIA compilers

---
Review note: adds the else branch to the block introduced in patch 2/4, so a
non-PGI compiler again receives the same flags as the generic fallback.

 kernel/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/Makefile b/kernel/Makefile
index 4bcb571a4..1e0a0074f 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -80,6 +80,8 @@ else ifeq ($(TARGET_CORE), LOONGSON3R4)
 else ifneq ($(filter NEOVERSEN2 NEOVERSEV1, $(TARGET_CORE)),)
   ifeq ($(C_COMPILER), PGI)
     override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -Msve_intrinsics
+  else
+    override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
   endif
 else
   override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)