Merge pull request #3061 from martin-frbg/arm64-pgi

Support NVIDIA HPC SDK on ARM64
This commit is contained in:
Martin Kroeker 2021-01-14 15:59:21 +01:00 committed by GitHub
commit a0e4fb3a28
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 50 additions and 10 deletions

View File

@ -1,4 +1,4 @@
ifneq ($(C_COMPILER), PGI)
ifeq ($(CORE), ARMV8)
CCOMMON_OPT += -march=armv8-a
FCOMMON_OPT += -march=armv8-a
@ -77,4 +77,4 @@ CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
endif
endif
endif

View File

@ -74,6 +74,9 @@ macro(ParseMakefileVars MAKEFILE_IN)
string(REGEX MATCH "ifneq \\(\\$\\(([_A-Z]+)\\),[ \t]*([0-9_A-Z]+)\\)" line_match "${makefile_line}")
if (NOT "${line_match}" STREQUAL "")
# message(STATUS "IFNEQ: ${line_match} first: ${CMAKE_MATCH_1} second: ${CMAKE_MATCH_2}")
# A matched variable name of C_COMPILER is rewritten to CMAKE_C_COMPILER
# before the comparison that follows dereferences it.
# NOTE(review): CMAKE_C_COMPILER is documented as the compiler path, not a
# vendor name, so comparing its value against PGI may never match; verify
# whether CMAKE_C_COMPILER_ID was intended.
if ( ${CMAKE_MATCH_1} STREQUAL C_COMPILER)
set (CMAKE_MATCH_1 CMAKE_C_COMPILER)
endif ()
if (NOT ( ${${CMAKE_MATCH_1}} STREQUAL ${CMAKE_MATCH_2}))
# message (STATUS "condition is true")
set (IfElse 1)

View File

@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define INLINE inline
#ifdef F_INTERFACE_FLANG
#if defined( F_INTERFACE_FLANG) || defined(F_INTERFACE_PGI)
#define RETURN_BY_STACK
#else
#define RETURN_BY_COMPLEX

View File

@ -48,7 +48,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
dot[0]=0.0;
dot[1]=0.0;
#if !defined(__PPC__) && !defined(__SunOS)
#if !defined(__PPC__) && !defined(__SunOS) && !defined(__PGI)
CREAL(result) = 0.0 ;
CIMAG(result) = 0.0 ;
#else
@ -73,7 +73,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
i++ ;
}
#if !defined(__PPC__) && !defined(__SunOS)
#if !defined(__PPC__) && !defined(__SunOS) && !defined(__PGI)
CREAL(result) = dot[0];
CIMAG(result) = dot[1];
#else

View File

@ -97,9 +97,18 @@ CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
DDOTKERNEL = dot.S
# SDOTKERNEL selection by C compiler: every compiler other than PGI gets
# the C source under ../generic, while PGI gets the assembly source dot.S.
ifneq ($(C_COMPILER), PGI)
SDOTKERNEL = ../generic/dot.c
else
SDOTKERNEL = dot.S
endif
# CDOTKERNEL and ZDOTKERNEL selection by C compiler: the assembly source
# zdot.S is used unless the compiler is PGI, in which case both kernels
# come from the C source ../arm/zdot.c.
# NOTE(review): presumably tied to how PGI returns complex values; confirm.
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
DSDOTKERNEL = dot.S
DGEMM_BETA = dgemm_beta.S

View File

@ -96,11 +96,20 @@ DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
DDOTKERNEL = dot.S
SDOTKERNEL = ../generic/dot.c
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
DSDOTKERNEL = dot.S
# SDOTKERNEL selection by C compiler: the C source under ../generic for
# all compilers but PGI, and the assembly source dot.S for PGI.
ifneq ($(C_COMPILER), PGI)
SDOTKERNEL = ../generic/dot.c
else
SDOTKERNEL = dot.S
endif
DDOTKERNEL = dot.S
# CDOTKERNEL and ZDOTKERNEL selection by C compiler: assembly zdot.S for
# all compilers but PGI; with PGI both kernels use ../arm/zdot.c.
# NOTE(review): presumably tied to how PGI returns complex values; confirm.
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
DSDOTKERNEL = dot.S
DGEMM_BETA = dgemm_beta.S
SGEMM_BETA = sgemm_beta.S

View File

@ -70,10 +70,19 @@ DCOPYKERNEL = copy.S
CCOPYKERNEL = copy.S
ZCOPYKERNEL = copy.S
# SDOTKERNEL selection by C compiler: PGI builds take the assembly source
# dot.S, and all other compilers take the C source under ../generic.
ifneq ($(C_COMPILER), PGI)
SDOTKERNEL = ../generic/dot.c
else
SDOTKERNEL = dot.S
endif
DDOTKERNEL = dot.S
# CDOTKERNEL and ZDOTKERNEL selection by C compiler: PGI builds take the
# C source ../arm/zdot.c, and all other compilers take assembly zdot.S.
# NOTE(review): presumably tied to how PGI returns complex values; confirm.
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
DSDOTKERNEL = dot.S
SNRM2KERNEL = nrm2.S

View File

@ -47,8 +47,13 @@ ZCOPYKERNEL = copy.S
SDOTKERNEL = dot_thunderx.c
DDOTKERNEL = ddot_thunderx.c
# Complex dot kernels (CDOTKERNEL, ZDOTKERNEL) are chosen by C compiler:
# assembly zdot.S by default, the C source ../arm/zdot.c when it is PGI.
# NOTE(review): presumably tied to how PGI returns complex values; confirm.
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
DSDOTKERNEL = dot.S
SNRM2KERNEL = nrm2.S

View File

@ -72,8 +72,13 @@ ZCOPYKERNEL = copy.S
SDOTKERNEL = dot.S
DDOTKERNEL = dot.S
# Complex dot kernels (CDOTKERNEL, ZDOTKERNEL) are chosen by C compiler:
# zdot.S unless the compiler is PGI, which gets ../arm/zdot.c for both.
# NOTE(review): presumably tied to how PGI returns complex values; confirm.
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
DSDOTKERNEL = dot.S
SNRM2KERNEL = nrm2.S