From 44e6e5479b87f697b4d4fc92030c162f2451b384 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 21 Sep 2023 23:01:21 +0200 Subject: [PATCH 1/8] Use the C compiler for the C SBGEMM test source --- test/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Makefile b/test/Makefile index 46a7b1158..fa054f15b 100644 --- a/test/Makefile +++ b/test/Makefile @@ -326,7 +326,7 @@ endif ifeq ($(BUILD_BFLOAT16),1) test_sbgemm : compare_sgemm_sbgemm.c ../$(LIBNAME) - $(FC) $(FLDFLAGS) -o test_sbgemm compare_sgemm_sbgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) + $(CC) $(FLDFLAGS) -o test_sbgemm compare_sgemm_sbgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) endif ifeq ($(BUILD_COMPLEX),1) From 2390e0bfbc203f5566b0fede523b1caf1c344deb Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 21 Sep 2023 23:04:25 +0200 Subject: [PATCH 2/8] Quote the BU (underscore) option as it may not be set --- exports/Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/exports/Makefile b/exports/Makefile index d81735342..7682f851d 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -272,23 +272,23 @@ static : ../$(LIBNAME) rm -f goto.$(SUFFIX) osx.def : $(GENSYM) ../Makefile.system ../getarch.c - ./$(GENSYM) osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) + ./$(GENSYM) osx $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) aix.def : $(GENSYM) ../Makefile.system ../getarch.c - ./$(GENSYM) aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) + ./$(GENSYM) aix $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) objcopy.def : $(GENSYM) ../Makefile.system ../getarch.c - ./$(GENSYM) objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) + ./$(GENSYM) objcopy $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) objconv.def : $(GENSYM) ../Makefile.system ../getarch.c - ./$(GENSYM) objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) + ./$(GENSYM) objconv $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) test : linktest.c $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. rm -f linktest linktest.c : $(GENSYM) ../Makefile.system ../getarch.c - ./$(GENSYM) linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c + ./$(GENSYM) linktest $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c clean :: @rm -f *.def *.dylib __.SYMDEF* *.renamed From b926e70ebd879bb022d265a3859bfb5481b4d99f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 21 Sep 2023 23:07:32 +0200 Subject: [PATCH 3/8] Fix typo in build rule of "profiled" sbgemm --- interface/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/interface/Makefile b/interface/Makefile index 2ac9663d6..78335357b 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -1301,7 +1301,7 @@ xhpr2.$(SUFFIX) xhpr2.$(PSUFFIX) : zhpr2.c ifeq ($(BUILD_BFLOAT16),1) sbgemm.$(SUFFIX) sbgemm.$(PSUFFIX) : gemm.c ../param.h $(CC) -c $(CFLAGS) $< -o $(@F) -sbgemmt.$(SUFFIX) sbgemm.$(PSUFFIX) : gemmt.c ../param.h +sbgemmt.$(SUFFIX) sbgemmt.$(PSUFFIX) : gemmt.c ../param.h $(CC) -c $(CFLAGS) $< -o $(@F) endif From bb4718322294bd9f28b0343a643d0986e9046a2b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 24 Sep 2023 10:13:47 +0200 Subject: [PATCH 4/8] Force -qextname for trailing underscore generation when IBM xlf is used with gcc --- Makefile.system | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile.system b/Makefile.system index 5a4af9698..ae6db40b0 100644 --- a/Makefile.system +++ b/Makefile.system @@ -1167,6 +1167,10 @@ endif ifeq ($(F_COMPILER), IBM) CCOMMON_OPT += -DF_INTERFACE_IBM +FEXTRALIB += -lxlf90 +ifeq ($(C_COMPILER), GCC) +FCOMMON_OPT += -qextname +endif # FCOMMON_OPT += -qarch=440 ifdef BINARY64 FCOMMON_OPT += -q64 From 8012afcabbc912e32961924a77858e334ee75356 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 24 Sep 2023 10:15:12 +0200 Subject: [PATCH 5/8] Avoid using some gcc-specific flags with IBM xlf --- Makefile.power | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Makefile.power b/Makefile.power index 28a0bae08..33702c932 100644 --- a/Makefile.power +++ b/Makefile.power @@ -42,15 +42,14 @@ FCOMMON_OPT += -O2 -qrecur -qnosave else FCOMMON_OPT += -O2 -frecursive -fno-fast-math endif -ifeq ($(C_COMPILER), GCC) + +ifeq ($(F_COMPILER), GFORTRAN) ifneq ($(GCCVERSIONGT4), 1) $(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended) FCOMMON_OPT += -mcpu=power8 -mtune=power8 else FCOMMON_OPT += -mcpu=power9 -mtune=power9 endif -else -FCOMMON_OPT += -mcpu=power9 -mtune=power9 endif else FCOMMON_OPT += -O2 -Mrecursive @@ -84,12 +83,16 @@ CCOMMON_OPT += -DUSE_OPENMP -fopenmp else CCOMMON_OPT += -DUSE_OPENMP -mp endif +ifeq ($(F_COMPILER), IBM) +FCOMMON_OPT += -DUSE_OPENMP +else ifneq ($(F_COMPILER), PGI) FCOMMON_OPT += -DUSE_OPENMP -fopenmp else FCOMMON_OPT += -DUSE_OPENMP -mp endif endif +endif # workaround for C->FORTRAN ABI violation in LAPACKE ifeq ($(F_COMPILER), GFORTRAN) From 4de963dc17eb682e774a85a494a28001d6e6aa98 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 24 Sep 2023 10:16:37 +0200 Subject: [PATCH 6/8] Enforce trailing underscores on symbols when IBM xlf is combined with gcc --- f_check | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/f_check b/f_check index 526c41dc6..f30231bc4 100755 --- a/f_check +++ b/f_check @@ -155,6 +155,10 @@ else *'IBM XL'*) vendor=IBM openmp='-openmp' + case "$CC" in *gcc*) + bu=_ + ;; + esac ;; *NAG*) vendor=NAG @@ -223,6 +227,10 @@ else *ppuf*|*xlf*) vendor=IBM openmp='-openmp' + case "$CC" in *gcc*) + bu=_ + ;; + esac ;; *open64*) vendor=OPEN64 From 7a96908d0cb0ee3cc5b49390a5ec0ca3a71fefdf Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 24 Sep 2023 10:18:24 +0200 Subject: [PATCH 7/8] Add -lgomp when IBM xlf is combined with gcc in OPENMP builds --- ctest/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ctest/Makefile b/ctest/Makefile index 9e85d23b9..af5b34a36 100644 --- a/ctest/Makefile +++ b/ctest/Makefile @@ -214,6 +214,11 @@ endif ifeq ($(F_COMPILER), NAG) CEXTRALIB = -lgomp endif +ifeq ($(F_COMPILER), IBM) +ifeq ($(C_COMPILER), GCC) +CEXTRALIB += -lgomp +endif +endif endif ifeq ($(BUILD_SINGLE),1) From 2a9981a2442106f67d963ee68cb4ee3b1a7a0334 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 24 Sep 2023 10:19:11 +0200 Subject: [PATCH 8/8] Add -lgomp when IBM xlf is combined with gcc in OPENMP builds --- test/Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/Makefile b/test/Makefile index fa054f15b..715842b4d 100644 --- a/test/Makefile +++ b/test/Makefile @@ -271,6 +271,11 @@ endif ifeq ($(F_COMPILER), NAG) CEXTRALIB = -lgomp endif +ifeq ($(F_COMPILER), IBM) +ifeq ($(C_COMPILER), GCC) +CEXTRALIB += -lgomp +endif +endif endif ifeq ($(BUILD_SINGLE),1) @@ -326,7 +331,7 @@ endif ifeq ($(BUILD_BFLOAT16),1) test_sbgemm : compare_sgemm_sbgemm.c ../$(LIBNAME) - $(CC) $(FLDFLAGS) -o test_sbgemm compare_sgemm_sbgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) + $(CC) $(CFLAGS) -o test_sbgemm compare_sgemm_sbgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) endif ifeq ($(BUILD_COMPLEX),1)