From 06b022b139c82c07a00f7b76e46c31b49b2cd728 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 30 Oct 2022 12:42:36 +0100 Subject: [PATCH 01/12] Fix ReLAPACK source selection --- CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0c92356e7..e830589e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -212,10 +212,10 @@ if(NOT NO_LAPACKE) add_library(LAPACKE OBJECT ${LAPACKE_SOURCES}) list(APPEND TARGET_OBJS "$") endif() -if(BUILD_RELAPACK) - add_library(RELAPACK OBJECT ${RELA_SOURCES}) - list(APPEND TARGET_OBJS "$") -endif() +#if(BUILD_RELAPACK) +# add_library(RELAPACK OBJECT ${RELA_SOURCES}) +# list(APPEND TARGET_OBJS "$") +#endif() set(OpenBLAS_LIBS "") if(BUILD_STATIC_LIBS) add_library(${OpenBLAS_LIBNAME}_static STATIC ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) From eeebaf22948192c151c87903865de603e93f2874 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 30 Oct 2022 12:45:54 +0100 Subject: [PATCH 02/12] move INCLUDE_ALL to (c)make options --- relapack/config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relapack/config.h b/relapack/config.h index 9d6919463..914efcbf0 100644 --- a/relapack/config.h +++ b/relapack/config.h @@ -45,7 +45,7 @@ // The following macros specify which routines are included in the library under // LAPACK's symbol names: 1 included, 0 not included -#define INCLUDE_ALL 1 +// #define INCLUDE_ALL 1 #define INCLUDE_XLAUUM INCLUDE_ALL #define INCLUDE_SLAUUM INCLUDE_XLAUUM From a082d54035d1e32db2dea16c74013c6ae6dc056d Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 30 Oct 2022 12:47:01 +0100 Subject: [PATCH 03/12] Rename to avoid conflict with OpenBLAS' toplevel config.h --- relapack/{config.h => relapack_config.h} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename relapack/{config.h => relapack_config.h} (100%) diff --git a/relapack/config.h b/relapack/relapack_config.h similarity index 100% rename from relapack/config.h rename to relapack/relapack_config.h From 3ebf5d219d41f0613f08ba89e9998ae8333d6118 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 30 Oct 2022 12:49:07 +0100 Subject: [PATCH 04/12] handle INCLUDE_ALL and optional function prefixes --- relapack/Makefile | 84 ++++++++++++++++++++++++++--------------------- 1 file changed, 46 insertions(+), 38 deletions(-) diff --git a/relapack/Makefile b/relapack/Makefile index ddf101bd1..056a0ee48 100644 --- a/relapack/Makefile +++ b/relapack/Makefile @@ -1,53 +1,61 @@ TOPDIR = .. include $(TOPDIR)/Makefile.system - +ifeq ($(RELAPACK_REPLACE),0) +RELAPREFIX=RELAPACK_ +INCLALL=-DINCLUDE_ALL=0 +else +INCLALL=-DINCLUDE_ALL=1 +endif SRC = $(wildcard src/*.c) SRC1 = \ - src/slauum.c src/clauum.c src/dlauum.c src/zlauum.c \ - src/strtri.c src/dtrtri.c src/ctrtri.c src/ztrtri.c \ - src/spotrf.c src/dpotrf.c src/cpotrf.c src/zpotrf.c \ - src/sgetrf.c src/dgetrf.c src/cgetrf.c src/zgetrf.c + slauum.c clauum.c dlauum.c zlauum.c \ + strtri.c dtrtri.c ctrtri.c ztrtri.c \ + spotrf.c dpotrf.c cpotrf.c zpotrf.c \ + sgetrf.c dgetrf.c cgetrf.c zgetrf.c SRC2 = \ - src/cgbtrf.c src/cpbtrf.c src/dsytrf_rec2.c src/sgbtrf.c src/ssytrf_rook.c src/zhegst.c src/zsytrf_rec2.c \ - src/cgemmt.c src/dgbtrf.c src/dsytrf_rook.c src/sgemmt.c src/ssytrf_rook_rec2.c src/zhetrf.c src/zsytrf_rook.c \ - src/csytrf.c src/dgemmt.c src/dsytrf_rook_rec2.c src/stgsyl.c src/zhetrf_rec2.c src/zsytrf_rook_rec2.c \ - src/chegst.c src/csytrf_rec2.c src/dtgsyl.c src/strsyl.c src/zhetrf_rook.c src/ztgsyl.c \ - src/chetrf.c src/csytrf_rook.c src/dtrsyl.c src/spbtrf.c src/strsyl_rec2.c src/zhetrf_rook_rec2.c src/ztrsyl.c \ - src/chetrf_rec2.c src/csytrf_rook_rec2.c src/dpbtrf.c src/dtrsyl_rec2.c src/ztrsyl_rec2.c \ - src/chetrf_rook.c src/ctgsyl.c src/ssygst.c src/zgbtrf.c src/zpbtrf.c \ - src/chetrf_rook_rec2.c src/ctrsyl.c src/dsygst.c src/f2c.c src/ssytrf.c src/zgemmt.c \ - src/ctrsyl_rec2.c src/dsytrf.c src/lapack_wrappers.c src/ssytrf_rec2.c src/zsytrf.c + cgbtrf.c cpbtrf.c dsytrf_rec2.c sgbtrf.c ssytrf_rook.c zhegst.c zsytrf_rec2.c \ + cgemmt.c dgbtrf.c dsytrf_rook.c sgemmt.c ssytrf_rook_rec2.c zhetrf.c zsytrf_rook.c \ + csytrf.c dgemmt.c dsytrf_rook_rec2.c stgsyl.c zhetrf_rec2.c zsytrf_rook_rec2.c \ + chegst.c csytrf_rec2.c dtgsyl.c strsyl.c zhetrf_rook.c ztgsyl.c \ + chetrf.c csytrf_rook.c dtrsyl.c spbtrf.c strsyl_rec2.c zhetrf_rook_rec2.c ztrsyl.c \ + chetrf_rec2.c csytrf_rook_rec2.c dpbtrf.c dtrsyl_rec2.c ztrsyl_rec2.c \ + chetrf_rook.c ctgsyl.c ssygst.c zgbtrf.c zpbtrf.c \ + chetrf_rook_rec2.c ctrsyl.c dsygst.c f2c.c ssytrf.c zgemmt.c \ + ctrsyl_rec2.c dsytrf.c lapack_wrappers.c ssytrf_rec2.c zsytrf.c SRCX = \ - src/cgbtrf.c src/cpbtrf.c src/ctrtri.c src/dsytrf_rec2.c src/sgbtrf.c src/ssytrf_rook.c src/zhegst.c src/zsytrf_rec2.c \ - src/cgemmt.c src/cpotrf.c src/dgbtrf.c src/dsytrf_rook.c src/sgemmt.c src/ssytrf_rook_rec2.c src/zhetrf.c src/zsytrf_rook.c \ - src/cgetrf.c src/csytrf.c src/dgemmt.c src/dsytrf_rook_rec2.c src/sgetrf.c src/stgsyl.c src/zhetrf_rec2.c src/zsytrf_rook_rec2.c \ - src/chegst.c src/csytrf_rec2.c src/dgetrf.c src/dtgsyl.c src/slauum.c src/strsyl.c src/zhetrf_rook.c src/ztgsyl.c \ - src/chetrf.c src/csytrf_rook.c src/dlauum.c src/dtrsyl.c src/spbtrf.c src/strsyl_rec2.c src/zhetrf_rook_rec2.c src/ztrsyl.c \ - src/chetrf_rec2.c src/csytrf_rook_rec2.c src/dpbtrf.c src/dtrsyl_rec2.c src/spotrf.c src/strtri.c src/zlauum.c src/ztrsyl_rec2.c \ - src/chetrf_rook.c src/ctgsyl.c src/dpotrf.c src/dtrtri.c src/ssygst.c src/zgbtrf.c src/zpbtrf.c src/ztrtri.c \ - src/chetrf_rook_rec2.c src/ctrsyl.c src/dsygst.c src/f2c.c src/ssytrf.c src/zgemmt.c src/zpotrf.c \ - src/clauum.c src/ctrsyl_rec2.c src/dsytrf.c src/lapack_wrappers.c src/ssytrf_rec2.c src/zgetrf.c src/zsytrf.c + cgbtrf.c cpbtrf.c ctrtri.c dsytrf_rec2.c sgbtrf.c ssytrf_rook.c zhegst.c zsytrf_rec2.c \ + cgemmt.c cpotrf.c dgbtrf.c dsytrf_rook.c sgemmt.c ssytrf_rook_rec2.c zhetrf.c zsytrf_rook.c \ + cgetrf.c csytrf.c dgemmt.c dsytrf_rook_rec2.c sgetrf.c stgsyl.c zhetrf_rec2.c zsytrf_rook_rec2.c \ + chegst.c csytrf_rec2.c dgetrf.c dtgsyl.c slauum.c strsyl.c zhetrf_rook.c ztgsyl.c \ + chetrf.c csytrf_rook.c dlauum.c dtrsyl.c spbtrf.c strsyl_rec2.c zhetrf_rook_rec2.c ztrsyl.c \ + chetrf_rec2.c csytrf_rook_rec2.c dpbtrf.c dtrsyl_rec2.c spotrf.c strtri.c zlauum.c ztrsyl_rec2.c \ + chetrf_rook.c ctgsyl.c dpotrf.c dtrtri.c ssygst.c zgbtrf.c zpbtrf.c ztrtri.c \ + chetrf_rook_rec2.c ctrsyl.c dsygst.c f2c.c ssytrf.c zgemmt.c zpotrf.c \ + clauum.c ctrsyl_rec2.c dsytrf.c lapack_wrappers.c ssytrf_rec2.c zgetrf.c zsytrf.c -OBJS1 = $(SRC1:%.c=%.$(SUFFIX)) -OBJS2 = $(SRC2:%.c=%.o) + +OBJS1 = $(SRC1:%.c=src/$(RELAPREFIX)%.$(SUFFIX)) +OBJS2 = $(SRC2:%.c=src/$(RELAPREFIX)%.o) OBJS = $(OBJS1) $(OBJS2) TEST_SUITS = \ - slauum dlauum clauum zlauum \ - spotrf dpotrf cpotrf zpotrf \ - spbtrf dpbtrf cpbtrf zpbtrf \ - ssygst dsygst chegst zhegst \ - ssytrf dsytrf csytrf chetrf zsytrf zhetrf \ - sgetrf dgetrf cgetrf zgetrf \ - sgbtrf dgbtrf cgbtrf zgbtrf \ - strsyl dtrsyl ctrsyl ztrsyl \ - stgsyl dtgsyl ctgsyl ztgsyl \ sgemmt dgemmt cgemmt zgemmt + + # slauum dlauum clauum zlauum \ + # spotrf dpotrf cpotrf zpotrf \ + # spbtrf dpbtrf cpbtrf zpbtrf \ + # ssygst dsygst chegst zhegst \ + # ssytrf dsytrf csytrf chetrf zsytrf zhetrf \ + # sgetrf dgetrf cgetrf zgetrf \ + # sgbtrf dgbtrf cgbtrf zgbtrf \ + # strsyl dtrsyl ctrsyl ztrsyl \ + # stgsyl dtgsyl ctgsyl ztgsyl \ + TESTS = $(TEST_SUITS:%=test/%.pass) # dummies TEST_EXES = $(TEST_SUITS:%=test/%.x) @@ -63,11 +71,11 @@ libs: $(OBJS) $(AR) -r $(TOPDIR)/$(LIBNAME) $(OBJS) $(RANLIB) $(TOPDIR)/$(LIBNAME) -%.$(SUFFIX): %.c config.h - $(CC) $(CFLAGS) -c $< -o $@ +src/$(RELAPREFIX)%.$(SUFFIX): src/%.c relapack_config.h + $(CC) -v $(CFLAGS) -I. $(INCLALL) -c $< -o $@ -%.o: %.c config.h - $(CC) $(CFLAGS) -c $< -o $@ +src/$(RELAPREFIX)%.o: src/%.c relapack_config.h + $(CC) -v $(CFLAGS) -I. $(INCLALL) -c $< -o $@ # ReLAPACK testing From ce7ea72de101707c1c0b8b4f9830e6dc7d25a44b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 30 Oct 2022 12:50:51 +0100 Subject: [PATCH 05/12] Fix include paths --- relapack/src/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/relapack/src/CMakeLists.txt b/relapack/src/CMakeLists.txt index 2d861f54b..78fb1431f 100644 --- a/relapack/src/CMakeLists.txt +++ b/relapack/src/CMakeLists.txt @@ -1,5 +1,6 @@ include_directories(${PROJECT_SOURCE_DIR}) include_directories(${PROJECT_BINARY_DIR}) +include_directories(${PROJECT_SOURCE_DIR}/relapack) set(RELAFILES clauum.c From d39978cd7ff702e2a9d3df439814a7a8a511deb0 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 30 Oct 2022 12:53:19 +0100 Subject: [PATCH 06/12] Fix includes --- relapack/src/ctrsyl_rec2.c | 2 +- relapack/src/relapack.h | 4 ++-- relapack/src/ztrsyl_rec2.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/relapack/src/ctrsyl_rec2.c b/relapack/src/ctrsyl_rec2.c index 556491c7a..674d73709 100644 --- a/relapack/src/ctrsyl_rec2.c +++ b/relapack/src/ctrsyl_rec2.c @@ -10,7 +10,7 @@ http://www.netlib.org/f2c/libf2c.zip */ -#include "../config.h" +#include "relapack_config.h" #include "f2c.h" #if BLAS_COMPLEX_FUNCTIONS_AS_ROUTINES diff --git a/relapack/src/relapack.h b/relapack/src/relapack.h index 38c5c30d0..44652a074 100644 --- a/relapack/src/relapack.h +++ b/relapack/src/relapack.h @@ -1,7 +1,7 @@ #ifndef RELAPACK_INT_H #define RELAPACK_INT_H #include -#include "../../config.h" +#include "config.h" #if defined(OS_WINDOWS) && defined(__64BIT__) typedef long long BLASLONG; typedef unsigned long long BLASULONG; @@ -9,7 +9,7 @@ typedef unsigned long long BLASULONG; typedef long BLASLONG; typedef unsigned long BLASULONG; #endif -#include "../config.h" +#include "relapack_config.h" #include "../inc/relapack.h" diff --git a/relapack/src/ztrsyl_rec2.c b/relapack/src/ztrsyl_rec2.c index edc6ffc6b..d07a4e8de 100644 --- a/relapack/src/ztrsyl_rec2.c +++ b/relapack/src/ztrsyl_rec2.c @@ -10,7 +10,7 @@ http://www.netlib.org/f2c/libf2c.zip */ -#include "../config.h" +#include "relapack_config.h" #include "f2c.h" #if BLAS_COMPLEX_FUNCTIONS_AS_ROUTINES From ea6c5f3cf553a23f8e2e787307805e7874e1f9c6 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 30 Oct 2022 12:55:23 +0100 Subject: [PATCH 07/12] Add option RELAPACK_REPLACE --- Makefile.rule | 5 ++++- Makefile.system | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Makefile.rule b/Makefile.rule index a0ad90a68..9665d951a 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -131,6 +131,9 @@ BUILD_LAPACK_DEPRECATED = 1 # Build RecursiveLAPACK on top of LAPACK # BUILD_RELAPACK = 1 +# Have RecursiveLAPACK actually replace standard LAPACK routines instead of +# just adding its equivalents with a RELAPACK_ prefix +# RELAPACK_REPLACE = 1 # If you want to use the legacy threaded Level 3 implementation. # USE_SIMPLE_THREADED_LEVEL3 = 1 @@ -207,7 +210,7 @@ NO_AFFINITY = 1 # to the user space. If bigphysarea is enabled, it will use it. # DEVICEDRIVER_ALLOCATION = 1 -# If you need to synchronize FP CSR between threads (for x86/x86_64 and aarch64 only). +# If you need to synchronize FP CSR between threads (for x86/x86_64 only). # CONSISTENT_FPCSR = 1 # If any gemm argument m, n or k is less or equal this threshold, gemm will be execute diff --git a/Makefile.system b/Makefile.system index 10b952d4b..3c29ab3f3 100644 --- a/Makefile.system +++ b/Makefile.system @@ -9,6 +9,10 @@ ifndef TOPDIR TOPDIR = . endif +ifndef RELAPACK_REPLACE +RELAPACK_REPLACE=0 +endif + # we need to use the host system's architecture for getarch compile options even especially when cross-compiling HOSTARCH := $(shell uname -m) ifeq ($(HOSTARCH), amd64) From fcda11c1ae0c50d5ab393352d8b78084a4e1dcad Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 5 Nov 2022 23:48:50 +0100 Subject: [PATCH 08/12] Revert special handling of GEMMT --- relapack/relapack_config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relapack/relapack_config.h b/relapack/relapack_config.h index 914efcbf0..ba428a61b 100644 --- a/relapack/relapack_config.h +++ b/relapack/relapack_config.h @@ -115,7 +115,7 @@ #define INCLUDE_CTGSYL INCLUDE_XTGSYL #define INCLUDE_ZTGSYL INCLUDE_XTGSYL -#define INCLUDE_XGEMMT 1 +#define INCLUDE_XGEMMT INCLUDE_ALL #define INCLUDE_SGEMMT INCLUDE_XGEMMT #define INCLUDE_DGEMMT INCLUDE_XGEMMT #define INCLUDE_CGEMMT INCLUDE_XGEMMT From 1b777641825f9f97f2fb0a3386d32e1d106c36db Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 8 Nov 2022 12:02:59 +0100 Subject: [PATCH 09/12] Conditionally leave out bits of LAPACK to be overridden by ReLAPACK --- interface/CMakeLists.txt | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 0b2998237..4e082928b 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -53,7 +53,7 @@ set(BLAS2_COMPLEX_ONLY_MANGLED_SOURCES # these do not have separate 'z' sources set(BLAS3_SOURCES gemm.c symm.c - trsm.c syrk.c syr2k.c + trsm.c syrk.c syr2k.c gemmt.c ) set(BLAS3_MANGLED_SOURCES @@ -189,7 +189,16 @@ if (NOT DEFINED NO_LAPACK) ) GenerateNamedObjects("${LAPACK_SOURCES}") + if (NOT RELAPACK_REPLACE) GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" 0 3) + else () + GenerateNamedObjects("lapack/getrs.c" "" "" 0 "" "" 0 3) + GenerateNamedObjects("lapack/getf2.c" "" "" 0 "" "" 0 3) + GenerateNamedObjects("lapack/potf2.c" "" "" 0 "" "" 0 3) + GenerateNamedObjects("lapack/laswp.c" "" "" 0 "" "" 0 3) + GenerateNamedObjects("lapack/lauu2.c" "" "" 0 "" "" 0 3) + GenerateNamedObjects("lapack/trti2.c" "" "" 0 "" "" 0 3) + endif() endif () if ( BUILD_COMPLEX AND NOT BUILD_SINGLE) From aa2a2d9c01357befb2d168d6833332b3dc50f008 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 8 Nov 2022 12:04:46 +0100 Subject: [PATCH 10/12] Conditionally compile files that may get replaced by ReLAPACK --- lapack/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index fd4e57048..1d44e9490 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -39,8 +39,12 @@ set(UNIT_SOURCES2 trti2/trti2_L.c ) +if (NOT RELAPACK_REPLACE) GenerateNamedObjects("${LAPACK_SOURCES}") GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" false "" "" false 3) +else() +GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" false "" "" false 3) +endif() GenerateNamedObjects("laswp/generic/laswp_k_4.c" "" "laswp_plus" false "" "" false 3) GenerateNamedObjects("laswp/generic/laswp_k_4.c" "MINUS" "laswp_minus" false "" "" false 3) @@ -113,4 +117,3 @@ GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "" 4) GenerateCombinationObjects("${UNIT_SOURCES2}" "UNIT" "N" "" 0 "" "" 3) add_library(lapack OBJECT ${OPENBLAS_SRC}) - From 2e64722681cd94ec3f7c077ee3f96c5350ddc352 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 8 Nov 2022 16:20:17 +0100 Subject: [PATCH 11/12] Update Makefile.rule --- Makefile.rule | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.rule b/Makefile.rule index 9665d951a..5e6cefc22 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -210,7 +210,7 @@ NO_AFFINITY = 1 # to the user space. If bigphysarea is enabled, it will use it. # DEVICEDRIVER_ALLOCATION = 1 -# If you need to synchronize FP CSR between threads (for x86/x86_64 only). +# If you need to synchronize FP CSR between threads (for x86/x86_64 and aarch64 only). # CONSISTENT_FPCSR = 1 # If any gemm argument m, n or k is less or equal this threshold, gemm will be execute From e6204d254f1ef1ca8524f7d82ceaf31cbe63c17b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 8 Nov 2022 16:21:11 +0100 Subject: [PATCH 12/12] Update CMakeLists.txt --- interface/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 4e082928b..ce1434a90 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -53,7 +53,7 @@ set(BLAS2_COMPLEX_ONLY_MANGLED_SOURCES # these do not have separate 'z' sources set(BLAS3_SOURCES gemm.c symm.c - trsm.c syrk.c syr2k.c gemmt.c + trsm.c syrk.c syr2k.c ) set(BLAS3_MANGLED_SOURCES