diff --git a/CMakeLists.txt b/CMakeLists.txt index 0c92356e7..e830589e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -212,10 +212,10 @@ if(NOT NO_LAPACKE) add_library(LAPACKE OBJECT ${LAPACKE_SOURCES}) list(APPEND TARGET_OBJS "$") endif() -if(BUILD_RELAPACK) - add_library(RELAPACK OBJECT ${RELA_SOURCES}) - list(APPEND TARGET_OBJS "$") -endif() +#if(BUILD_RELAPACK) +# add_library(RELAPACK OBJECT ${RELA_SOURCES}) +# list(APPEND TARGET_OBJS "$") +#endif() set(OpenBLAS_LIBS "") if(BUILD_STATIC_LIBS) add_library(${OpenBLAS_LIBNAME}_static STATIC ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) diff --git a/Makefile.rule b/Makefile.rule index a0ad90a68..5e6cefc22 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -131,6 +131,9 @@ BUILD_LAPACK_DEPRECATED = 1 # Build RecursiveLAPACK on top of LAPACK # BUILD_RELAPACK = 1 +# Have RecursiveLAPACK actually replace standard LAPACK routines instead of +# just adding its equivalents with a RELAPACK_ prefix +# RELAPACK_REPLACE = 1 # If you want to use the legacy threaded Level 3 implementation. # USE_SIMPLE_THREADED_LEVEL3 = 1 diff --git a/Makefile.system b/Makefile.system index 10b952d4b..3c29ab3f3 100644 --- a/Makefile.system +++ b/Makefile.system @@ -9,6 +9,10 @@ ifndef TOPDIR TOPDIR = . endif +ifndef RELAPACK_REPLACE +RELAPACK_REPLACE=0 +endif + # we need to use the host system's architecture for getarch compile options even especially when cross-compiling HOSTARCH := $(shell uname -m) ifeq ($(HOSTARCH), amd64) diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 0b2998237..ce1434a90 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -189,7 +189,16 @@ if (NOT DEFINED NO_LAPACK) ) GenerateNamedObjects("${LAPACK_SOURCES}") + if (NOT RELAPACK_REPLACE) GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" 0 3) + else () + GenerateNamedObjects("lapack/getrs.c" "" "" 0 "" "" 0 3) + GenerateNamedObjects("lapack/getf2.c" "" "" 0 "" "" 0 3) + GenerateNamedObjects("lapack/potf2.c" "" "" 0 "" "" 0 3) + GenerateNamedObjects("lapack/laswp.c" "" "" 0 "" "" 0 3) + GenerateNamedObjects("lapack/lauu2.c" "" "" 0 "" "" 0 3) + GenerateNamedObjects("lapack/trti2.c" "" "" 0 "" "" 0 3) + endif() endif () if ( BUILD_COMPLEX AND NOT BUILD_SINGLE) diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index fd4e57048..1d44e9490 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -39,8 +39,12 @@ set(UNIT_SOURCES2 trti2/trti2_L.c ) +if (NOT RELAPACK_REPLACE) GenerateNamedObjects("${LAPACK_SOURCES}") GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" false "" "" false 3) +else() +GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" false "" "" false 3) +endif() GenerateNamedObjects("laswp/generic/laswp_k_4.c" "" "laswp_plus" false "" "" false 3) GenerateNamedObjects("laswp/generic/laswp_k_4.c" "MINUS" "laswp_minus" false "" "" false 3) @@ -113,4 +117,3 @@ GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "" 4) GenerateCombinationObjects("${UNIT_SOURCES2}" "UNIT" "N" "" 0 "" "" 3) add_library(lapack OBJECT ${OPENBLAS_SRC}) - diff --git a/relapack/Makefile b/relapack/Makefile index ddf101bd1..056a0ee48 100644 --- a/relapack/Makefile +++ b/relapack/Makefile @@ -1,53 +1,61 @@ TOPDIR = .. include $(TOPDIR)/Makefile.system - +ifeq ($(RELAPACK_REPLACE),0) +RELAPREFIX=RELAPACK_ +INCLALL=-DINCLUDE_ALL=0 +else +INCLALL=-DINCLUDE_ALL=1 +endif SRC = $(wildcard src/*.c) SRC1 = \ - src/slauum.c src/clauum.c src/dlauum.c src/zlauum.c \ - src/strtri.c src/dtrtri.c src/ctrtri.c src/ztrtri.c \ - src/spotrf.c src/dpotrf.c src/cpotrf.c src/zpotrf.c \ - src/sgetrf.c src/dgetrf.c src/cgetrf.c src/zgetrf.c + slauum.c clauum.c dlauum.c zlauum.c \ + strtri.c dtrtri.c ctrtri.c ztrtri.c \ + spotrf.c dpotrf.c cpotrf.c zpotrf.c \ + sgetrf.c dgetrf.c cgetrf.c zgetrf.c SRC2 = \ - src/cgbtrf.c src/cpbtrf.c src/dsytrf_rec2.c src/sgbtrf.c src/ssytrf_rook.c src/zhegst.c src/zsytrf_rec2.c \ - src/cgemmt.c src/dgbtrf.c src/dsytrf_rook.c src/sgemmt.c src/ssytrf_rook_rec2.c src/zhetrf.c src/zsytrf_rook.c \ - src/csytrf.c src/dgemmt.c src/dsytrf_rook_rec2.c src/stgsyl.c src/zhetrf_rec2.c src/zsytrf_rook_rec2.c \ - src/chegst.c src/csytrf_rec2.c src/dtgsyl.c src/strsyl.c src/zhetrf_rook.c src/ztgsyl.c \ - src/chetrf.c src/csytrf_rook.c src/dtrsyl.c src/spbtrf.c src/strsyl_rec2.c src/zhetrf_rook_rec2.c src/ztrsyl.c \ - src/chetrf_rec2.c src/csytrf_rook_rec2.c src/dpbtrf.c src/dtrsyl_rec2.c src/ztrsyl_rec2.c \ - src/chetrf_rook.c src/ctgsyl.c src/ssygst.c src/zgbtrf.c src/zpbtrf.c \ - src/chetrf_rook_rec2.c src/ctrsyl.c src/dsygst.c src/f2c.c src/ssytrf.c src/zgemmt.c \ - src/ctrsyl_rec2.c src/dsytrf.c src/lapack_wrappers.c src/ssytrf_rec2.c src/zsytrf.c + cgbtrf.c cpbtrf.c dsytrf_rec2.c sgbtrf.c ssytrf_rook.c zhegst.c zsytrf_rec2.c \ + cgemmt.c dgbtrf.c dsytrf_rook.c sgemmt.c ssytrf_rook_rec2.c zhetrf.c zsytrf_rook.c \ + csytrf.c dgemmt.c dsytrf_rook_rec2.c stgsyl.c zhetrf_rec2.c zsytrf_rook_rec2.c \ + chegst.c csytrf_rec2.c dtgsyl.c strsyl.c zhetrf_rook.c ztgsyl.c \ + chetrf.c csytrf_rook.c dtrsyl.c spbtrf.c strsyl_rec2.c zhetrf_rook_rec2.c ztrsyl.c \ + chetrf_rec2.c csytrf_rook_rec2.c dpbtrf.c dtrsyl_rec2.c ztrsyl_rec2.c \ + chetrf_rook.c ctgsyl.c ssygst.c zgbtrf.c zpbtrf.c \ + chetrf_rook_rec2.c ctrsyl.c dsygst.c f2c.c ssytrf.c zgemmt.c \ + ctrsyl_rec2.c dsytrf.c lapack_wrappers.c ssytrf_rec2.c zsytrf.c SRCX = \ - src/cgbtrf.c src/cpbtrf.c src/ctrtri.c src/dsytrf_rec2.c src/sgbtrf.c src/ssytrf_rook.c src/zhegst.c src/zsytrf_rec2.c \ - src/cgemmt.c src/cpotrf.c src/dgbtrf.c src/dsytrf_rook.c src/sgemmt.c src/ssytrf_rook_rec2.c src/zhetrf.c src/zsytrf_rook.c \ - src/cgetrf.c src/csytrf.c src/dgemmt.c src/dsytrf_rook_rec2.c src/sgetrf.c src/stgsyl.c src/zhetrf_rec2.c src/zsytrf_rook_rec2.c \ - src/chegst.c src/csytrf_rec2.c src/dgetrf.c src/dtgsyl.c src/slauum.c src/strsyl.c src/zhetrf_rook.c src/ztgsyl.c \ - src/chetrf.c src/csytrf_rook.c src/dlauum.c src/dtrsyl.c src/spbtrf.c src/strsyl_rec2.c src/zhetrf_rook_rec2.c src/ztrsyl.c \ - src/chetrf_rec2.c src/csytrf_rook_rec2.c src/dpbtrf.c src/dtrsyl_rec2.c src/spotrf.c src/strtri.c src/zlauum.c src/ztrsyl_rec2.c \ - src/chetrf_rook.c src/ctgsyl.c src/dpotrf.c src/dtrtri.c src/ssygst.c src/zgbtrf.c src/zpbtrf.c src/ztrtri.c \ - src/chetrf_rook_rec2.c src/ctrsyl.c src/dsygst.c src/f2c.c src/ssytrf.c src/zgemmt.c src/zpotrf.c \ - src/clauum.c src/ctrsyl_rec2.c src/dsytrf.c src/lapack_wrappers.c src/ssytrf_rec2.c src/zgetrf.c src/zsytrf.c + cgbtrf.c cpbtrf.c ctrtri.c dsytrf_rec2.c sgbtrf.c ssytrf_rook.c zhegst.c zsytrf_rec2.c \ + cgemmt.c cpotrf.c dgbtrf.c dsytrf_rook.c sgemmt.c ssytrf_rook_rec2.c zhetrf.c zsytrf_rook.c \ + cgetrf.c csytrf.c dgemmt.c dsytrf_rook_rec2.c sgetrf.c stgsyl.c zhetrf_rec2.c zsytrf_rook_rec2.c \ + chegst.c csytrf_rec2.c dgetrf.c dtgsyl.c slauum.c strsyl.c zhetrf_rook.c ztgsyl.c \ + chetrf.c csytrf_rook.c dlauum.c dtrsyl.c spbtrf.c strsyl_rec2.c zhetrf_rook_rec2.c ztrsyl.c \ + chetrf_rec2.c csytrf_rook_rec2.c dpbtrf.c dtrsyl_rec2.c spotrf.c strtri.c zlauum.c ztrsyl_rec2.c \ + chetrf_rook.c ctgsyl.c dpotrf.c dtrtri.c ssygst.c zgbtrf.c zpbtrf.c ztrtri.c \ + chetrf_rook_rec2.c ctrsyl.c dsygst.c f2c.c ssytrf.c zgemmt.c zpotrf.c \ + clauum.c ctrsyl_rec2.c dsytrf.c lapack_wrappers.c ssytrf_rec2.c zgetrf.c zsytrf.c -OBJS1 = $(SRC1:%.c=%.$(SUFFIX)) -OBJS2 = $(SRC2:%.c=%.o) + +OBJS1 = $(SRC1:%.c=src/$(RELAPREFIX)%.$(SUFFIX)) +OBJS2 = $(SRC2:%.c=src/$(RELAPREFIX)%.o) OBJS = $(OBJS1) $(OBJS2) TEST_SUITS = \ - slauum dlauum clauum zlauum \ - spotrf dpotrf cpotrf zpotrf \ - spbtrf dpbtrf cpbtrf zpbtrf \ - ssygst dsygst chegst zhegst \ - ssytrf dsytrf csytrf chetrf zsytrf zhetrf \ - sgetrf dgetrf cgetrf zgetrf \ - sgbtrf dgbtrf cgbtrf zgbtrf \ - strsyl dtrsyl ctrsyl ztrsyl \ - stgsyl dtgsyl ctgsyl ztgsyl \ sgemmt dgemmt cgemmt zgemmt + + # slauum dlauum clauum zlauum \ + # spotrf dpotrf cpotrf zpotrf \ + # spbtrf dpbtrf cpbtrf zpbtrf \ + # ssygst dsygst chegst zhegst \ + # ssytrf dsytrf csytrf chetrf zsytrf zhetrf \ + # sgetrf dgetrf cgetrf zgetrf \ + # sgbtrf dgbtrf cgbtrf zgbtrf \ + # strsyl dtrsyl ctrsyl ztrsyl \ + # stgsyl dtgsyl ctgsyl ztgsyl \ + TESTS = $(TEST_SUITS:%=test/%.pass) # dummies TEST_EXES = $(TEST_SUITS:%=test/%.x) @@ -63,11 +71,11 @@ libs: $(OBJS) $(AR) -r $(TOPDIR)/$(LIBNAME) $(OBJS) $(RANLIB) $(TOPDIR)/$(LIBNAME) -%.$(SUFFIX): %.c config.h - $(CC) $(CFLAGS) -c $< -o $@ +src/$(RELAPREFIX)%.$(SUFFIX): src/%.c relapack_config.h + $(CC) -v $(CFLAGS) -I. $(INCLALL) -c $< -o $@ -%.o: %.c config.h - $(CC) $(CFLAGS) -c $< -o $@ +src/$(RELAPREFIX)%.o: src/%.c relapack_config.h + $(CC) -v $(CFLAGS) -I. $(INCLALL) -c $< -o $@ # ReLAPACK testing diff --git a/relapack/config.h b/relapack/relapack_config.h similarity index 99% rename from relapack/config.h rename to relapack/relapack_config.h index 9d6919463..ba428a61b 100644 --- a/relapack/config.h +++ b/relapack/relapack_config.h @@ -45,7 +45,7 @@ // The following macros specify which routines are included in the library under // LAPACK's symbol names: 1 included, 0 not included -#define INCLUDE_ALL 1 +// #define INCLUDE_ALL 1 #define INCLUDE_XLAUUM INCLUDE_ALL #define INCLUDE_SLAUUM INCLUDE_XLAUUM @@ -115,7 +115,7 @@ #define INCLUDE_CTGSYL INCLUDE_XTGSYL #define INCLUDE_ZTGSYL INCLUDE_XTGSYL -#define INCLUDE_XGEMMT 1 +#define INCLUDE_XGEMMT INCLUDE_ALL #define INCLUDE_SGEMMT INCLUDE_XGEMMT #define INCLUDE_DGEMMT INCLUDE_XGEMMT #define INCLUDE_CGEMMT INCLUDE_XGEMMT diff --git a/relapack/src/CMakeLists.txt b/relapack/src/CMakeLists.txt index 2d861f54b..78fb1431f 100644 --- a/relapack/src/CMakeLists.txt +++ b/relapack/src/CMakeLists.txt @@ -1,5 +1,6 @@ include_directories(${PROJECT_SOURCE_DIR}) include_directories(${PROJECT_BINARY_DIR}) +include_directories(${PROJECT_SOURCE_DIR}/relapack) set(RELAFILES clauum.c diff --git a/relapack/src/ctrsyl_rec2.c b/relapack/src/ctrsyl_rec2.c index 556491c7a..674d73709 100644 --- a/relapack/src/ctrsyl_rec2.c +++ b/relapack/src/ctrsyl_rec2.c @@ -10,7 +10,7 @@ http://www.netlib.org/f2c/libf2c.zip */ -#include "../config.h" +#include "relapack_config.h" #include "f2c.h" #if BLAS_COMPLEX_FUNCTIONS_AS_ROUTINES diff --git a/relapack/src/relapack.h b/relapack/src/relapack.h index 38c5c30d0..44652a074 100644 --- a/relapack/src/relapack.h +++ b/relapack/src/relapack.h @@ -1,7 +1,7 @@ #ifndef RELAPACK_INT_H #define RELAPACK_INT_H #include -#include "../../config.h" +#include "config.h" #if defined(OS_WINDOWS) && defined(__64BIT__) typedef long long BLASLONG; typedef unsigned long long BLASULONG; @@ -9,7 +9,7 @@ typedef unsigned long long BLASULONG; typedef long BLASLONG; typedef unsigned long BLASULONG; #endif -#include "../config.h" +#include "relapack_config.h" #include "../inc/relapack.h" diff --git a/relapack/src/ztrsyl_rec2.c b/relapack/src/ztrsyl_rec2.c index edc6ffc6b..d07a4e8de 100644 --- a/relapack/src/ztrsyl_rec2.c +++ b/relapack/src/ztrsyl_rec2.c @@ -10,7 +10,7 @@ http://www.netlib.org/f2c/libf2c.zip */ -#include "../config.h" +#include "relapack_config.h" #include "f2c.h" #if BLAS_COMPLEX_FUNCTIONS_AS_ROUTINES