Add Elmar Peise's ReLAPACK
This commit is contained in:
parent
482015f8d6
commit
9b7b5f7fdc
|
@ -0,0 +1,22 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2016 Elmar Peise
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
TOPDIR = ..
|
||||
include $(TOPDIR)/Makefile.system
|
||||
|
||||
|
||||
|
||||
SRC = $(wildcard src/*.c)
|
||||
OBJS = $(SRC:%.c=%.o)
|
||||
|
||||
TEST_SUITS = \
|
||||
slauum dlauum clauum zlauum \
|
||||
spotrf dpotrf cpotrf zpotrf \
|
||||
spbtrf dpbtrf cpbtrf zpbtrf \
|
||||
ssygst dsygst chegst zhegst \
|
||||
ssytrf dsytrf csytrf chetrf zsytrf zhetrf \
|
||||
sgetrf dgetrf cgetrf zgetrf \
|
||||
sgbtrf dgbtrf cgbtrf zgbtrf \
|
||||
strsyl dtrsyl ctrsyl ztrsyl \
|
||||
stgsyl dtgsyl ctgsyl ztgsyl \
|
||||
sgemmt dgemmt cgemmt zgemmt
|
||||
TESTS = $(TEST_SUITS:%=test/%.pass) # dummies
|
||||
TEST_EXES = $(TEST_SUITS:%=test/%.x)
|
||||
|
||||
LINK_TEST = -L$(TOPDIR) -lopenblas -lgfortran -lm
|
||||
|
||||
.SECONDARY: $(TEST_EXES)
|
||||
.PHONY: test
|
||||
|
||||
# ReLAPACK compilation
|
||||
|
||||
libs: $(OBJS)
|
||||
@echo "Building ReLAPACK library $(LIBNAME)"
|
||||
$(AR) -r $(TOPDIR)/$(LIBNAME) $(OBJS)
|
||||
$(RANLIB) $(TOPDIR)/$(LIBNAME)
|
||||
|
||||
%.o: %.c config.h
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
|
||||
# ReLAPACK testing
|
||||
|
||||
test: $(TEST_EXES) $(TESTS)
|
||||
@echo "passed all tests"
|
||||
|
||||
test/%.pass: test/%.x
|
||||
@echo -n $*:
|
||||
@./$< > /dev/null && echo " pass" || (echo " FAIL" && ./$<)
|
||||
|
||||
test/s%.x: test/x%.c test/util.o $(TOPDIR)/$(LIBNAME) test/config.h test/test.h
|
||||
$(CC) $(CFLAGS) -DDT_PREFIX=s $< test/util.o -o $@ $(LINK_TEST) $(TOPDIR)/$(LIBNAME) $(LINK_TEST)
|
||||
|
||||
test/d%.x: test/x%.c test/util.o $(TOPDIR)/$(LIBNAME) test/config.h test/test.h
|
||||
$(CC) $(CFLAGS) -DDT_PREFIX=d $< test/util.o -o $@ $(LINK_TEST) $(TOPDIR)/$(LIBNAME) $(LINK_TEST)
|
||||
|
||||
test/c%.x: test/x%.c test/util.o $(TOPDIR)/$(LIBNAME) test/config.h test/test.h
|
||||
$(CC) $(CFLAGS) -DDT_PREFIX=c $< test/util.o -o $@ $(LINK_TEST) $(TOPDIR)/$(LIBNAME) $(LINK_TEST)
|
||||
|
||||
test/z%.x: test/x%.c test/util.o $(TOPDIR)/$(LIBNAME) test/config.h test/test.h
|
||||
$(CC) $(CFLAGS) -DDT_PREFIX=z $< test/util.o -o $@ $(LINK_TEST) $(TOPDIR)/$(LIBNAME) $(LINK_TEST)
|
||||
|
||||
|
||||
# cleaning up
|
||||
|
||||
clean:
|
||||
rm -f $(OBJS) test/util.o test/*.x
|
|
@ -0,0 +1,68 @@
|
|||
ReLAPACK
|
||||
========
|
||||
|
||||
[](https://travis-ci.org/HPAC/ReLAPACK)
|
||||
|
||||
[Recursive LAPACK Collection](https://github.com/HPAC/ReLAPACK)
|
||||
|
||||
ReLAPACK offers a collection of recursive algorithms for many of LAPACK's
|
||||
compute kernels. Since it preserves LAPACK's established interfaces, ReLAPACK
|
||||
integrates effortlessly into existing application codes. ReLAPACK's routines
|
||||
not only outperform the reference LAPACK but also improve upon the performance
|
||||
of tuned implementations, such as OpenBLAS and MKL.
|
||||
|
||||
|
||||
Coverage
|
||||
--------
|
||||
For a detailed list of covered operations and an overview of operations to which
|
||||
recursion is not efficiently applicable, see [coverage.md](coverage.md).
|
||||
|
||||
|
||||
Installation
|
||||
------------
|
||||
To compile with the default configuration, simply run `make` to create the
|
||||
library `librelapack.a`.
|
||||
|
||||
### Linking with MKL
|
||||
Note that to link with MKL, you currently need to set the flag
|
||||
`COMPLEX_FUNCTIONS_AS_ROUTINES` to `1` to avoid problems in `ctrsyl` and
|
||||
`ztrsyl`. For further configuration options see [config.md](config.md).
|
||||
|
||||
|
||||
### Dependencies
|
||||
ReLAPACK builds on top of [BLAS](http://www.netlib.org/blas/) and unblocked
|
||||
kernels from [LAPACK](http://www.netlib.org/lapack/). There are many optimized
|
||||
and machine specific implementations of these libraries, which are commonly
|
||||
provided by hardware vendors or available as open source (e.g.,
|
||||
[OpenBLAS](http://www.openblas.net/)).
|
||||
|
||||
|
||||
Testing
|
||||
-------
|
||||
ReLAPACK's test suite compares its routines numerically with LAPACK's
|
||||
counterparts. To set up the tests (located int `test/`) you need to specify
|
||||
link flags for BLAS and LAPACK (version 3.5.0 or newer) in `make.inc`; then
|
||||
`make test` runs the tests. For details on the performed tests, see
|
||||
[test/README.md](test/README.md).
|
||||
|
||||
|
||||
Examples
|
||||
--------
|
||||
Since ReLAPACK replaces parts of LAPACK, any LAPACK example involving the
|
||||
covered routines applies directly to ReLAPACK. A few separate examples are
|
||||
given in `examples/`. For details, see [examples/README.md](examples/README.md).
|
||||
|
||||
|
||||
Citing
|
||||
------
|
||||
When referencing ReLAPACK, please cite the preprint of the paper
|
||||
[Recursive Algorithms for Dense Linear Algebra: The ReLAPACK Collection](http://arxiv.org/abs/1602.06763):
|
||||
|
||||
@article{relapack,
|
||||
author = {Elmar Peise and Paolo Bientinesi},
|
||||
title = {Recursive Algorithms for Dense Linear Algebra: The ReLAPACK Collection},
|
||||
journal = {CoRR},
|
||||
volume = {abs/1602.06763},
|
||||
year = {2016},
|
||||
url = {http://arxiv.org/abs/1602.06763},
|
||||
}
|
|
@ -0,0 +1,208 @@
|
|||
#ifndef RELAPACK_CONFIG_H
|
||||
#define RELAPACK_CONFIG_H
|
||||
|
||||
// ReLAPACK configuration file.
|
||||
// See also config.md
|
||||
|
||||
|
||||
///////////////////////////////
|
||||
// BLAS/LAPACK obect symbols //
|
||||
///////////////////////////////
|
||||
|
||||
// BLAS routines linked against have a trailing underscore
|
||||
#define BLAS_UNDERSCORE 1
|
||||
// LAPACK routines linked against have a trailing underscore
|
||||
#define LAPACK_UNDERSCORE BLAS_UNDERSCORE
|
||||
|
||||
// Complex BLAS/LAPACK routines return their result in the first argument
|
||||
// This option must be enabled when linking to MKL for ctrsyl and ztrsyl to
|
||||
// work.
|
||||
#define COMPLEX_FUNCTIONS_AS_ROUTINES 0
|
||||
#ifdef F_INTERFACE_INTEL
|
||||
#define COMPLEX_FUNCTIONS_AS_ROUTINES 1
|
||||
#endif
|
||||
#define BLAS_COMPLEX_FUNCTIONS_AS_ROUTINES COMPLEX_FUNCTIONS_AS_ROUTINES
|
||||
#define LAPACK_BLAS_COMPLEX_FUNCTIONS_AS_ROUTINES COMPLEX_FUNCTIONS_AS_ROUTINES
|
||||
|
||||
// The BLAS-like extension xgemmt is provided by an external library.
|
||||
#define HAVE_XGEMMT 0
|
||||
|
||||
|
||||
////////////////////////////
|
||||
// Use malloc in ReLAPACK //
|
||||
////////////////////////////
|
||||
|
||||
#define ALLOW_MALLOC 1
|
||||
// allow malloc in xsygst for improved performance
|
||||
#define XSYGST_ALLOW_MALLOC ALLOW_MALLOC
|
||||
// allow malloc in xsytrf if the passed work buffer is too small
|
||||
#define XSYTRF_ALLOW_MALLOC ALLOW_MALLOC
|
||||
|
||||
|
||||
////////////////////////////////
|
||||
// LAPACK routine replacement //
|
||||
////////////////////////////////
|
||||
// The following macros specify which routines are included in the library under
|
||||
// LAPACK's symbol names: 1 included, 0 not included
|
||||
|
||||
#define INCLUDE_ALL 1
|
||||
|
||||
#define INCLUDE_XLAUUM INCLUDE_ALL
|
||||
#define INCLUDE_SLAUUM INCLUDE_XLAUUM
|
||||
#define INCLUDE_DLAUUM INCLUDE_XLAUUM
|
||||
#define INCLUDE_CLAUUM INCLUDE_XLAUUM
|
||||
#define INCLUDE_ZLAUUM INCLUDE_XLAUUM
|
||||
|
||||
#define INCLUDE_XSYGST INCLUDE_ALL
|
||||
#define INCLUDE_SSYGST INCLUDE_XSYGST
|
||||
#define INCLUDE_DSYGST INCLUDE_XSYGST
|
||||
#define INCLUDE_CHEGST INCLUDE_XSYGST
|
||||
#define INCLUDE_ZHEGST INCLUDE_XSYGST
|
||||
|
||||
#define INCLUDE_XTRTRI INCLUDE_ALL
|
||||
#define INCLUDE_STRTRI INCLUDE_XTRTRI
|
||||
#define INCLUDE_DTRTRI INCLUDE_XTRTRI
|
||||
#define INCLUDE_CTRTRI INCLUDE_XTRTRI
|
||||
#define INCLUDE_ZTRTRI INCLUDE_XTRTRI
|
||||
|
||||
#define INCLUDE_XPOTRF INCLUDE_ALL
|
||||
#define INCLUDE_SPOTRF INCLUDE_XPOTRF
|
||||
#define INCLUDE_DPOTRF INCLUDE_XPOTRF
|
||||
#define INCLUDE_CPOTRF INCLUDE_XPOTRF
|
||||
#define INCLUDE_ZPOTRF INCLUDE_XPOTRF
|
||||
|
||||
#define INCLUDE_XPBTRF INCLUDE_ALL
|
||||
#define INCLUDE_SPBTRF INCLUDE_XPBTRF
|
||||
#define INCLUDE_DPBTRF INCLUDE_XPBTRF
|
||||
#define INCLUDE_CPBTRF INCLUDE_XPBTRF
|
||||
#define INCLUDE_ZPBTRF INCLUDE_XPBTRF
|
||||
|
||||
#define INCLUDE_XSYTRF INCLUDE_ALL
|
||||
#define INCLUDE_SSYTRF INCLUDE_XSYTRF
|
||||
#define INCLUDE_DSYTRF INCLUDE_XSYTRF
|
||||
#define INCLUDE_CSYTRF INCLUDE_XSYTRF
|
||||
#define INCLUDE_CHETRF INCLUDE_XSYTRF
|
||||
#define INCLUDE_ZSYTRF INCLUDE_XSYTRF
|
||||
#define INCLUDE_ZHETRF INCLUDE_XSYTRF
|
||||
#define INCLUDE_SSYTRF_ROOK INCLUDE_SSYTRF
|
||||
#define INCLUDE_DSYTRF_ROOK INCLUDE_DSYTRF
|
||||
#define INCLUDE_CSYTRF_ROOK INCLUDE_CSYTRF
|
||||
#define INCLUDE_CHETRF_ROOK INCLUDE_CHETRF
|
||||
#define INCLUDE_ZSYTRF_ROOK INCLUDE_ZSYTRF
|
||||
#define INCLUDE_ZHETRF_ROOK INCLUDE_ZHETRF
|
||||
|
||||
#define INCLUDE_XGETRF INCLUDE_ALL
|
||||
#define INCLUDE_SGETRF INCLUDE_XGETRF
|
||||
#define INCLUDE_DGETRF INCLUDE_XGETRF
|
||||
#define INCLUDE_CGETRF INCLUDE_XGETRF
|
||||
#define INCLUDE_ZGETRF INCLUDE_XGETRF
|
||||
|
||||
#define INCLUDE_XGBTRF INCLUDE_ALL
|
||||
#define INCLUDE_SGBTRF INCLUDE_XGBTRF
|
||||
#define INCLUDE_DGBTRF INCLUDE_XGBTRF
|
||||
#define INCLUDE_CGBTRF INCLUDE_XGBTRF
|
||||
#define INCLUDE_ZGBTRF INCLUDE_XGBTRF
|
||||
|
||||
#define INCLUDE_XTRSYL INCLUDE_ALL
|
||||
#define INCLUDE_STRSYL INCLUDE_XTRSYL
|
||||
#define INCLUDE_DTRSYL INCLUDE_XTRSYL
|
||||
#define INCLUDE_CTRSYL INCLUDE_XTRSYL
|
||||
#define INCLUDE_ZTRSYL INCLUDE_XTRSYL
|
||||
|
||||
#define INCLUDE_XTGSYL INCLUDE_ALL
|
||||
#define INCLUDE_STGSYL INCLUDE_XTGSYL
|
||||
#define INCLUDE_DTGSYL INCLUDE_XTGSYL
|
||||
#define INCLUDE_CTGSYL INCLUDE_XTGSYL
|
||||
#define INCLUDE_ZTGSYL INCLUDE_XTGSYL
|
||||
|
||||
#define INCLUDE_XGEMMT 0
|
||||
#define INCLUDE_SGEMMT INCLUDE_XGEMMT
|
||||
#define INCLUDE_DGEMMT INCLUDE_XGEMMT
|
||||
#define INCLUDE_CGEMMT INCLUDE_XGEMMT
|
||||
#define INCLUDE_ZGEMMT INCLUDE_XGEMMT
|
||||
|
||||
|
||||
/////////////////////
|
||||
// crossover sizes //
|
||||
/////////////////////
|
||||
|
||||
// default crossover size
|
||||
#define CROSSOVER 24
|
||||
|
||||
// individual crossover sizes
|
||||
#define CROSSOVER_XLAUUM CROSSOVER
|
||||
#define CROSSOVER_SLAUUM CROSSOVER_XLAUUM
|
||||
#define CROSSOVER_DLAUUM CROSSOVER_XLAUUM
|
||||
#define CROSSOVER_CLAUUM CROSSOVER_XLAUUM
|
||||
#define CROSSOVER_ZLAUUM CROSSOVER_XLAUUM
|
||||
|
||||
#define CROSSOVER_XSYGST CROSSOVER
|
||||
#define CROSSOVER_SSYGST CROSSOVER_XSYGST
|
||||
#define CROSSOVER_DSYGST CROSSOVER_XSYGST
|
||||
#define CROSSOVER_CHEGST CROSSOVER_XSYGST
|
||||
#define CROSSOVER_ZHEGST CROSSOVER_XSYGST
|
||||
|
||||
#define CROSSOVER_XTRTRI CROSSOVER
|
||||
#define CROSSOVER_STRTRI CROSSOVER_XTRTRI
|
||||
#define CROSSOVER_DTRTRI CROSSOVER_XTRTRI
|
||||
#define CROSSOVER_CTRTRI CROSSOVER_XTRTRI
|
||||
#define CROSSOVER_ZTRTRI CROSSOVER_XTRTRI
|
||||
|
||||
#define CROSSOVER_XPOTRF CROSSOVER
|
||||
#define CROSSOVER_SPOTRF CROSSOVER_XPOTRF
|
||||
#define CROSSOVER_DPOTRF CROSSOVER_XPOTRF
|
||||
#define CROSSOVER_CPOTRF CROSSOVER_XPOTRF
|
||||
#define CROSSOVER_ZPOTRF CROSSOVER_XPOTRF
|
||||
|
||||
#define CROSSOVER_XPBTRF CROSSOVER
|
||||
#define CROSSOVER_SPBTRF CROSSOVER_XPBTRF
|
||||
#define CROSSOVER_DPBTRF CROSSOVER_XPBTRF
|
||||
#define CROSSOVER_CPBTRF CROSSOVER_XPBTRF
|
||||
#define CROSSOVER_ZPBTRF CROSSOVER_XPBTRF
|
||||
|
||||
#define CROSSOVER_XSYTRF CROSSOVER
|
||||
#define CROSSOVER_SSYTRF CROSSOVER_XSYTRF
|
||||
#define CROSSOVER_DSYTRF CROSSOVER_XSYTRF
|
||||
#define CROSSOVER_CSYTRF CROSSOVER_XSYTRF
|
||||
#define CROSSOVER_CHETRF CROSSOVER_XSYTRF
|
||||
#define CROSSOVER_ZSYTRF CROSSOVER_XSYTRF
|
||||
#define CROSSOVER_ZHETRF CROSSOVER_XSYTRF
|
||||
#define CROSSOVER_SSYTRF_ROOK CROSSOVER_SSYTRF
|
||||
#define CROSSOVER_DSYTRF_ROOK CROSSOVER_DSYTRF
|
||||
#define CROSSOVER_CSYTRF_ROOK CROSSOVER_CSYTRF
|
||||
#define CROSSOVER_CHETRF_ROOK CROSSOVER_CHETRF
|
||||
#define CROSSOVER_ZSYTRF_ROOK CROSSOVER_ZSYTRF
|
||||
#define CROSSOVER_ZHETRF_ROOK CROSSOVER_ZHETRF
|
||||
|
||||
#define CROSSOVER_XGETRF CROSSOVER
|
||||
#define CROSSOVER_SGETRF CROSSOVER_XGETRF
|
||||
#define CROSSOVER_DGETRF CROSSOVER_XGETRF
|
||||
#define CROSSOVER_CGETRF CROSSOVER_XGETRF
|
||||
#define CROSSOVER_ZGETRF CROSSOVER_XGETRF
|
||||
|
||||
#define CROSSOVER_XGBTRF CROSSOVER
|
||||
#define CROSSOVER_SGBTRF CROSSOVER_XGBTRF
|
||||
#define CROSSOVER_DGBTRF CROSSOVER_XGBTRF
|
||||
#define CROSSOVER_CGBTRF CROSSOVER_XGBTRF
|
||||
#define CROSSOVER_ZGBTRF CROSSOVER_XGBTRF
|
||||
|
||||
#define CROSSOVER_XTRSYL CROSSOVER
|
||||
#define CROSSOVER_STRSYL CROSSOVER_XTRSYL
|
||||
#define CROSSOVER_DTRSYL CROSSOVER_XTRSYL
|
||||
#define CROSSOVER_CTRSYL CROSSOVER_XTRSYL
|
||||
#define CROSSOVER_ZTRSYL CROSSOVER_XTRSYL
|
||||
|
||||
#define CROSSOVER_XTGSYL CROSSOVER
|
||||
#define CROSSOVER_STGSYL CROSSOVER_XTGSYL
|
||||
#define CROSSOVER_DTGSYL CROSSOVER_XTGSYL
|
||||
#define CROSSOVER_CTGSYL CROSSOVER_XTGSYL
|
||||
#define CROSSOVER_ZTGSYL CROSSOVER_XTGSYL
|
||||
|
||||
// sytrf helper routine
|
||||
#define CROSSOVER_XGEMMT CROSSOVER_XSYTRF
|
||||
#define CROSSOVER_SGEMMT CROSSOVER_XGEMMT
|
||||
#define CROSSOVER_DGEMMT CROSSOVER_XGEMMT
|
||||
#define CROSSOVER_CGEMMT CROSSOVER_XGEMMT
|
||||
#define CROSSOVER_ZGEMMT CROSSOVER_XGEMMT
|
||||
|
||||
#endif /* RELAPACK_CONFIG_H */
|
|
@ -0,0 +1,87 @@
|
|||
RELAPACK Configuration
|
||||
======================
|
||||
|
||||
ReLAPACK has two configuration files: `make.inc`, which is included by the
|
||||
Makefile, and `config.h` which is included in the source files.
|
||||
|
||||
|
||||
Build and Testing Environment
|
||||
-----------------------------
|
||||
The build environment (compiler and flags) and the test configuration (linker
|
||||
flags for BLAS and LAPACK) are specified in `make.inc`. The test matrix size
|
||||
and error bounds are defined in `test/config.h`.
|
||||
|
||||
The library `librelapack.a` is compiled by invoking `make`. The tests are
|
||||
performed by either `make test` or calling `make` in the test folder.
|
||||
|
||||
|
||||
BLAS/LAPACK complex function interfaces
|
||||
---------------------------------------
|
||||
For BLAS and LAPACK functions that return a complex number, there exist two
|
||||
conflicting (FORTRAN compiler dependent) calling conventions: either the result
|
||||
is returned as a `struct` of two floating point numbers or an additional first
|
||||
argument with a pointer to such a `struct` is used. By default ReLAPACK uses
|
||||
the former (which is what gfortran uses), but it can switch to the latter by
|
||||
setting `COMPLEX_FUNCTIONS_AS_ROUTINES` (or explicitly the BLAS and LAPACK
|
||||
specific counterparts) to `1` in `config.h`.
|
||||
|
||||
**For MKL, `COMPLEX_FUNCTIONS_AS_ROUTINES` must be set to `1`.**
|
||||
|
||||
(Using the wrong convention will break `ctrsyl` and `ztrsyl` and the test cases
|
||||
will segfault or return errors on the order of 1 or larger.)
|
||||
|
||||
|
||||
BLAS extension `xgemmt`
|
||||
-----------------------
|
||||
The LDL decompositions require a general matrix-matrix product that updates only
|
||||
a triangular matrix called `xgemmt`. If the BLAS implementation linked against
|
||||
provides such a routine, set the flag `HAVE_XGEMMT` to `1` in `config.h`;
|
||||
otherwise, ReLAPACK uses its own recursive implementation of these kernels.
|
||||
|
||||
`xgemmt` is provided by MKL.
|
||||
|
||||
|
||||
Routine Selection
|
||||
-----------------
|
||||
ReLAPACK's routines are named `RELAPACK_X` (e.g., `RELAPACK_dgetrf`). If the
|
||||
corresponding `INCLUDE_X` flag in `config.h` (e.g., `INCLUDE_DGETRF`) is set to
|
||||
`1`, ReLAPACK additionally provides a wrapper under the LAPACK name (e.g.,
|
||||
`dgetrf_`). By default, wrappers for all routines are enabled.
|
||||
|
||||
|
||||
Crossover Size
|
||||
--------------
|
||||
The crossover size determines below which matrix sizes ReLAPACK's recursive
|
||||
algorithms switch to LAPACK's unblocked routines to avoid tiny BLAS Level 3
|
||||
routines. The crossover size is set in `config.h` and can be chosen either
|
||||
globally for the entire library, by operation, or individually by routine.
|
||||
|
||||
|
||||
Allowing Temporary Buffers
|
||||
--------------------------
|
||||
Two of ReLAPACK's routines make use of temporary buffers, which are allocated
|
||||
and freed within ReLAPACK. Setting `ALLOW_MALLOC` (or one of the routine
|
||||
specific counterparts) to 0 in `config.h` will disable these buffers. The
|
||||
affected routines are:
|
||||
|
||||
* `xsytrf`: The LDL decomposition requires a buffer of size n^2 / 2. As in
|
||||
LAPACK, this size can be queried by setting `lWork = -1` and the passed
|
||||
buffer will be used if it is large enough; only if it is not, a local buffer
|
||||
will be allocated.
|
||||
|
||||
The advantage of this mechanism is that ReLAPACK will seamlessly work even
|
||||
with codes that statically provide too little memory instead of breaking
|
||||
them.
|
||||
|
||||
* `xsygst`: The reduction of a real symmetric-definite generalized eigenproblem
|
||||
to standard form can use an auxiliary buffer of size n^2 / 2 to avoid
|
||||
redundant computations. It thereby performs about 30% less FLOPs than
|
||||
LAPACK.
|
||||
|
||||
|
||||
FORTRAN symbol names
|
||||
--------------------
|
||||
ReLAPACK is commonly linked to BLAS and LAPACK with standard FORTRAN interfaces.
|
||||
Since these libraries usually have an underscore to their symbol names, ReLAPACK
|
||||
has configuration switches in `config.h` to adjust the corresponding routine
|
||||
names.
|
|
@ -0,0 +1,212 @@
|
|||
Coverage of ReLAPACK
|
||||
====================
|
||||
|
||||
This file lists all LAPACK compute routines that are covered by recursive
|
||||
algorithms in ReLAPACK, it also lists all of LAPACK's blocked algorithms which
|
||||
are not (yet) part of ReLAPACK.
|
||||
|
||||
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
|
||||
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
|
||||
**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)*
|
||||
|
||||
- [List of covered LAPACK routines](#list-of-covered-lapack-routines)
|
||||
- [`xlauum`](#xlauum)
|
||||
- [`xsygst`](#xsygst)
|
||||
- [`xtrtri`](#xtrtri)
|
||||
- [`xpotrf`](#xpotrf)
|
||||
- [`xpbtrf`](#xpbtrf)
|
||||
- [`xsytrf`](#xsytrf)
|
||||
- [`xgetrf`](#xgetrf)
|
||||
- [`xgbtrf`](#xgbtrf)
|
||||
- [`xtrsyl`](#xtrsyl)
|
||||
- [`xtgsyl`](#xtgsyl)
|
||||
- [Covered BLAS extension](#covered-blas-extension)
|
||||
- [`xgemmt`](#xgemmt)
|
||||
- [Not covered yet](#not-covered-yet)
|
||||
- [`xpstrf`](#xpstrf)
|
||||
- [Not covered: extra FLOPs](#not-covered-extra-flops)
|
||||
- [QR decomposition (and related)](#qr-decomposition-and-related)
|
||||
- [Symmetric reduction to tridiagonal](#symmetric-reduction-to-tridiagonal)
|
||||
- [Symmetric reduction to bidiagonal](#symmetric-reduction-to-bidiagonal)
|
||||
- [Reduction to upper Hessenberg](#reduction-to-upper-hessenberg)
|
||||
|
||||
<!-- END doctoc generated TOC please keep comment here to allow auto update -->
|
||||
|
||||
|
||||
List of covered LAPACK routines
|
||||
-------------------------------
|
||||
|
||||
### `xlauum`
|
||||
Multiplication of a triangular matrix with its (complex conjugate) transpose,
|
||||
resulting in a symmetric (Hermitian) matrix.
|
||||
|
||||
Routines: `slauum`, `dlauum`, `clauum`, `zlauum`
|
||||
|
||||
Operations:
|
||||
* A = L^T L
|
||||
* A = U U^T
|
||||
|
||||
### `xsygst`
|
||||
Simultaneous two-sided multiplication of a symmetric matrix with a triangular
|
||||
matrix and its transpose
|
||||
|
||||
Routines: `ssygst`, `dsygst`, `chegst`, `zhegst`
|
||||
|
||||
Operations:
|
||||
* A = inv(L) A inv(L^T)
|
||||
* A = inv(U^T) A inv(U)
|
||||
* A = L^T A L
|
||||
* A = U A U^T
|
||||
|
||||
### `xtrtri`
|
||||
Inversion of a triangular matrix
|
||||
|
||||
Routines: `strtri`, `dtrtri`, `ctrtri`, `ztrtri`
|
||||
|
||||
Operations:
|
||||
* L = inv(L)
|
||||
* U = inv(U)
|
||||
|
||||
### `xpotrf`
|
||||
Cholesky decomposition of a symmetric (Hermitian) positive definite matrix
|
||||
|
||||
Routines: `spotrf`, `dpotrf`, `cpotrf`, `zpotrf`
|
||||
|
||||
Operations:
|
||||
* L L^T = A
|
||||
* U^T U = A
|
||||
|
||||
### `xpbtrf`
|
||||
Cholesky decomposition of a banded symmetric (Hermitian) positive definite matrix
|
||||
|
||||
Routines: `spbtrf`, `dpbtrf`, `cpbtrf`, `zpbtrf`
|
||||
|
||||
Operations:
|
||||
* L L^T = A
|
||||
* U^T U = A
|
||||
|
||||
### `xsytrf`
|
||||
LDL decomposition of a symmetric (or Hermitian) matrix
|
||||
|
||||
Routines:
|
||||
* `ssytrf`, `dsytrf`, `csytrf`, `chetrf`, `zsytrf`, `zhetrf`,
|
||||
* `ssytrf_rook`, `dsytrf_rook`, `csytrf_rook`, `chetrf_rook`, `zsytrf_rook`,
|
||||
`zhetrf_rook`
|
||||
|
||||
Operations:
|
||||
* L D L^T = A
|
||||
* U^T D U = A
|
||||
|
||||
### `xgetrf`
|
||||
LU decomposition of a general matrix with pivoting
|
||||
|
||||
Routines: `sgetrf`, `dgetrf`, `cgetrf`, `zgetrf`
|
||||
|
||||
Operation: P L U = A
|
||||
|
||||
### `xgbtrf`
|
||||
LU decomposition of a general banded matrix with pivoting
|
||||
|
||||
Routines: `sgbtrf`, `dgbtrf`, `cgbtrf`, `zgbtrf`
|
||||
|
||||
Operation: L U = A
|
||||
|
||||
### `xtrsyl`
|
||||
Solution of the quasi-triangular Sylvester equation
|
||||
|
||||
Routines: `strsyl`, `dtrsyl`, `ctrsyl`, `ztrsyl`
|
||||
|
||||
Operations:
|
||||
* A X + B Y = C -> X
|
||||
* A^T X + B Y = C -> X
|
||||
* A X + B^T Y = C -> X
|
||||
* A^T X + B^T Y = C -> X
|
||||
* A X - B Y = C -> X
|
||||
* A^T X - B Y = C -> X
|
||||
* A X - B^T Y = C -> X
|
||||
* A^T X - B^T Y = C -> X
|
||||
|
||||
### `xtgsyl`
|
||||
Solution of the generalized Sylvester equations
|
||||
|
||||
Routines: `stgsyl`, `dtgsyl`, `ctgsyl`, `ztgsyl`
|
||||
|
||||
Operations:
|
||||
* A R - L B = C, D R - L E = F -> L, R
|
||||
* A^T R + D^T L = C, R B^T - L E^T = -F -> L, R
|
||||
|
||||
|
||||
Covered BLAS extension
|
||||
----------------------
|
||||
|
||||
### `xgemmt`
|
||||
Matrix-matrix product updating only a triangular part of the result
|
||||
|
||||
Routines: `sgemmt`, `dgemmt`, `cgemmt`, `zgemmt`
|
||||
|
||||
Operations:
|
||||
* C = alpha A B + beta C
|
||||
* C = alpha A B^T + beta C
|
||||
* C = alpha A^T B + beta C
|
||||
* C = alpha A^T B^T + beta C
|
||||
|
||||
|
||||
Not covered yet
|
||||
---------------
|
||||
The following operation is implemented as a blocked algorithm in LAPACK but
|
||||
currently not yet covered in ReLAPACK as a recursive algorithm
|
||||
|
||||
### `xpstrf`
|
||||
Cholesky decomposition of a positive semi-definite matrix with complete pivoting.
|
||||
|
||||
Routines: `spstrf`, `dpstrf`, `cpstrf`, `zpstrf`
|
||||
|
||||
Operations:
|
||||
* P L L^T P^T = A
|
||||
* P U^T U P^T = A
|
||||
|
||||
|
||||
Not covered: extra FLOPs
|
||||
------------------------
|
||||
The following routines are not covered because recursive variants would require
|
||||
considerably more FLOPs or operate on banded matrices.
|
||||
|
||||
### QR decomposition (and related)
|
||||
Routines:
|
||||
* `sgeqrf`, `dgeqrf`, `cgeqrf`, `zgeqrf`
|
||||
* `sgerqf`, `dgerqf`, `cgerqf`, `zgerqf`
|
||||
* `sgeqlf`, `dgeqlf`, `cgeqlf`, `zgeqlf`
|
||||
* `sgelqf`, `dgelqf`, `cgelqf`, `zgelqf`
|
||||
* `stzrzf`, `dtzrzf`, `ctzrzf`, `ztzrzf`
|
||||
|
||||
Operations: Q R = A, R Q = A, Q L = A, L Q = A, R Z = A
|
||||
|
||||
Routines for multiplication with Q:
|
||||
* `sormqr`, `dormqr`, `cunmqr`, `zunmqr`
|
||||
* `sormrq`, `dormrq`, `cunmrq`, `zunmrq`
|
||||
* `sormql`, `dormql`, `cunmql`, `zunmql`
|
||||
* `sormlq`, `dormlq`, `cunmlq`, `zunmlq`
|
||||
* `sormrz`, `dormrz`, `cunmrz`, `zunmrz`
|
||||
|
||||
Operations: C = Q C, C = C Q, C = Q^T C, C = C Q^T
|
||||
|
||||
Routines for construction of Q:
|
||||
* `sorgqr`, `dorgqr`, `cungqr`, `zungqr`
|
||||
* `sorgrq`, `dorgrq`, `cungrq`, `zungrq`
|
||||
* `sorgql`, `dorgql`, `cungql`, `zungql`
|
||||
* `sorglq`, `dorglq`, `cunglq`, `zunglq`
|
||||
|
||||
### Symmetric reduction to tridiagonal
|
||||
Routines: `ssytrd`, `dsytrd`, `csytrd`, `zsytrd`
|
||||
|
||||
Operation: Q T Q^T = A
|
||||
|
||||
### Symmetric reduction to bidiagonal
|
||||
Routines: `ssybrd`, `dsybrd`, `csybrd`, `zsybrd`
|
||||
|
||||
Operation: Q T P^T = A
|
||||
|
||||
### Reduction to upper Hessenberg
|
||||
Routines: `sgehrd`, `dgehrd`, `cgehrd`, `zgehrd`
|
||||
|
||||
Operation: Q H Q^T = A
|
|
@ -0,0 +1,67 @@
|
|||
#ifndef RELAPACK_H
|
||||
#define RELAPACK_H
|
||||
|
||||
void RELAPACK_slauum(const char *, const int *, float *, const int *, int *);
|
||||
void RELAPACK_dlauum(const char *, const int *, double *, const int *, int *);
|
||||
void RELAPACK_clauum(const char *, const int *, float *, const int *, int *);
|
||||
void RELAPACK_zlauum(const char *, const int *, double *, const int *, int *);
|
||||
|
||||
void RELAPACK_strtri(const char *, const char *, const int *, float *, const int *, int *);
|
||||
void RELAPACK_dtrtri(const char *, const char *, const int *, double *, const int *, int *);
|
||||
void RELAPACK_ctrtri(const char *, const char *, const int *, float *, const int *, int *);
|
||||
void RELAPACK_ztrtri(const char *, const char *, const int *, double *, const int *, int *);
|
||||
|
||||
void RELAPACK_spotrf(const char *, const int *, float *, const int *, int *);
|
||||
void RELAPACK_dpotrf(const char *, const int *, double *, const int *, int *);
|
||||
void RELAPACK_cpotrf(const char *, const int *, float *, const int *, int *);
|
||||
void RELAPACK_zpotrf(const char *, const int *, double *, const int *, int *);
|
||||
|
||||
void RELAPACK_spbtrf(const char *, const int *, const int *, float *, const int *, int *);
|
||||
void RELAPACK_dpbtrf(const char *, const int *, const int *, double *, const int *, int *);
|
||||
void RELAPACK_cpbtrf(const char *, const int *, const int *, float *, const int *, int *);
|
||||
void RELAPACK_zpbtrf(const char *, const int *, const int *, double *, const int *, int *);
|
||||
|
||||
void RELAPACK_ssytrf(const char *, const int *, float *, const int *, int *, float *, const int *, int *);
|
||||
void RELAPACK_dsytrf(const char *, const int *, double *, const int *, int *, double *, const int *, int *);
|
||||
void RELAPACK_csytrf(const char *, const int *, float *, const int *, int *, float *, const int *, int *);
|
||||
void RELAPACK_chetrf(const char *, const int *, float *, const int *, int *, float *, const int *, int *);
|
||||
void RELAPACK_zsytrf(const char *, const int *, double *, const int *, int *, double *, const int *, int *);
|
||||
void RELAPACK_zhetrf(const char *, const int *, double *, const int *, int *, double *, const int *, int *);
|
||||
void RELAPACK_ssytrf_rook(const char *, const int *, float *, const int *, int *, float *, const int *, int *);
|
||||
void RELAPACK_dsytrf_rook(const char *, const int *, double *, const int *, int *, double *, const int *, int *);
|
||||
void RELAPACK_csytrf_rook(const char *, const int *, float *, const int *, int *, float *, const int *, int *);
|
||||
void RELAPACK_chetrf_rook(const char *, const int *, float *, const int *, int *, float *, const int *, int *);
|
||||
void RELAPACK_zsytrf_rook(const char *, const int *, double *, const int *, int *, double *, const int *, int *);
|
||||
void RELAPACK_zhetrf_rook(const char *, const int *, double *, const int *, int *, double *, const int *, int *);
|
||||
|
||||
void RELAPACK_sgetrf(const int *, const int *, float *, const int *, int *, int *);
|
||||
void RELAPACK_dgetrf(const int *, const int *, double *, const int *, int *, int *);
|
||||
void RELAPACK_cgetrf(const int *, const int *, float *, const int *, int *, int *);
|
||||
void RELAPACK_zgetrf(const int *, const int *, double *, const int *, int *, int *);
|
||||
|
||||
void RELAPACK_sgbtrf(const int *, const int *, const int *, const int *, float *, const int *, int *, int *);
|
||||
void RELAPACK_dgbtrf(const int *, const int *, const int *, const int *, double *, const int *, int *, int *);
|
||||
void RELAPACK_cgbtrf(const int *, const int *, const int *, const int *, float *, const int *, int *, int *);
|
||||
void RELAPACK_zgbtrf(const int *, const int *, const int *, const int *, double *, const int *, int *, int *);
|
||||
|
||||
void RELAPACK_ssygst(const int *, const char *, const int *, float *, const int *, const float *, const int *, int *);
|
||||
void RELAPACK_dsygst(const int *, const char *, const int *, double *, const int *, const double *, const int *, int *);
|
||||
void RELAPACK_chegst(const int *, const char *, const int *, float *, const int *, const float *, const int *, int *);
|
||||
void RELAPACK_zhegst(const int *, const char *, const int *, double *, const int *, const double *, const int *, int *);
|
||||
|
||||
void RELAPACK_strsyl(const char *, const char *, const int *, const int *, const int *, const float *, const int *, const float *, const int *, float *, const int *, float *, int *);
|
||||
void RELAPACK_dtrsyl(const char *, const char *, const int *, const int *, const int *, const double *, const int *, const double *, const int *, double *, const int *, double *, int *);
|
||||
void RELAPACK_ctrsyl(const char *, const char *, const int *, const int *, const int *, const float *, const int *, const float *, const int *, float *, const int *, float *, int *);
|
||||
void RELAPACK_ztrsyl(const char *, const char *, const int *, const int *, const int *, const double *, const int *, const double *, const int *, double *, const int *, double *, int *);
|
||||
|
||||
void RELAPACK_stgsyl(const char *, const int *, const int *, const int *, const float *, const int *, const float *, const int *, float *, const int *, const float *, const int *, const float *, const int *, float *, const int *, float *, float *, float *, const int *, int *, int *);
|
||||
void RELAPACK_dtgsyl(const char *, const int *, const int *, const int *, const double *, const int *, const double *, const int *, double *, const int *, const double *, const int *, const double *, const int *, double *, const int *, double *, double *, double *, const int *, int *, int *);
|
||||
void RELAPACK_ctgsyl(const char *, const int *, const int *, const int *, const float *, const int *, const float *, const int *, float *, const int *, const float *, const int *, const float *, const int *, float *, const int *, float *, float *, float *, const int *, int *, int *);
|
||||
void RELAPACK_ztgsyl(const char *, const int *, const int *, const int *, const double *, const int *, const double *, const int *, double *, const int *, const double *, const int *, const double *, const int *, double *, const int *, double *, double *, double *, const int *, int *, int *);
|
||||
|
||||
void RELAPACK_sgemmt(const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
|
||||
void RELAPACK_dgemmt(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
|
||||
void RELAPACK_cgemmt(const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
|
||||
void RELAPACK_zgemmt(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
|
||||
|
||||
#endif /* RELAPACK_H */
|
|
@ -0,0 +1,61 @@
|
|||
#ifndef BLAS_H
|
||||
#define BLAS_H
|
||||
|
||||
extern void BLAS(sswap)(const int *, float *, const int *, float *, const int *);
|
||||
extern void BLAS(dswap)(const int *, double *, const int *, double *, const int *);
|
||||
extern void BLAS(cswap)(const int *, float *, const int *, float *, const int *);
|
||||
extern void BLAS(zswap)(const int *, double *, const int *, double *, const int *);
|
||||
|
||||
extern void BLAS(sscal)(const int *, const float *, float *, const int *);
|
||||
extern void BLAS(dscal)(const int *, const double *, double *, const int *);
|
||||
extern void BLAS(cscal)(const int *, const float *, float *, const int *);
|
||||
extern void BLAS(zscal)(const int *, const double *, double *, const int *);
|
||||
|
||||
extern void BLAS(saxpy)(const int *, const float *, const float *, const int *, float *, const int *);
|
||||
extern void BLAS(daxpy)(const int *, const double *, const double *, const int *, double *, const int *);
|
||||
extern void BLAS(caxpy)(const int *, const float *, const float *, const int *, float *, const int *);
|
||||
extern void BLAS(zaxpy)(const int *, const double *, const double *, const int *, double *, const int *);
|
||||
|
||||
extern void BLAS(sgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, const float *, const int*);
|
||||
extern void BLAS(dgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, const double *, const int*);
|
||||
extern void BLAS(cgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, const float *, const int*);
|
||||
extern void BLAS(zgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, const double *, const int*);
|
||||
|
||||
extern void BLAS(sgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, const float *, const int*);
|
||||
extern void BLAS(dgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, const double *, const int*);
|
||||
extern void BLAS(cgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, const float *, const int*);
|
||||
extern void BLAS(zgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, const double *, const int*);
|
||||
|
||||
extern void BLAS(strsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *);
|
||||
extern void BLAS(dtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
|
||||
extern void BLAS(ctrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *);
|
||||
extern void BLAS(ztrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
|
||||
|
||||
extern void BLAS(strmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *);
|
||||
extern void BLAS(dtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
|
||||
extern void BLAS(ctrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *);
|
||||
extern void BLAS(ztrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
|
||||
|
||||
extern void BLAS(ssyrk)(const char *, const char *, const int *, const int *, const float *, float *, const int *, const float *, float *, const int *);
|
||||
extern void BLAS(dsyrk)(const char *, const char *, const int *, const int *, const double *, double *, const int *, const double *, double *, const int *);
|
||||
extern void BLAS(cherk)(const char *, const char *, const int *, const int *, const float *, float *, const int *, const float *, float *, const int *);
|
||||
extern void BLAS(zherk)(const char *, const char *, const int *, const int *, const double *, double *, const int *, const double *, double *, const int *);
|
||||
|
||||
extern void BLAS(ssymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
|
||||
extern void BLAS(dsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
|
||||
extern void BLAS(chemm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
|
||||
extern void BLAS(zhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
|
||||
|
||||
extern void BLAS(ssyr2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
|
||||
extern void BLAS(dsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
|
||||
extern void BLAS(cher2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *);
|
||||
extern void BLAS(zher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
|
||||
|
||||
#if HAVE_XGEMMT
|
||||
extern void BLAS(sgemmt)(const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, const float *, const int*);
|
||||
extern void BLAS(dgemmt)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, const double *, const int*);
|
||||
extern void BLAS(cgemmt)(const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, const float *, const int*);
|
||||
extern void BLAS(zgemmt)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, const double *, const int*);
|
||||
#endif
|
||||
|
||||
#endif /* BLAS_H */
|
|
@ -0,0 +1,230 @@
|
|||
#include "relapack.h"
|
||||
#include "stdlib.h"
|
||||
|
||||
static void RELAPACK_cgbtrf_rec(const int *, const int *, const int *,
|
||||
const int *, float *, const int *, int *, float *, const int *, float *,
|
||||
const int *, int *);
|
||||
|
||||
|
||||
/** CGBTRF computes an LU factorization of a complex m-by-n band matrix A using partial pivoting with row interchanges.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's cgbtrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d0/d3a/cgbtrf_8f.html
|
||||
* */
|
||||
void RELAPACK_cgbtrf(
|
||||
const int *m, const int *n, const int *kl, const int *ku,
|
||||
float *Ab, const int *ldAb, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
*info = 0;
|
||||
if (*m < 0)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*kl < 0)
|
||||
*info = -3;
|
||||
else if (*ku < 0)
|
||||
*info = -4;
|
||||
else if (*ldAb < 2 * *kl + *ku + 1)
|
||||
*info = -6;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("CGBTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constant
|
||||
const float ZERO[] = { 0., 0. };
|
||||
|
||||
// Result upper band width
|
||||
const int kv = *ku + *kl;
|
||||
|
||||
// Unskew A
|
||||
const int ldA[] = { *ldAb - 1 };
|
||||
float *const A = Ab + 2 * kv;
|
||||
|
||||
// Zero upper diagonal fill-in elements
|
||||
int i, j;
|
||||
for (j = 0; j < *n; j++) {
|
||||
float *const A_j = A + 2 * *ldA * j;
|
||||
for (i = MAX(0, j - kv); i < j - *ku; i++)
|
||||
A_j[2 * i] = A_j[2 * i + 1] = 0.;
|
||||
}
|
||||
|
||||
// Allocate work space
|
||||
const int n1 = CREC_SPLIT(*n);
|
||||
const int mWorkl = (kv > n1) ? MAX(1, *m - *kl) : kv;
|
||||
const int nWorkl = (kv > n1) ? n1 : kv;
|
||||
const int mWorku = (*kl > n1) ? n1 : *kl;
|
||||
const int nWorku = (*kl > n1) ? MAX(0, *n - *kl) : *kl;
|
||||
float *Workl = malloc(mWorkl * nWorkl * 2 * sizeof(float));
|
||||
float *Worku = malloc(mWorku * nWorku * 2 * sizeof(float));
|
||||
LAPACK(claset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl);
|
||||
LAPACK(claset)("U", &mWorku, &nWorku, ZERO, ZERO, Worku, &mWorku);
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_cgbtrf_rec(m, n, kl, ku, Ab, ldAb, ipiv, Workl, &mWorkl, Worku, &mWorku, info);
|
||||
|
||||
// Free work space
|
||||
free(Workl);
|
||||
free(Worku);
|
||||
}
|
||||
|
||||
|
||||
/** cgbtrf's recursive compute kernel */
|
||||
static void RELAPACK_cgbtrf_rec(
|
||||
const int *m, const int *n, const int *kl, const int *ku,
|
||||
float *Ab, const int *ldAb, int *ipiv,
|
||||
float *Workl, const int *ldWorkl, float *Worku, const int *ldWorku,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_CGBTRF, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(cgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1., 0. };
|
||||
const float MONE[] = { -1., 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Loop iterators
|
||||
int i, j;
|
||||
|
||||
// Output upper band width
|
||||
const int kv = *ku + *kl;
|
||||
|
||||
// Unskew A
|
||||
const int ldA[] = { *ldAb - 1 };
|
||||
float *const A = Ab + 2 * kv;
|
||||
|
||||
// Splitting
|
||||
const int n1 = MIN(CREC_SPLIT(*n), *kl);
|
||||
const int n2 = *n - n1;
|
||||
const int m1 = MIN(n1, *m);
|
||||
const int m2 = *m - m1;
|
||||
const int mn1 = MIN(m1, n1);
|
||||
const int mn2 = MIN(m2, n2);
|
||||
|
||||
// Ab_L *
|
||||
// Ab_BR
|
||||
float *const Ab_L = Ab;
|
||||
float *const Ab_BR = Ab + 2 * *ldAb * n1;
|
||||
|
||||
// A_L A_R
|
||||
float *const A_L = A;
|
||||
float *const A_R = A + 2 * *ldA * n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
float *const A_TL = A;
|
||||
float *const A_TR = A + 2 * *ldA * n1;
|
||||
float *const A_BL = A + 2 * m1;
|
||||
float *const A_BR = A + 2 * *ldA * n1 + 2 * m1;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_T = ipiv;
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// Banded splitting
|
||||
const int n21 = MIN(n2, kv - n1);
|
||||
const int n22 = MIN(n2 - n21, n1);
|
||||
const int m21 = MIN(m2, *kl - m1);
|
||||
const int m22 = MIN(m2 - m21, m1);
|
||||
|
||||
// n1 n21 n22
|
||||
// m * A_Rl ARr
|
||||
float *const A_Rl = A_R;
|
||||
float *const A_Rr = A_R + 2 * *ldA * n21;
|
||||
|
||||
// n1 n21 n22
|
||||
// m1 * A_TRl A_TRr
|
||||
// m21 A_BLt A_BRtl A_BRtr
|
||||
// m22 A_BLb A_BRbl A_BRbr
|
||||
float *const A_TRl = A_TR;
|
||||
float *const A_TRr = A_TR + 2 * *ldA * n21;
|
||||
float *const A_BLt = A_BL;
|
||||
float *const A_BLb = A_BL + 2 * m21;
|
||||
float *const A_BRtl = A_BR;
|
||||
float *const A_BRtr = A_BR + 2 * *ldA * n21;
|
||||
float *const A_BRbl = A_BR + 2 * m21;
|
||||
float *const A_BRbr = A_BR + 2 * *ldA * n21 + 2 * m21;
|
||||
|
||||
// recursion(Ab_L, ipiv_T)
|
||||
RELAPACK_cgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info);
|
||||
|
||||
// Workl = A_BLb
|
||||
LAPACK(clacpy)("U", &m22, &n1, A_BLb, ldA, Workl, ldWorkl);
|
||||
|
||||
// partially redo swaps in A_L
|
||||
for (i = 0; i < mn1; i++) {
|
||||
const int ip = ipiv_T[i] - 1;
|
||||
if (ip != i) {
|
||||
if (ip < *kl)
|
||||
BLAS(cswap)(&i, A_L + 2 * i, ldA, A_L + 2 * ip, ldA);
|
||||
else
|
||||
BLAS(cswap)(&i, A_L + 2 * i, ldA, Workl + 2 * (ip - *kl), ldWorkl);
|
||||
}
|
||||
}
|
||||
|
||||
// apply pivots to A_Rl
|
||||
LAPACK(claswp)(&n21, A_Rl, ldA, iONE, &mn1, ipiv_T, iONE);
|
||||
|
||||
// apply pivots to A_Rr columnwise
|
||||
for (j = 0; j < n22; j++) {
|
||||
float *const A_Rrj = A_Rr + 2 * *ldA * j;
|
||||
for (i = j; i < mn1; i++) {
|
||||
const int ip = ipiv_T[i] - 1;
|
||||
if (ip != i) {
|
||||
const float tmpr = A_Rrj[2 * i];
|
||||
const float tmpc = A_Rrj[2 * i + 1];
|
||||
A_Rrj[2 * i] = A_Rrj[2 * ip];
|
||||
A_Rrj[2 * i + 1] = A_Rr[2 * ip + 1];
|
||||
A_Rrj[2 * ip] = tmpr;
|
||||
A_Rrj[2 * ip + 1] = tmpc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A_TRl = A_TL \ A_TRl
|
||||
BLAS(ctrsm)("L", "L", "N", "U", &m1, &n21, ONE, A_TL, ldA, A_TRl, ldA);
|
||||
// Worku = A_TRr
|
||||
LAPACK(clacpy)("L", &m1, &n22, A_TRr, ldA, Worku, ldWorku);
|
||||
// Worku = A_TL \ Worku
|
||||
BLAS(ctrsm)("L", "L", "N", "U", &m1, &n22, ONE, A_TL, ldA, Worku, ldWorku);
|
||||
// A_TRr = Worku
|
||||
LAPACK(clacpy)("L", &m1, &n22, Worku, ldWorku, A_TRr, ldA);
|
||||
// A_BRtl = A_BRtl - A_BLt * A_TRl
|
||||
BLAS(cgemm)("N", "N", &m21, &n21, &n1, MONE, A_BLt, ldA, A_TRl, ldA, ONE, A_BRtl, ldA);
|
||||
// A_BRbl = A_BRbl - Workl * A_TRl
|
||||
BLAS(cgemm)("N", "N", &m22, &n21, &n1, MONE, Workl, ldWorkl, A_TRl, ldA, ONE, A_BRbl, ldA);
|
||||
// A_BRtr = A_BRtr - A_BLt * Worku
|
||||
BLAS(cgemm)("N", "N", &m21, &n22, &n1, MONE, A_BLt, ldA, Worku, ldWorku, ONE, A_BRtr, ldA);
|
||||
// A_BRbr = A_BRbr - Workl * Worku
|
||||
BLAS(cgemm)("N", "N", &m22, &n22, &n1, MONE, Workl, ldWorkl, Worku, ldWorku, ONE, A_BRbr, ldA);
|
||||
|
||||
// partially undo swaps in A_L
|
||||
for (i = mn1 - 1; i >= 0; i--) {
|
||||
const int ip = ipiv_T[i] - 1;
|
||||
if (ip != i) {
|
||||
if (ip < *kl)
|
||||
BLAS(cswap)(&i, A_L + 2 * i, ldA, A_L + 2 * ip, ldA);
|
||||
else
|
||||
BLAS(cswap)(&i, A_L + 2 * i, ldA, Workl + 2 * (ip - *kl), ldWorkl);
|
||||
}
|
||||
}
|
||||
|
||||
// recursion(Ab_BR, ipiv_B)
|
||||
RELAPACK_cgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
// shift pivots
|
||||
for (i = 0; i < mn2; i++)
|
||||
ipiv_B[i] += n1;
|
||||
}
|
|
@ -0,0 +1,167 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_cgemmt_rec(const char *, const char *, const char *,
|
||||
const int *, const int *, const float *, const float *, const int *,
|
||||
const float *, const int *, const float *, float *, const int *);
|
||||
|
||||
static void RELAPACK_cgemmt_rec2(const char *, const char *, const char *,
|
||||
const int *, const int *, const float *, const float *, const int *,
|
||||
const float *, const int *, const float *, float *, const int *);
|
||||
|
||||
|
||||
/** CGEMMT computes a matrix-matrix product with general matrices but updates
|
||||
* only the upper or lower triangular part of the result matrix.
|
||||
*
|
||||
* This routine performs the same operation as the BLAS routine
|
||||
* cgemm(transA, transB, n, n, k, alpha, A, ldA, B, ldB, beta, C, ldC)
|
||||
* but only updates the triangular part of C specified by uplo:
|
||||
* If (*uplo == 'L'), only the lower triangular part of C is updated,
|
||||
* otherwise the upper triangular part is updated.
|
||||
* */
|
||||
void RELAPACK_cgemmt(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const float *alpha, const float *A, const int *ldA,
|
||||
const float *B, const int *ldB,
|
||||
const float *beta, float *C, const int *ldC
|
||||
) {
|
||||
|
||||
#if HAVE_XGEMMT
|
||||
BLAS(cgemmt)(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, ldC);
|
||||
return;
|
||||
#else
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
const int notransA = LAPACK(lsame)(transA, "N");
|
||||
const int tranA = LAPACK(lsame)(transA, "T");
|
||||
const int ctransA = LAPACK(lsame)(transA, "C");
|
||||
const int notransB = LAPACK(lsame)(transB, "N");
|
||||
const int tranB = LAPACK(lsame)(transB, "T");
|
||||
const int ctransB = LAPACK(lsame)(transB, "C");
|
||||
int info = 0;
|
||||
if (!lower && !upper)
|
||||
info = 1;
|
||||
else if (!tranA && !ctransA && !notransA)
|
||||
info = 2;
|
||||
else if (!tranB && !ctransB && !notransB)
|
||||
info = 3;
|
||||
else if (*n < 0)
|
||||
info = 4;
|
||||
else if (*k < 0)
|
||||
info = 5;
|
||||
else if (*ldA < MAX(1, notransA ? *n : *k))
|
||||
info = 8;
|
||||
else if (*ldB < MAX(1, notransB ? *k : *n))
|
||||
info = 10;
|
||||
else if (*ldC < MAX(1, *n))
|
||||
info = 13;
|
||||
if (info) {
|
||||
LAPACK(xerbla)("CGEMMT", &info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
const char cleantransA = notransA ? 'N' : (tranA ? 'T' : 'C');
|
||||
const char cleantransB = notransB ? 'N' : (tranB ? 'T' : 'C');
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_cgemmt_rec(&cleanuplo, &cleantransA, &cleantransB, n, k, alpha, A, ldA, B, ldB, beta, C, ldC);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** cgemmt's recursive compute kernel */
|
||||
static void RELAPACK_cgemmt_rec(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const float *alpha, const float *A, const int *ldA,
|
||||
const float *B, const int *ldB,
|
||||
const float *beta, float *C, const int *ldC
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_CGEMMT, 1)) {
|
||||
// Unblocked
|
||||
RELAPACK_cgemmt_rec2(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, ldC);
|
||||
return;
|
||||
}
|
||||
|
||||
// Splitting
|
||||
const int n1 = CREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_T
|
||||
// A_B
|
||||
const float *const A_T = A;
|
||||
const float *const A_B = A + 2 * ((*transA == 'N') ? n1 : *ldA * n1);
|
||||
|
||||
// B_L B_R
|
||||
const float *const B_L = B;
|
||||
const float *const B_R = B + 2 * ((*transB == 'N') ? *ldB * n1 : n1);
|
||||
|
||||
// C_TL C_TR
|
||||
// C_BL C_BR
|
||||
float *const C_TL = C;
|
||||
float *const C_TR = C + 2 * *ldC * n1;
|
||||
float *const C_BL = C + 2 * n1;
|
||||
float *const C_BR = C + 2 * *ldC * n1 + 2 * n1;
|
||||
|
||||
// recursion(C_TL)
|
||||
RELAPACK_cgemmt_rec(uplo, transA, transB, &n1, k, alpha, A_T, ldA, B_L, ldB, beta, C_TL, ldC);
|
||||
|
||||
if (*uplo == 'L')
|
||||
// C_BL = alpha A_B B_L + beta C_BL
|
||||
BLAS(cgemm)(transA, transB, &n2, &n1, k, alpha, A_B, ldA, B_L, ldB, beta, C_BL, ldC);
|
||||
else
|
||||
// C_TR = alpha A_T B_R + beta C_TR
|
||||
BLAS(cgemm)(transA, transB, &n1, &n2, k, alpha, A_T, ldA, B_R, ldB, beta, C_TR, ldC);
|
||||
|
||||
// recursion(C_BR)
|
||||
RELAPACK_cgemmt_rec(uplo, transA, transB, &n2, k, alpha, A_B, ldA, B_R, ldB, beta, C_BR, ldC);
|
||||
}
|
||||
|
||||
|
||||
/** cgemmt's unblocked compute kernel */
|
||||
static void RELAPACK_cgemmt_rec2(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const float *alpha, const float *A, const int *ldA,
|
||||
const float *B, const int *ldB,
|
||||
const float *beta, float *C, const int *ldC
|
||||
) {
|
||||
|
||||
const int incB = (*transB == 'N') ? 1 : *ldB;
|
||||
const int incC = 1;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < *n; i++) {
|
||||
// A_0
|
||||
// A_i
|
||||
const float *const A_0 = A;
|
||||
const float *const A_i = A + 2 * ((*transA == 'N') ? i : *ldA * i);
|
||||
|
||||
// * B_i *
|
||||
const float *const B_i = B + 2 * ((*transB == 'N') ? *ldB * i : i);
|
||||
|
||||
// * C_0i *
|
||||
// * C_ii *
|
||||
float *const C_0i = C + 2 * *ldC * i;
|
||||
float *const C_ii = C + 2 * *ldC * i + 2 * i;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
const int nmi = *n - i;
|
||||
if (*transA == 'N')
|
||||
BLAS(cgemv)(transA, &nmi, k, alpha, A_i, ldA, B_i, &incB, beta, C_ii, &incC);
|
||||
else
|
||||
BLAS(cgemv)(transA, k, &nmi, alpha, A_i, ldA, B_i, &incB, beta, C_ii, &incC);
|
||||
} else {
|
||||
const int ip1 = i + 1;
|
||||
if (*transA == 'N')
|
||||
BLAS(cgemv)(transA, &ip1, k, alpha, A_0, ldA, B_i, &incB, beta, C_0i, &incC);
|
||||
else
|
||||
BLAS(cgemv)(transA, k, &ip1, alpha, A_0, ldA, B_i, &incB, beta, C_0i, &incC);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_cgetrf_rec(const int *, const int *, float *,
|
||||
const int *, int *, int *);
|
||||
|
||||
|
||||
/** CGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's cgetrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d9/dfb/cgetrf_8f.html
|
||||
*/
|
||||
void RELAPACK_cgetrf(
|
||||
const int *m, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
*info = 0;
|
||||
if (*m < 0)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("CGETRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
const int sn = MIN(*m, *n);
|
||||
|
||||
RELAPACK_cgetrf_rec(m, &sn, A, ldA, ipiv, info);
|
||||
|
||||
// Right remainder
|
||||
if (*m < *n) {
|
||||
// Constants
|
||||
const float ONE[] = { 1., 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Splitting
|
||||
const int rn = *n - *m;
|
||||
|
||||
// A_L A_R
|
||||
const float *const A_L = A;
|
||||
float *const A_R = A + 2 * *ldA * *m;
|
||||
|
||||
// A_R = apply(ipiv, A_R)
|
||||
LAPACK(claswp)(&rn, A_R, ldA, iONE, m, ipiv, iONE);
|
||||
// A_R = A_L \ A_R
|
||||
BLAS(ctrsm)("L", "L", "N", "U", m, &rn, ONE, A_L, ldA, A_R, ldA);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** cgetrf's recursive compute kernel */
|
||||
static void RELAPACK_cgetrf_rec(
|
||||
const int *m, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_CGETRF, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(cgetf2)(m, n, A, ldA, ipiv, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1., 0. };
|
||||
const float MONE[] = { -1., 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Splitting
|
||||
const int n1 = CREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
const int m2 = *m - n1;
|
||||
|
||||
// A_L A_R
|
||||
float *const A_L = A;
|
||||
float *const A_R = A + 2 * *ldA * n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
float *const A_TL = A;
|
||||
float *const A_TR = A + 2 * *ldA * n1;
|
||||
float *const A_BL = A + 2 * n1;
|
||||
float *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_T = ipiv;
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// recursion(A_L, ipiv_T)
|
||||
RELAPACK_cgetrf_rec(m, &n1, A_L, ldA, ipiv_T, info);
|
||||
// apply pivots to A_R
|
||||
LAPACK(claswp)(&n2, A_R, ldA, iONE, &n1, ipiv_T, iONE);
|
||||
|
||||
// A_TR = A_TL \ A_TR
|
||||
BLAS(ctrsm)("L", "L", "N", "U", &n1, &n2, ONE, A_TL, ldA, A_TR, ldA);
|
||||
// A_BR = A_BR - A_BL * A_TR
|
||||
BLAS(cgemm)("N", "N", &m2, &n2, &n1, MONE, A_BL, ldA, A_TR, ldA, ONE, A_BR, ldA);
|
||||
|
||||
// recursion(A_BR, ipiv_B)
|
||||
RELAPACK_cgetrf_rec(&m2, &n2, A_BR, ldA, ipiv_B, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
// apply pivots to A_BL
|
||||
LAPACK(claswp)(&n1, A_BL, ldA, iONE, &n2, ipiv_B, iONE);
|
||||
// shift pivots
|
||||
int i;
|
||||
for (i = 0; i < n2; i++)
|
||||
ipiv_B[i] += n1;
|
||||
}
|
|
@ -0,0 +1,212 @@
|
|||
#include "relapack.h"
|
||||
#if XSYGST_ALLOW_MALLOC
|
||||
#include "stdlib.h"
|
||||
#endif
|
||||
|
||||
static void RELAPACK_chegst_rec(const int *, const char *, const int *,
|
||||
float *, const int *, const float *, const int *,
|
||||
float *, const int *, int *);
|
||||
|
||||
|
||||
/** CHEGST reduces a complex Hermitian-definite generalized eigenproblem to standard form.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's chegst.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d7/d2a/chegst_8f.html
|
||||
* */
|
||||
void RELAPACK_chegst(
|
||||
const int *itype, const char *uplo, const int *n,
|
||||
float *A, const int *ldA, const float *B, const int *ldB,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (*itype < 1 || *itype > 3)
|
||||
*info = -1;
|
||||
else if (!lower && !upper)
|
||||
*info = -2;
|
||||
else if (*n < 0)
|
||||
*info = -3;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -5;
|
||||
else if (*ldB < MAX(1, *n))
|
||||
*info = -7;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("CHEGST", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Allocate work space
|
||||
float *Work = NULL;
|
||||
int lWork = 0;
|
||||
#if XSYGST_ALLOW_MALLOC
|
||||
const int n1 = CREC_SPLIT(*n);
|
||||
lWork = n1 * (*n - n1);
|
||||
Work = malloc(lWork * 2 * sizeof(float));
|
||||
if (!Work)
|
||||
lWork = 0;
|
||||
#endif
|
||||
|
||||
// recursive kernel
|
||||
RELAPACK_chegst_rec(itype, &cleanuplo, n, A, ldA, B, ldB, Work, &lWork, info);
|
||||
|
||||
// Free work space
|
||||
#if XSYGST_ALLOW_MALLOC
|
||||
if (Work)
|
||||
free(Work);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** chegst's recursive compute kernel */
|
||||
static void RELAPACK_chegst_rec(
|
||||
const int *itype, const char *uplo, const int *n,
|
||||
float *A, const int *ldA, const float *B, const int *ldB,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_CHEGST, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(chegs2)(itype, uplo, n, A, ldA, B, ldB, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const float ZERO[] = { 0., 0. };
|
||||
const float ONE[] = { 1., 0. };
|
||||
const float MONE[] = { -1., 0. };
|
||||
const float HALF[] = { .5, 0. };
|
||||
const float MHALF[] = { -.5, 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Loop iterator
|
||||
int i;
|
||||
|
||||
// Splitting
|
||||
const int n1 = CREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
float *const A_TL = A;
|
||||
float *const A_TR = A + 2 * *ldA * n1;
|
||||
float *const A_BL = A + 2 * n1;
|
||||
float *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
|
||||
// B_TL B_TR
|
||||
// B_BL B_BR
|
||||
const float *const B_TL = B;
|
||||
const float *const B_TR = B + 2 * *ldB * n1;
|
||||
const float *const B_BL = B + 2 * n1;
|
||||
const float *const B_BR = B + 2 * *ldB * n1 + 2 * n1;
|
||||
|
||||
// recursion(A_TL, B_TL)
|
||||
RELAPACK_chegst_rec(itype, uplo, &n1, A_TL, ldA, B_TL, ldB, Work, lWork, info);
|
||||
|
||||
if (*itype == 1)
|
||||
if (*uplo == 'L') {
|
||||
// A_BL = A_BL / B_TL'
|
||||
BLAS(ctrsm)("R", "L", "C", "N", &n2, &n1, ONE, B_TL, ldB, A_BL, ldA);
|
||||
if (*lWork > n2 * n1) {
|
||||
// T = -1/2 * B_BL * A_TL
|
||||
BLAS(chemm)("R", "L", &n2, &n1, MHALF, A_TL, ldA, B_BL, ldB, ZERO, Work, &n2);
|
||||
// A_BL = A_BL + T
|
||||
for (i = 0; i < n1; i++)
|
||||
BLAS(caxpy)(&n2, ONE, Work + 2 * n2 * i, iONE, A_BL + 2 * *ldA * i, iONE);
|
||||
} else
|
||||
// A_BL = A_BL - 1/2 B_BL * A_TL
|
||||
BLAS(chemm)("R", "L", &n2, &n1, MHALF, A_TL, ldA, B_BL, ldB, ONE, A_BL, ldA);
|
||||
// A_BR = A_BR - A_BL * B_BL' - B_BL * A_BL'
|
||||
BLAS(cher2k)("L", "N", &n2, &n1, MONE, A_BL, ldA, B_BL, ldB, ONE, A_BR, ldA);
|
||||
if (*lWork > n2 * n1)
|
||||
// A_BL = A_BL + T
|
||||
for (i = 0; i < n1; i++)
|
||||
BLAS(caxpy)(&n2, ONE, Work + 2 * n2 * i, iONE, A_BL + 2 * *ldA * i, iONE);
|
||||
else
|
||||
// A_BL = A_BL - 1/2 B_BL * A_TL
|
||||
BLAS(chemm)("R", "L", &n2, &n1, MHALF, A_TL, ldA, B_BL, ldB, ONE, A_BL, ldA);
|
||||
// A_BL = B_BR \ A_BL
|
||||
BLAS(ctrsm)("L", "L", "N", "N", &n2, &n1, ONE, B_BR, ldB, A_BL, ldA);
|
||||
} else {
|
||||
// A_TR = B_TL' \ A_TR
|
||||
BLAS(ctrsm)("L", "U", "C", "N", &n1, &n2, ONE, B_TL, ldB, A_TR, ldA);
|
||||
if (*lWork > n2 * n1) {
|
||||
// T = -1/2 * A_TL * B_TR
|
||||
BLAS(chemm)("L", "U", &n1, &n2, MHALF, A_TL, ldA, B_TR, ldB, ZERO, Work, &n1);
|
||||
// A_TR = A_BL + T
|
||||
for (i = 0; i < n2; i++)
|
||||
BLAS(caxpy)(&n1, ONE, Work + 2 * n1 * i, iONE, A_TR + 2 * *ldA * i, iONE);
|
||||
} else
|
||||
// A_TR = A_TR - 1/2 A_TL * B_TR
|
||||
BLAS(chemm)("L", "U", &n1, &n2, MHALF, A_TL, ldA, B_TR, ldB, ONE, A_TR, ldA);
|
||||
// A_BR = A_BR - A_TR' * B_TR - B_TR' * A_TR
|
||||
BLAS(cher2k)("U", "C", &n2, &n1, MONE, A_TR, ldA, B_TR, ldB, ONE, A_BR, ldA);
|
||||
if (*lWork > n2 * n1)
|
||||
// A_TR = A_BL + T
|
||||
for (i = 0; i < n2; i++)
|
||||
BLAS(caxpy)(&n1, ONE, Work + 2 * n1 * i, iONE, A_TR + 2 * *ldA * i, iONE);
|
||||
else
|
||||
// A_TR = A_TR - 1/2 A_TL * B_TR
|
||||
BLAS(chemm)("L", "U", &n1, &n2, MHALF, A_TL, ldA, B_TR, ldB, ONE, A_TR, ldA);
|
||||
// A_TR = A_TR / B_BR
|
||||
BLAS(ctrsm)("R", "U", "N", "N", &n1, &n2, ONE, B_BR, ldB, A_TR, ldA);
|
||||
}
|
||||
else
|
||||
if (*uplo == 'L') {
|
||||
// A_BL = A_BL * B_TL
|
||||
BLAS(ctrmm)("R", "L", "N", "N", &n2, &n1, ONE, B_TL, ldB, A_BL, ldA);
|
||||
if (*lWork > n2 * n1) {
|
||||
// T = 1/2 * A_BR * B_BL
|
||||
BLAS(chemm)("L", "L", &n2, &n1, HALF, A_BR, ldA, B_BL, ldB, ZERO, Work, &n2);
|
||||
// A_BL = A_BL + T
|
||||
for (i = 0; i < n1; i++)
|
||||
BLAS(caxpy)(&n2, ONE, Work + 2 * n2 * i, iONE, A_BL + 2 * *ldA * i, iONE);
|
||||
} else
|
||||
// A_BL = A_BL + 1/2 A_BR * B_BL
|
||||
BLAS(chemm)("L", "L", &n2, &n1, HALF, A_BR, ldA, B_BL, ldB, ONE, A_BL, ldA);
|
||||
// A_TL = A_TL + A_BL' * B_BL + B_BL' * A_BL
|
||||
BLAS(cher2k)("L", "C", &n1, &n2, ONE, A_BL, ldA, B_BL, ldB, ONE, A_TL, ldA);
|
||||
if (*lWork > n2 * n1)
|
||||
// A_BL = A_BL + T
|
||||
for (i = 0; i < n1; i++)
|
||||
BLAS(caxpy)(&n2, ONE, Work + 2 * n2 * i, iONE, A_BL + 2 * *ldA * i, iONE);
|
||||
else
|
||||
// A_BL = A_BL + 1/2 A_BR * B_BL
|
||||
BLAS(chemm)("L", "L", &n2, &n1, HALF, A_BR, ldA, B_BL, ldB, ONE, A_BL, ldA);
|
||||
// A_BL = B_BR * A_BL
|
||||
BLAS(ctrmm)("L", "L", "C", "N", &n2, &n1, ONE, B_BR, ldB, A_BL, ldA);
|
||||
} else {
|
||||
// A_TR = B_TL * A_TR
|
||||
BLAS(ctrmm)("L", "U", "N", "N", &n1, &n2, ONE, B_TL, ldB, A_TR, ldA);
|
||||
if (*lWork > n2 * n1) {
|
||||
// T = 1/2 * B_TR * A_BR
|
||||
BLAS(chemm)("R", "U", &n1, &n2, HALF, A_BR, ldA, B_TR, ldB, ZERO, Work, &n1);
|
||||
// A_TR = A_TR + T
|
||||
for (i = 0; i < n2; i++)
|
||||
BLAS(caxpy)(&n1, ONE, Work + 2 * n1 * i, iONE, A_TR + 2 * *ldA * i, iONE);
|
||||
} else
|
||||
// A_TR = A_TR + 1/2 B_TR A_BR
|
||||
BLAS(chemm)("R", "U", &n1, &n2, HALF, A_BR, ldA, B_TR, ldB, ONE, A_TR, ldA);
|
||||
// A_TL = A_TL + A_TR * B_TR' + B_TR * A_TR'
|
||||
BLAS(cher2k)("U", "N", &n1, &n2, ONE, A_TR, ldA, B_TR, ldB, ONE, A_TL, ldA);
|
||||
if (*lWork > n2 * n1)
|
||||
// A_TR = A_TR + T
|
||||
for (i = 0; i < n2; i++)
|
||||
BLAS(caxpy)(&n1, ONE, Work + 2 * n1 * i, iONE, A_TR + 2 * *ldA * i, iONE);
|
||||
else
|
||||
// A_TR = A_TR + 1/2 B_TR * A_BR
|
||||
BLAS(chemm)("R", "U", &n1, &n2, HALF, A_BR, ldA, B_TR, ldB, ONE, A_TR, ldA);
|
||||
// A_TR = A_TR * B_BR
|
||||
BLAS(ctrmm)("R", "U", "C", "N", &n1, &n2, ONE, B_BR, ldB, A_TR, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR, B_BR)
|
||||
RELAPACK_chegst_rec(itype, uplo, &n2, A_BR, ldA, B_BR, ldB, Work, lWork, info);
|
||||
}
|
|
@ -0,0 +1,236 @@
|
|||
#include "relapack.h"
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
static void RELAPACK_chetrf_rec(const char *, const int *, const int *, int *,
|
||||
float *, const int *, int *, float *, const int *, int *);
|
||||
|
||||
|
||||
/** CHETRF computes the factorization of a complex Hermitian matrix A using the Bunch-Kaufman diagonal pivoting method.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's chetrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/da/dc1/chetrf_8f.html
|
||||
* */
|
||||
void RELAPACK_chetrf(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
|
||||
// Required work size
|
||||
const int cleanlWork = *n * (*n / 2);
|
||||
int minlWork = cleanlWork;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
minlWork = 1;
|
||||
#endif
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
else if (*lWork < minlWork && *lWork != -1)
|
||||
*info = -7;
|
||||
else if (*lWork == -1) {
|
||||
// Work size query
|
||||
*Work = cleanlWork;
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure Work size
|
||||
float *cleanWork = Work;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (!*info && *lWork < cleanlWork) {
|
||||
cleanWork = malloc(cleanlWork * 2 * sizeof(float));
|
||||
if (!cleanWork)
|
||||
*info = -7;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("CHETRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Dummy argument
|
||||
int nout;
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_chetrf_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info);
|
||||
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (cleanWork != Work)
|
||||
free(cleanWork);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** chetrf's recursive compute kernel */
|
||||
static void RELAPACK_chetrf_rec(
|
||||
const char *uplo, const int *n_full, const int *n, int *n_out,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *ldWork, int *info
|
||||
) {
|
||||
|
||||
// top recursion level?
|
||||
const int top = *n_full == *n;
|
||||
|
||||
if (*n <= MAX(CROSSOVER_CHETRF, 3)) {
|
||||
// Unblocked
|
||||
if (top) {
|
||||
LAPACK(chetf2)(uplo, n, A, ldA, ipiv, info);
|
||||
*n_out = *n;
|
||||
} else
|
||||
RELAPACK_chetrf_rec2(uplo, n_full, n, n_out, A, ldA, ipiv, Work, ldWork, info);
|
||||
return;
|
||||
}
|
||||
|
||||
int info1, info2;
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1., 0. };
|
||||
const float MONE[] = { -1., 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
const int n_rest = *n_full - *n;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// Splitting (setup)
|
||||
int n1 = CREC_SPLIT(*n);
|
||||
int n2 = *n - n1;
|
||||
|
||||
// Work_L *
|
||||
float *const Work_L = Work;
|
||||
|
||||
// recursion(A_L)
|
||||
int n1_out;
|
||||
RELAPACK_chetrf_rec(uplo, n_full, &n1, &n1_out, A, ldA, ipiv, Work_L, ldWork, &info1);
|
||||
n1 = n1_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n2 = *n - n1;
|
||||
const int n_full2 = *n_full - n1;
|
||||
|
||||
// * *
|
||||
// A_BL A_BR
|
||||
// A_BL_B A_BR_B
|
||||
float *const A_BL = A + 2 * n1;
|
||||
float *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
float *const A_BL_B = A + 2 * *n;
|
||||
float *const A_BR_B = A + 2 * *ldA * n1 + 2 * *n;
|
||||
|
||||
// * *
|
||||
// Work_BL Work_BR
|
||||
// * *
|
||||
// (top recursion level: use Work as Work_BR)
|
||||
float *const Work_BL = Work + 2 * n1;
|
||||
float *const Work_BR = top ? Work : Work + 2 * *ldWork * n1 + 2 * n1;
|
||||
const int ldWork_BR = top ? n2 : *ldWork;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// A_BR = A_BR - A_BL Work_BL'
|
||||
RELAPACK_cgemmt(uplo, "N", "T", &n2, &n1, MONE, A_BL, ldA, Work_BL, ldWork, ONE, A_BR, ldA);
|
||||
BLAS(cgemm)("N", "T", &n_rest, &n2, &n1, MONE, A_BL_B, ldA, Work_BL, ldWork, ONE, A_BR_B, ldA);
|
||||
|
||||
// recursion(A_BR)
|
||||
int n2_out;
|
||||
RELAPACK_chetrf_rec(uplo, &n_full2, &n2, &n2_out, A_BR, ldA, ipiv_B, Work_BR, &ldWork_BR, &info2);
|
||||
|
||||
if (n2_out != n2) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// last column of A_BR
|
||||
float *const A_BR_r = A_BR + 2 * *ldA * n2_out + 2 * n2_out;
|
||||
|
||||
// last row of A_BL
|
||||
float *const A_BL_b = A_BL + 2 * n2_out;
|
||||
|
||||
// last row of Work_BL
|
||||
float *const Work_BL_b = Work_BL + 2 * n2_out;
|
||||
|
||||
// A_BR_r = A_BR_r + A_BL_b Work_BL_b'
|
||||
BLAS(cgemv)("N", &n_restp1, &n1, ONE, A_BL_b, ldA, Work_BL_b, ldWork, ONE, A_BR_r, iONE);
|
||||
}
|
||||
n2 = n2_out;
|
||||
|
||||
// shift pivots
|
||||
int i;
|
||||
for (i = 0; i < n2; i++)
|
||||
if (ipiv_B[i] > 0)
|
||||
ipiv_B[i] += n1;
|
||||
else
|
||||
ipiv_B[i] -= n1;
|
||||
|
||||
*info = info1 || info2;
|
||||
*n_out = n1 + n2;
|
||||
} else {
|
||||
// Splitting (setup)
|
||||
int n2 = CREC_SPLIT(*n);
|
||||
int n1 = *n - n2;
|
||||
|
||||
// * Work_R
|
||||
// (top recursion level: use Work as Work_R)
|
||||
float *const Work_R = top ? Work : Work + 2 * *ldWork * n1;
|
||||
|
||||
// recursion(A_R)
|
||||
int n2_out;
|
||||
RELAPACK_chetrf_rec(uplo, n_full, &n2, &n2_out, A, ldA, ipiv, Work_R, ldWork, &info2);
|
||||
const int n2_diff = n2 - n2_out;
|
||||
n2 = n2_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n1 = *n - n2;
|
||||
const int n_full1 = *n_full - n2;
|
||||
|
||||
// * A_TL_T A_TR_T
|
||||
// * A_TL A_TR
|
||||
// * * *
|
||||
float *const A_TL_T = A + 2 * *ldA * n_rest;
|
||||
float *const A_TR_T = A + 2 * *ldA * (n_rest + n1);
|
||||
float *const A_TL = A + 2 * *ldA * n_rest + 2 * n_rest;
|
||||
float *const A_TR = A + 2 * *ldA * (n_rest + n1) + 2 * n_rest;
|
||||
|
||||
// Work_L *
|
||||
// * Work_TR
|
||||
// * *
|
||||
// (top recursion level: Work_R was Work)
|
||||
float *const Work_L = Work;
|
||||
float *const Work_TR = Work + 2 * *ldWork * (top ? n2_diff : n1) + 2 * n_rest;
|
||||
const int ldWork_L = top ? n1 : *ldWork;
|
||||
|
||||
// A_TL = A_TL - A_TR Work_TR'
|
||||
RELAPACK_cgemmt(uplo, "N", "T", &n1, &n2, MONE, A_TR, ldA, Work_TR, ldWork, ONE, A_TL, ldA);
|
||||
BLAS(cgemm)("N", "T", &n_rest, &n1, &n2, MONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, ldA);
|
||||
|
||||
// recursion(A_TL)
|
||||
int n1_out;
|
||||
RELAPACK_chetrf_rec(uplo, &n_full1, &n1, &n1_out, A, ldA, ipiv, Work_L, &ldWork_L, &info1);
|
||||
|
||||
if (n1_out != n1) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// A_TL_T_l = A_TL_T_l + A_TR_T Work_TR_t'
|
||||
BLAS(cgemv)("N", &n_restp1, &n2, ONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, iONE);
|
||||
}
|
||||
n1 = n1_out;
|
||||
|
||||
*info = info2 || info1;
|
||||
*n_out = n1 + n2;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,520 @@
|
|||
/* -- translated by f2c (version 20100827).
|
||||
You must link the resulting object file with libf2c:
|
||||
on Microsoft Windows system, link with libf2c.lib;
|
||||
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
|
||||
or, if you install libf2c.a in a standard place, with -lf2c -lm
|
||||
-- in that order, at the end of the command line, as in
|
||||
cc *.o -lf2c -lm
|
||||
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
|
||||
|
||||
http://www.netlib.org/f2c/libf2c.zip
|
||||
*/
|
||||
|
||||
#include "f2c.h"
|
||||
|
||||
/* Table of constant values */
|
||||
|
||||
static complex c_b1 = {1.f,0.f};
|
||||
static int c__1 = 1;
|
||||
|
||||
/** CHETRF_REC2 computes a partial factorization of a complex Hermitian indefinite matrix using the Bunch-Kau fman diagonal pivoting method
|
||||
*
|
||||
* This routine is a minor modification of LAPACK's clahef.
|
||||
* It serves as an unblocked kernel in the recursive algorithms.
|
||||
* The blocked BLAS Level 3 updates were removed and moved to the
|
||||
* recursive algorithm.
|
||||
* */
|
||||
/* Subroutine */ void RELAPACK_chetrf_rec2(char *uplo, int *n, int *
|
||||
nb, int *kb, complex *a, int *lda, int *ipiv, complex *w,
|
||||
int *ldw, int *info, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
int a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4;
|
||||
float r__1, r__2, r__3, r__4;
|
||||
complex q__1, q__2, q__3, q__4;
|
||||
|
||||
/* Builtin functions */
|
||||
double sqrt(double), r_imag(complex *);
|
||||
void r_cnjg(complex *, complex *), c_div(complex *, complex *, complex *);
|
||||
|
||||
/* Local variables */
|
||||
static int j, k;
|
||||
static float t, r1;
|
||||
static complex d11, d21, d22;
|
||||
static int jj, kk, jp, kp, kw, kkw, imax, jmax;
|
||||
static float alpha;
|
||||
extern logical lsame_(char *, char *, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ int cgemv_(char *, int *, int *, complex *
|
||||
, complex *, int *, complex *, int *, complex *, complex *
|
||||
, int *, ftnlen), ccopy_(int *, complex *, int *,
|
||||
complex *, int *), cswap_(int *, complex *, int *,
|
||||
complex *, int *);
|
||||
static int kstep;
|
||||
static float absakk;
|
||||
extern /* Subroutine */ int clacgv_(int *, complex *, int *);
|
||||
extern int icamax_(int *, complex *, int *);
|
||||
extern /* Subroutine */ int csscal_(int *, float *, complex *, int
|
||||
*);
|
||||
static float colmax, rowmax;
|
||||
|
||||
/* Parameter adjustments */
|
||||
a_dim1 = *lda;
|
||||
a_offset = 1 + a_dim1;
|
||||
a -= a_offset;
|
||||
--ipiv;
|
||||
w_dim1 = *ldw;
|
||||
w_offset = 1 + w_dim1;
|
||||
w -= w_offset;
|
||||
|
||||
/* Function Body */
|
||||
*info = 0;
|
||||
alpha = (sqrt(17.f) + 1.f) / 8.f;
|
||||
if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
|
||||
k = *n;
|
||||
L10:
|
||||
kw = *nb + k - *n;
|
||||
if ((k <= *n - *nb + 1 && *nb < *n) || k < 1) {
|
||||
goto L30;
|
||||
}
|
||||
kstep = 1;
|
||||
i__1 = k - 1;
|
||||
ccopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
i__1 = k + kw * w_dim1;
|
||||
i__2 = k + k * a_dim1;
|
||||
r__1 = a[i__2].r;
|
||||
w[i__1].r = r__1, w[i__1].i = 0.f;
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
q__1.r = -1.f, q__1.i = -0.f;
|
||||
cgemv_("No transpose", &k, &i__1, &q__1, &a[(k + 1) * a_dim1 + 1],
|
||||
lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b1, &w[kw *
|
||||
w_dim1 + 1], &c__1, (ftnlen)12);
|
||||
i__1 = k + kw * w_dim1;
|
||||
i__2 = k + kw * w_dim1;
|
||||
r__1 = w[i__2].r;
|
||||
w[i__1].r = r__1, w[i__1].i = 0.f;
|
||||
}
|
||||
i__1 = k + kw * w_dim1;
|
||||
absakk = (r__1 = w[i__1].r, dabs(r__1));
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
imax = icamax_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
i__1 = imax + kw * w_dim1;
|
||||
colmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[imax
|
||||
+ kw * w_dim1]), dabs(r__2));
|
||||
} else {
|
||||
colmax = 0.f;
|
||||
}
|
||||
if (dmax(absakk,colmax) == 0.f) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + k * a_dim1;
|
||||
r__1 = a[i__2].r;
|
||||
a[i__1].r = r__1, a[i__1].i = 0.f;
|
||||
} else {
|
||||
if (absakk >= alpha * colmax) {
|
||||
kp = k;
|
||||
} else {
|
||||
i__1 = imax - 1;
|
||||
ccopy_(&i__1, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) *
|
||||
w_dim1 + 1], &c__1);
|
||||
i__1 = imax + (kw - 1) * w_dim1;
|
||||
i__2 = imax + imax * a_dim1;
|
||||
r__1 = a[i__2].r;
|
||||
w[i__1].r = r__1, w[i__1].i = 0.f;
|
||||
i__1 = k - imax;
|
||||
ccopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax +
|
||||
1 + (kw - 1) * w_dim1], &c__1);
|
||||
i__1 = k - imax;
|
||||
clacgv_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
q__1.r = -1.f, q__1.i = -0.f;
|
||||
cgemv_("No transpose", &k, &i__1, &q__1, &a[(k + 1) *
|
||||
a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1],
|
||||
ldw, &c_b1, &w[(kw - 1) * w_dim1 + 1], &c__1, (
|
||||
ftnlen)12);
|
||||
i__1 = imax + (kw - 1) * w_dim1;
|
||||
i__2 = imax + (kw - 1) * w_dim1;
|
||||
r__1 = w[i__2].r;
|
||||
w[i__1].r = r__1, w[i__1].i = 0.f;
|
||||
}
|
||||
i__1 = k - imax;
|
||||
jmax = imax + icamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1],
|
||||
&c__1);
|
||||
i__1 = jmax + (kw - 1) * w_dim1;
|
||||
rowmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[
|
||||
jmax + (kw - 1) * w_dim1]), dabs(r__2));
|
||||
if (imax > 1) {
|
||||
i__1 = imax - 1;
|
||||
jmax = icamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
|
||||
/* Computing MAX */
|
||||
i__1 = jmax + (kw - 1) * w_dim1;
|
||||
r__3 = rowmax, r__4 = (r__1 = w[i__1].r, dabs(r__1)) + (
|
||||
r__2 = r_imag(&w[jmax + (kw - 1) * w_dim1]), dabs(
|
||||
r__2));
|
||||
rowmax = dmax(r__3,r__4);
|
||||
}
|
||||
if (absakk >= alpha * colmax * (colmax / rowmax)) {
|
||||
kp = k;
|
||||
} else /* if(complicated condition) */ {
|
||||
i__1 = imax + (kw - 1) * w_dim1;
|
||||
if ((r__1 = w[i__1].r, dabs(r__1)) >= alpha * rowmax) {
|
||||
kp = imax;
|
||||
ccopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
|
||||
w_dim1 + 1], &c__1);
|
||||
} else {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
kk = k - kstep + 1;
|
||||
kkw = *nb + kk - *n;
|
||||
if (kp != kk) {
|
||||
i__1 = kp + kp * a_dim1;
|
||||
i__2 = kk + kk * a_dim1;
|
||||
r__1 = a[i__2].r;
|
||||
a[i__1].r = r__1, a[i__1].i = 0.f;
|
||||
i__1 = kk - 1 - kp;
|
||||
ccopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp +
|
||||
1) * a_dim1], lda);
|
||||
i__1 = kk - 1 - kp;
|
||||
clacgv_(&i__1, &a[kp + (kp + 1) * a_dim1], lda);
|
||||
if (kp > 1) {
|
||||
i__1 = kp - 1;
|
||||
ccopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1
|
||||
+ 1], &c__1);
|
||||
}
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
cswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k
|
||||
+ 1) * a_dim1], lda);
|
||||
}
|
||||
i__1 = *n - kk + 1;
|
||||
cswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw *
|
||||
w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ccopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &
|
||||
c__1);
|
||||
if (k > 1) {
|
||||
i__1 = k + k * a_dim1;
|
||||
r1 = 1.f / a[i__1].r;
|
||||
i__1 = k - 1;
|
||||
csscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1);
|
||||
i__1 = k - 1;
|
||||
clacgv_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
}
|
||||
} else {
|
||||
if (k > 2) {
|
||||
i__1 = k - 1 + kw * w_dim1;
|
||||
d21.r = w[i__1].r, d21.i = w[i__1].i;
|
||||
r_cnjg(&q__2, &d21);
|
||||
c_div(&q__1, &w[k + kw * w_dim1], &q__2);
|
||||
d11.r = q__1.r, d11.i = q__1.i;
|
||||
c_div(&q__1, &w[k - 1 + (kw - 1) * w_dim1], &d21);
|
||||
d22.r = q__1.r, d22.i = q__1.i;
|
||||
q__1.r = d11.r * d22.r - d11.i * d22.i, q__1.i = d11.r *
|
||||
d22.i + d11.i * d22.r;
|
||||
t = 1.f / (q__1.r - 1.f);
|
||||
q__2.r = t, q__2.i = 0.f;
|
||||
c_div(&q__1, &q__2, &d21);
|
||||
d21.r = q__1.r, d21.i = q__1.i;
|
||||
i__1 = k - 2;
|
||||
for (j = 1; j <= i__1; ++j) {
|
||||
i__2 = j + (k - 1) * a_dim1;
|
||||
i__3 = j + (kw - 1) * w_dim1;
|
||||
q__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
|
||||
q__3.i = d11.r * w[i__3].i + d11.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + kw * w_dim1;
|
||||
q__2.r = q__3.r - w[i__4].r, q__2.i = q__3.i - w[i__4]
|
||||
.i;
|
||||
q__1.r = d21.r * q__2.r - d21.i * q__2.i, q__1.i =
|
||||
d21.r * q__2.i + d21.i * q__2.r;
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
i__2 = j + k * a_dim1;
|
||||
r_cnjg(&q__2, &d21);
|
||||
i__3 = j + kw * w_dim1;
|
||||
q__4.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
|
||||
q__4.i = d22.r * w[i__3].i + d22.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + (kw - 1) * w_dim1;
|
||||
q__3.r = q__4.r - w[i__4].r, q__3.i = q__4.i - w[i__4]
|
||||
.i;
|
||||
q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i =
|
||||
q__2.r * q__3.i + q__2.i * q__3.r;
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
/* L20: */
|
||||
}
|
||||
}
|
||||
i__1 = k - 1 + (k - 1) * a_dim1;
|
||||
i__2 = k - 1 + (kw - 1) * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k - 1 + k * a_dim1;
|
||||
i__2 = k - 1 + kw * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + kw * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k - 1;
|
||||
clacgv_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
i__1 = k - 2;
|
||||
clacgv_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -kp;
|
||||
ipiv[k - 1] = -kp;
|
||||
}
|
||||
k -= kstep;
|
||||
goto L10;
|
||||
L30:
|
||||
j = k + 1;
|
||||
L60:
|
||||
jj = j;
|
||||
jp = ipiv[j];
|
||||
if (jp < 0) {
|
||||
jp = -jp;
|
||||
++j;
|
||||
}
|
||||
++j;
|
||||
if (jp != jj && j <= *n) {
|
||||
i__1 = *n - j + 1;
|
||||
cswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda);
|
||||
}
|
||||
if (j <= *n) {
|
||||
goto L60;
|
||||
}
|
||||
*kb = *n - k;
|
||||
} else {
|
||||
k = 1;
|
||||
L70:
|
||||
if ((k >= *nb && *nb < *n) || k > *n) {
|
||||
goto L90;
|
||||
}
|
||||
kstep = 1;
|
||||
i__1 = k + k * w_dim1;
|
||||
i__2 = k + k * a_dim1;
|
||||
r__1 = a[i__2].r;
|
||||
w[i__1].r = r__1, w[i__1].i = 0.f;
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
ccopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &w[k + 1 + k *
|
||||
w_dim1], &c__1);
|
||||
}
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
q__1.r = -1.f, q__1.i = -0.f;
|
||||
cgemv_("No transpose", &i__1, &i__2, &q__1, &a[k + a_dim1], lda, &w[k
|
||||
+ w_dim1], ldw, &c_b1, &w[k + k * w_dim1], &c__1, (ftnlen)12);
|
||||
i__1 = k + k * w_dim1;
|
||||
i__2 = k + k * w_dim1;
|
||||
r__1 = w[i__2].r;
|
||||
w[i__1].r = r__1, w[i__1].i = 0.f;
|
||||
i__1 = k + k * w_dim1;
|
||||
absakk = (r__1 = w[i__1].r, dabs(r__1));
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
imax = k + icamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
i__1 = imax + k * w_dim1;
|
||||
colmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[imax
|
||||
+ k * w_dim1]), dabs(r__2));
|
||||
} else {
|
||||
colmax = 0.f;
|
||||
}
|
||||
if (dmax(absakk,colmax) == 0.f) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + k * a_dim1;
|
||||
r__1 = a[i__2].r;
|
||||
a[i__1].r = r__1, a[i__1].i = 0.f;
|
||||
} else {
|
||||
if (absakk >= alpha * colmax) {
|
||||
kp = k;
|
||||
} else {
|
||||
i__1 = imax - k;
|
||||
ccopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = imax - k;
|
||||
clacgv_(&i__1, &w[k + (k + 1) * w_dim1], &c__1);
|
||||
i__1 = imax + (k + 1) * w_dim1;
|
||||
i__2 = imax + imax * a_dim1;
|
||||
r__1 = a[i__2].r;
|
||||
w[i__1].r = r__1, w[i__1].i = 0.f;
|
||||
if (imax < *n) {
|
||||
i__1 = *n - imax;
|
||||
ccopy_(&i__1, &a[imax + 1 + imax * a_dim1], &c__1, &w[
|
||||
imax + 1 + (k + 1) * w_dim1], &c__1);
|
||||
}
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
q__1.r = -1.f, q__1.i = -0.f;
|
||||
cgemv_("No transpose", &i__1, &i__2, &q__1, &a[k + a_dim1],
|
||||
lda, &w[imax + w_dim1], ldw, &c_b1, &w[k + (k + 1) *
|
||||
w_dim1], &c__1, (ftnlen)12);
|
||||
i__1 = imax + (k + 1) * w_dim1;
|
||||
i__2 = imax + (k + 1) * w_dim1;
|
||||
r__1 = w[i__2].r;
|
||||
w[i__1].r = r__1, w[i__1].i = 0.f;
|
||||
i__1 = imax - k;
|
||||
jmax = k - 1 + icamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1)
|
||||
;
|
||||
i__1 = jmax + (k + 1) * w_dim1;
|
||||
rowmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[
|
||||
jmax + (k + 1) * w_dim1]), dabs(r__2));
|
||||
if (imax < *n) {
|
||||
i__1 = *n - imax;
|
||||
jmax = imax + icamax_(&i__1, &w[imax + 1 + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
/* Computing MAX */
|
||||
i__1 = jmax + (k + 1) * w_dim1;
|
||||
r__3 = rowmax, r__4 = (r__1 = w[i__1].r, dabs(r__1)) + (
|
||||
r__2 = r_imag(&w[jmax + (k + 1) * w_dim1]), dabs(
|
||||
r__2));
|
||||
rowmax = dmax(r__3,r__4);
|
||||
}
|
||||
if (absakk >= alpha * colmax * (colmax / rowmax)) {
|
||||
kp = k;
|
||||
} else /* if(complicated condition) */ {
|
||||
i__1 = imax + (k + 1) * w_dim1;
|
||||
if ((r__1 = w[i__1].r, dabs(r__1)) >= alpha * rowmax) {
|
||||
kp = imax;
|
||||
i__1 = *n - k + 1;
|
||||
ccopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k +
|
||||
k * w_dim1], &c__1);
|
||||
} else {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
kk = k + kstep - 1;
|
||||
if (kp != kk) {
|
||||
i__1 = kp + kp * a_dim1;
|
||||
i__2 = kk + kk * a_dim1;
|
||||
r__1 = a[i__2].r;
|
||||
a[i__1].r = r__1, a[i__1].i = 0.f;
|
||||
i__1 = kp - kk - 1;
|
||||
ccopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk +
|
||||
1) * a_dim1], lda);
|
||||
i__1 = kp - kk - 1;
|
||||
clacgv_(&i__1, &a[kp + (kk + 1) * a_dim1], lda);
|
||||
if (kp < *n) {
|
||||
i__1 = *n - kp;
|
||||
ccopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1
|
||||
+ kp * a_dim1], &c__1);
|
||||
}
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
cswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda);
|
||||
}
|
||||
cswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
i__1 = *n - k + 1;
|
||||
ccopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
|
||||
c__1);
|
||||
if (k < *n) {
|
||||
i__1 = k + k * a_dim1;
|
||||
r1 = 1.f / a[i__1].r;
|
||||
i__1 = *n - k;
|
||||
csscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1);
|
||||
i__1 = *n - k;
|
||||
clacgv_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
}
|
||||
} else {
|
||||
if (k < *n - 1) {
|
||||
i__1 = k + 1 + k * w_dim1;
|
||||
d21.r = w[i__1].r, d21.i = w[i__1].i;
|
||||
c_div(&q__1, &w[k + 1 + (k + 1) * w_dim1], &d21);
|
||||
d11.r = q__1.r, d11.i = q__1.i;
|
||||
r_cnjg(&q__2, &d21);
|
||||
c_div(&q__1, &w[k + k * w_dim1], &q__2);
|
||||
d22.r = q__1.r, d22.i = q__1.i;
|
||||
q__1.r = d11.r * d22.r - d11.i * d22.i, q__1.i = d11.r *
|
||||
d22.i + d11.i * d22.r;
|
||||
t = 1.f / (q__1.r - 1.f);
|
||||
q__2.r = t, q__2.i = 0.f;
|
||||
c_div(&q__1, &q__2, &d21);
|
||||
d21.r = q__1.r, d21.i = q__1.i;
|
||||
i__1 = *n;
|
||||
for (j = k + 2; j <= i__1; ++j) {
|
||||
i__2 = j + k * a_dim1;
|
||||
r_cnjg(&q__2, &d21);
|
||||
i__3 = j + k * w_dim1;
|
||||
q__4.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
|
||||
q__4.i = d11.r * w[i__3].i + d11.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + (k + 1) * w_dim1;
|
||||
q__3.r = q__4.r - w[i__4].r, q__3.i = q__4.i - w[i__4]
|
||||
.i;
|
||||
q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i =
|
||||
q__2.r * q__3.i + q__2.i * q__3.r;
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
i__2 = j + (k + 1) * a_dim1;
|
||||
i__3 = j + (k + 1) * w_dim1;
|
||||
q__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
|
||||
q__3.i = d22.r * w[i__3].i + d22.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + k * w_dim1;
|
||||
q__2.r = q__3.r - w[i__4].r, q__2.i = q__3.i - w[i__4]
|
||||
.i;
|
||||
q__1.r = d21.r * q__2.r - d21.i * q__2.i, q__1.i =
|
||||
d21.r * q__2.i + d21.i * q__2.r;
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
/* L80: */
|
||||
}
|
||||
}
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + k * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + 1 + k * a_dim1;
|
||||
i__2 = k + 1 + k * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + 1 + (k + 1) * a_dim1;
|
||||
i__2 = k + 1 + (k + 1) * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = *n - k;
|
||||
clacgv_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
i__1 = *n - k - 1;
|
||||
clacgv_(&i__1, &w[k + 2 + (k + 1) * w_dim1], &c__1);
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -kp;
|
||||
ipiv[k + 1] = -kp;
|
||||
}
|
||||
k += kstep;
|
||||
goto L70;
|
||||
L90:
|
||||
j = k - 1;
|
||||
L120:
|
||||
jj = j;
|
||||
jp = ipiv[j];
|
||||
if (jp < 0) {
|
||||
jp = -jp;
|
||||
--j;
|
||||
}
|
||||
--j;
|
||||
if (jp != jj && j >= 1) {
|
||||
cswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda);
|
||||
}
|
||||
if (j >= 1) {
|
||||
goto L120;
|
||||
}
|
||||
*kb = k - 1;
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,236 @@
|
|||
#include "relapack.h"
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
static void RELAPACK_chetrf_rook_rec(const char *, const int *, const int *, int *,
|
||||
float *, const int *, int *, float *, const int *, int *);
|
||||
|
||||
|
||||
/** CHETRF_ROOK computes the factorization of a complex Hermitian indefinite matrix using the bounded Bunch-Kaufman ("rook") diagonal pivoting method.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's chetrf_rook.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d0/d5e/chetrf__rook_8f.html
|
||||
* */
|
||||
void RELAPACK_chetrf_rook(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
|
||||
// Required work size
|
||||
const int cleanlWork = *n * (*n / 2);
|
||||
int minlWork = cleanlWork;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
minlWork = 1;
|
||||
#endif
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
else if (*lWork < minlWork && *lWork != -1)
|
||||
*info = -7;
|
||||
else if (*lWork == -1) {
|
||||
// Work size query
|
||||
*Work = cleanlWork;
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure Work size
|
||||
float *cleanWork = Work;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (!*info && *lWork < cleanlWork) {
|
||||
cleanWork = malloc(cleanlWork * 2 * sizeof(float));
|
||||
if (!cleanWork)
|
||||
*info = -7;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("CHETRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Dummy argument
|
||||
int nout;
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_chetrf_rook_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info);
|
||||
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (cleanWork != Work)
|
||||
free(cleanWork);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** chetrf_rook's recursive compute kernel */
|
||||
static void RELAPACK_chetrf_rook_rec(
|
||||
const char *uplo, const int *n_full, const int *n, int *n_out,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *ldWork, int *info
|
||||
) {
|
||||
|
||||
// top recursion level?
|
||||
const int top = *n_full == *n;
|
||||
|
||||
if (*n <= MAX(CROSSOVER_CHETRF, 3)) {
|
||||
// Unblocked
|
||||
if (top) {
|
||||
LAPACK(chetf2)(uplo, n, A, ldA, ipiv, info);
|
||||
*n_out = *n;
|
||||
} else
|
||||
RELAPACK_chetrf_rook_rec2(uplo, n_full, n, n_out, A, ldA, ipiv, Work, ldWork, info);
|
||||
return;
|
||||
}
|
||||
|
||||
int info1, info2;
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1., 0. };
|
||||
const float MONE[] = { -1., 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
const int n_rest = *n_full - *n;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// Splitting (setup)
|
||||
int n1 = CREC_SPLIT(*n);
|
||||
int n2 = *n - n1;
|
||||
|
||||
// Work_L *
|
||||
float *const Work_L = Work;
|
||||
|
||||
// recursion(A_L)
|
||||
int n1_out;
|
||||
RELAPACK_chetrf_rook_rec(uplo, n_full, &n1, &n1_out, A, ldA, ipiv, Work_L, ldWork, &info1);
|
||||
n1 = n1_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n2 = *n - n1;
|
||||
const int n_full2 = *n_full - n1;
|
||||
|
||||
// * *
|
||||
// A_BL A_BR
|
||||
// A_BL_B A_BR_B
|
||||
float *const A_BL = A + 2 * n1;
|
||||
float *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
float *const A_BL_B = A + 2 * *n;
|
||||
float *const A_BR_B = A + 2 * *ldA * n1 + 2 * *n;
|
||||
|
||||
// * *
|
||||
// Work_BL Work_BR
|
||||
// * *
|
||||
// (top recursion level: use Work as Work_BR)
|
||||
float *const Work_BL = Work + 2 * n1;
|
||||
float *const Work_BR = top ? Work : Work + 2 * *ldWork * n1 + 2 * n1;
|
||||
const int ldWork_BR = top ? n2 : *ldWork;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// A_BR = A_BR - A_BL Work_BL'
|
||||
RELAPACK_cgemmt(uplo, "N", "T", &n2, &n1, MONE, A_BL, ldA, Work_BL, ldWork, ONE, A_BR, ldA);
|
||||
BLAS(cgemm)("N", "T", &n_rest, &n2, &n1, MONE, A_BL_B, ldA, Work_BL, ldWork, ONE, A_BR_B, ldA);
|
||||
|
||||
// recursion(A_BR)
|
||||
int n2_out;
|
||||
RELAPACK_chetrf_rook_rec(uplo, &n_full2, &n2, &n2_out, A_BR, ldA, ipiv_B, Work_BR, &ldWork_BR, &info2);
|
||||
|
||||
if (n2_out != n2) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// last column of A_BR
|
||||
float *const A_BR_r = A_BR + 2 * *ldA * n2_out + 2 * n2_out;
|
||||
|
||||
// last row of A_BL
|
||||
float *const A_BL_b = A_BL + 2 * n2_out;
|
||||
|
||||
// last row of Work_BL
|
||||
float *const Work_BL_b = Work_BL + 2 * n2_out;
|
||||
|
||||
// A_BR_r = A_BR_r + A_BL_b Work_BL_b'
|
||||
BLAS(cgemv)("N", &n_restp1, &n1, ONE, A_BL_b, ldA, Work_BL_b, ldWork, ONE, A_BR_r, iONE);
|
||||
}
|
||||
n2 = n2_out;
|
||||
|
||||
// shift pivots
|
||||
int i;
|
||||
for (i = 0; i < n2; i++)
|
||||
if (ipiv_B[i] > 0)
|
||||
ipiv_B[i] += n1;
|
||||
else
|
||||
ipiv_B[i] -= n1;
|
||||
|
||||
*info = info1 || info2;
|
||||
*n_out = n1 + n2;
|
||||
} else {
|
||||
// Splitting (setup)
|
||||
int n2 = CREC_SPLIT(*n);
|
||||
int n1 = *n - n2;
|
||||
|
||||
// * Work_R
|
||||
// (top recursion level: use Work as Work_R)
|
||||
float *const Work_R = top ? Work : Work + 2 * *ldWork * n1;
|
||||
|
||||
// recursion(A_R)
|
||||
int n2_out;
|
||||
RELAPACK_chetrf_rook_rec(uplo, n_full, &n2, &n2_out, A, ldA, ipiv, Work_R, ldWork, &info2);
|
||||
const int n2_diff = n2 - n2_out;
|
||||
n2 = n2_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n1 = *n - n2;
|
||||
const int n_full1 = *n_full - n2;
|
||||
|
||||
// * A_TL_T A_TR_T
|
||||
// * A_TL A_TR
|
||||
// * * *
|
||||
float *const A_TL_T = A + 2 * *ldA * n_rest;
|
||||
float *const A_TR_T = A + 2 * *ldA * (n_rest + n1);
|
||||
float *const A_TL = A + 2 * *ldA * n_rest + 2 * n_rest;
|
||||
float *const A_TR = A + 2 * *ldA * (n_rest + n1) + 2 * n_rest;
|
||||
|
||||
// Work_L *
|
||||
// * Work_TR
|
||||
// * *
|
||||
// (top recursion level: Work_R was Work)
|
||||
float *const Work_L = Work;
|
||||
float *const Work_TR = Work + 2 * *ldWork * (top ? n2_diff : n1) + 2 * n_rest;
|
||||
const int ldWork_L = top ? n1 : *ldWork;
|
||||
|
||||
// A_TL = A_TL - A_TR Work_TR'
|
||||
RELAPACK_cgemmt(uplo, "N", "T", &n1, &n2, MONE, A_TR, ldA, Work_TR, ldWork, ONE, A_TL, ldA);
|
||||
BLAS(cgemm)("N", "T", &n_rest, &n1, &n2, MONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, ldA);
|
||||
|
||||
// recursion(A_TL)
|
||||
int n1_out;
|
||||
RELAPACK_chetrf_rook_rec(uplo, &n_full1, &n1, &n1_out, A, ldA, ipiv, Work_L, &ldWork_L, &info1);
|
||||
|
||||
if (n1_out != n1) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// A_TL_T_l = A_TL_T_l + A_TR_T Work_TR_t'
|
||||
BLAS(cgemv)("N", &n_restp1, &n2, ONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, iONE);
|
||||
}
|
||||
n1 = n1_out;
|
||||
|
||||
*info = info2 || info1;
|
||||
*n_out = n1 + n2;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,661 @@
|
|||
/* -- translated by f2c (version 20100827).
|
||||
You must link the resulting object file with libf2c:
|
||||
on Microsoft Windows system, link with libf2c.lib;
|
||||
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
|
||||
or, if you install libf2c.a in a standard place, with -lf2c -lm
|
||||
-- in that order, at the end of the command line, as in
|
||||
cc *.o -lf2c -lm
|
||||
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
|
||||
|
||||
http://www.netlib.org/f2c/libf2c.zip
|
||||
*/
|
||||
|
||||
#include "f2c.h"
|
||||
|
||||
/* Table of constant values */
|
||||
|
||||
static complex c_b1 = {1.f,0.f};
|
||||
static int c__1 = 1;
|
||||
|
||||
/** CHETRF_ROOK_REC2 computes a partial factorization of a complex Hermitian indefinite matrix using the boun ded Bunch-Kaufman ("rook") diagonal pivoting method
|
||||
*
|
||||
* This routine is a minor modification of LAPACK's clahef_rook.
|
||||
* It serves as an unblocked kernel in the recursive algorithms.
|
||||
* The blocked BLAS Level 3 updates were removed and moved to the
|
||||
* recursive algorithm.
|
||||
* */
|
||||
/* Subroutine */ void RELAPACK_chetrf_rook_rec2(char *uplo, int *n,
|
||||
int *nb, int *kb, complex *a, int *lda, int *ipiv,
|
||||
complex *w, int *ldw, int *info, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
int a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4;
|
||||
float r__1, r__2;
|
||||
complex q__1, q__2, q__3, q__4, q__5;
|
||||
|
||||
/* Builtin functions */
|
||||
double sqrt(double), r_imag(complex *);
|
||||
void r_cnjg(complex *, complex *), c_div(complex *, complex *, complex *);
|
||||
|
||||
/* Local variables */
|
||||
static int j, k, p;
|
||||
static float t, r1;
|
||||
static complex d11, d21, d22;
|
||||
static int ii, jj, kk, kp, kw, jp1, jp2, kkw;
|
||||
static logical done;
|
||||
static int imax, jmax;
|
||||
static float alpha;
|
||||
extern logical lsame_(char *, char *, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ int cgemv_(char *, int *, int *, complex *
|
||||
, complex *, int *, complex *, int *, complex *, complex *
|
||||
, int *, ftnlen);
|
||||
static float sfmin;
|
||||
extern /* Subroutine */ int ccopy_(int *, complex *, int *,
|
||||
complex *, int *);
|
||||
static int itemp;
|
||||
extern /* Subroutine */ int cswap_(int *, complex *, int *,
|
||||
complex *, int *);
|
||||
static int kstep;
|
||||
static float stemp, absakk;
|
||||
extern /* Subroutine */ int clacgv_(int *, complex *, int *);
|
||||
extern int icamax_(int *, complex *, int *);
|
||||
extern double slamch_(char *, ftnlen);
|
||||
extern /* Subroutine */ int csscal_(int *, float *, complex *, int
|
||||
*);
|
||||
static float colmax, rowmax;
|
||||
|
||||
/* Parameter adjustments */
|
||||
a_dim1 = *lda;
|
||||
a_offset = 1 + a_dim1;
|
||||
a -= a_offset;
|
||||
--ipiv;
|
||||
w_dim1 = *ldw;
|
||||
w_offset = 1 + w_dim1;
|
||||
w -= w_offset;
|
||||
|
||||
/* Function Body */
|
||||
*info = 0;
|
||||
alpha = (sqrt(17.f) + 1.f) / 8.f;
|
||||
sfmin = slamch_("S", (ftnlen)1);
|
||||
if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
|
||||
k = *n;
|
||||
L10:
|
||||
kw = *nb + k - *n;
|
||||
if ((k <= *n - *nb + 1 && *nb < *n) || k < 1) {
|
||||
goto L30;
|
||||
}
|
||||
kstep = 1;
|
||||
p = k;
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
ccopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &
|
||||
c__1);
|
||||
}
|
||||
i__1 = k + kw * w_dim1;
|
||||
i__2 = k + k * a_dim1;
|
||||
r__1 = a[i__2].r;
|
||||
w[i__1].r = r__1, w[i__1].i = 0.f;
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
q__1.r = -1.f, q__1.i = -0.f;
|
||||
cgemv_("No transpose", &k, &i__1, &q__1, &a[(k + 1) * a_dim1 + 1],
|
||||
lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b1, &w[kw *
|
||||
w_dim1 + 1], &c__1, (ftnlen)12);
|
||||
i__1 = k + kw * w_dim1;
|
||||
i__2 = k + kw * w_dim1;
|
||||
r__1 = w[i__2].r;
|
||||
w[i__1].r = r__1, w[i__1].i = 0.f;
|
||||
}
|
||||
i__1 = k + kw * w_dim1;
|
||||
absakk = (r__1 = w[i__1].r, dabs(r__1));
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
imax = icamax_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
i__1 = imax + kw * w_dim1;
|
||||
colmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[imax
|
||||
+ kw * w_dim1]), dabs(r__2));
|
||||
} else {
|
||||
colmax = 0.f;
|
||||
}
|
||||
if (dmax(absakk,colmax) == 0.f) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + kw * w_dim1;
|
||||
r__1 = w[i__2].r;
|
||||
a[i__1].r = r__1, a[i__1].i = 0.f;
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
ccopy_(&i__1, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1],
|
||||
&c__1);
|
||||
}
|
||||
} else {
|
||||
if (! (absakk < alpha * colmax)) {
|
||||
kp = k;
|
||||
} else {
|
||||
done = FALSE_;
|
||||
L12:
|
||||
if (imax > 1) {
|
||||
i__1 = imax - 1;
|
||||
ccopy_(&i__1, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) *
|
||||
w_dim1 + 1], &c__1);
|
||||
}
|
||||
i__1 = imax + (kw - 1) * w_dim1;
|
||||
i__2 = imax + imax * a_dim1;
|
||||
r__1 = a[i__2].r;
|
||||
w[i__1].r = r__1, w[i__1].i = 0.f;
|
||||
i__1 = k - imax;
|
||||
ccopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax +
|
||||
1 + (kw - 1) * w_dim1], &c__1);
|
||||
i__1 = k - imax;
|
||||
clacgv_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
q__1.r = -1.f, q__1.i = -0.f;
|
||||
cgemv_("No transpose", &k, &i__1, &q__1, &a[(k + 1) *
|
||||
a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1],
|
||||
ldw, &c_b1, &w[(kw - 1) * w_dim1 + 1], &c__1, (
|
||||
ftnlen)12);
|
||||
i__1 = imax + (kw - 1) * w_dim1;
|
||||
i__2 = imax + (kw - 1) * w_dim1;
|
||||
r__1 = w[i__2].r;
|
||||
w[i__1].r = r__1, w[i__1].i = 0.f;
|
||||
}
|
||||
if (imax != k) {
|
||||
i__1 = k - imax;
|
||||
jmax = imax + icamax_(&i__1, &w[imax + 1 + (kw - 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = jmax + (kw - 1) * w_dim1;
|
||||
rowmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&
|
||||
w[jmax + (kw - 1) * w_dim1]), dabs(r__2));
|
||||
} else {
|
||||
rowmax = 0.f;
|
||||
}
|
||||
if (imax > 1) {
|
||||
i__1 = imax - 1;
|
||||
itemp = icamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
|
||||
i__1 = itemp + (kw - 1) * w_dim1;
|
||||
stemp = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&
|
||||
w[itemp + (kw - 1) * w_dim1]), dabs(r__2));
|
||||
if (stemp > rowmax) {
|
||||
rowmax = stemp;
|
||||
jmax = itemp;
|
||||
}
|
||||
}
|
||||
i__1 = imax + (kw - 1) * w_dim1;
|
||||
if (! ((r__1 = w[i__1].r, dabs(r__1)) < alpha * rowmax)) {
|
||||
kp = imax;
|
||||
ccopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
|
||||
w_dim1 + 1], &c__1);
|
||||
done = TRUE_;
|
||||
} else if (p == jmax || rowmax <= colmax) {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
done = TRUE_;
|
||||
} else {
|
||||
p = imax;
|
||||
colmax = rowmax;
|
||||
imax = jmax;
|
||||
ccopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
|
||||
w_dim1 + 1], &c__1);
|
||||
}
|
||||
if (! done) {
|
||||
goto L12;
|
||||
}
|
||||
}
|
||||
kk = k - kstep + 1;
|
||||
kkw = *nb + kk - *n;
|
||||
if (kstep == 2 && p != k) {
|
||||
i__1 = p + p * a_dim1;
|
||||
i__2 = k + k * a_dim1;
|
||||
r__1 = a[i__2].r;
|
||||
a[i__1].r = r__1, a[i__1].i = 0.f;
|
||||
i__1 = k - 1 - p;
|
||||
ccopy_(&i__1, &a[p + 1 + k * a_dim1], &c__1, &a[p + (p + 1) *
|
||||
a_dim1], lda);
|
||||
i__1 = k - 1 - p;
|
||||
clacgv_(&i__1, &a[p + (p + 1) * a_dim1], lda);
|
||||
if (p > 1) {
|
||||
i__1 = p - 1;
|
||||
ccopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &a[p * a_dim1 +
|
||||
1], &c__1);
|
||||
}
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
cswap_(&i__1, &a[k + (k + 1) * a_dim1], lda, &a[p + (k +
|
||||
1) * a_dim1], lda);
|
||||
}
|
||||
i__1 = *n - kk + 1;
|
||||
cswap_(&i__1, &w[k + kkw * w_dim1], ldw, &w[p + kkw * w_dim1],
|
||||
ldw);
|
||||
}
|
||||
if (kp != kk) {
|
||||
i__1 = kp + kp * a_dim1;
|
||||
i__2 = kk + kk * a_dim1;
|
||||
r__1 = a[i__2].r;
|
||||
a[i__1].r = r__1, a[i__1].i = 0.f;
|
||||
i__1 = kk - 1 - kp;
|
||||
ccopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp +
|
||||
1) * a_dim1], lda);
|
||||
i__1 = kk - 1 - kp;
|
||||
clacgv_(&i__1, &a[kp + (kp + 1) * a_dim1], lda);
|
||||
if (kp > 1) {
|
||||
i__1 = kp - 1;
|
||||
ccopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1
|
||||
+ 1], &c__1);
|
||||
}
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
cswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k
|
||||
+ 1) * a_dim1], lda);
|
||||
}
|
||||
i__1 = *n - kk + 1;
|
||||
cswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw *
|
||||
w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ccopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &
|
||||
c__1);
|
||||
if (k > 1) {
|
||||
i__1 = k + k * a_dim1;
|
||||
t = a[i__1].r;
|
||||
if (dabs(t) >= sfmin) {
|
||||
r1 = 1.f / t;
|
||||
i__1 = k - 1;
|
||||
csscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1);
|
||||
} else {
|
||||
i__1 = k - 1;
|
||||
for (ii = 1; ii <= i__1; ++ii) {
|
||||
i__2 = ii + k * a_dim1;
|
||||
i__3 = ii + k * a_dim1;
|
||||
q__1.r = a[i__3].r / t, q__1.i = a[i__3].i / t;
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
/* L14: */
|
||||
}
|
||||
}
|
||||
i__1 = k - 1;
|
||||
clacgv_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
}
|
||||
} else {
|
||||
if (k > 2) {
|
||||
i__1 = k - 1 + kw * w_dim1;
|
||||
d21.r = w[i__1].r, d21.i = w[i__1].i;
|
||||
r_cnjg(&q__2, &d21);
|
||||
c_div(&q__1, &w[k + kw * w_dim1], &q__2);
|
||||
d11.r = q__1.r, d11.i = q__1.i;
|
||||
c_div(&q__1, &w[k - 1 + (kw - 1) * w_dim1], &d21);
|
||||
d22.r = q__1.r, d22.i = q__1.i;
|
||||
q__1.r = d11.r * d22.r - d11.i * d22.i, q__1.i = d11.r *
|
||||
d22.i + d11.i * d22.r;
|
||||
t = 1.f / (q__1.r - 1.f);
|
||||
i__1 = k - 2;
|
||||
for (j = 1; j <= i__1; ++j) {
|
||||
i__2 = j + (k - 1) * a_dim1;
|
||||
i__3 = j + (kw - 1) * w_dim1;
|
||||
q__4.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
|
||||
q__4.i = d11.r * w[i__3].i + d11.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + kw * w_dim1;
|
||||
q__3.r = q__4.r - w[i__4].r, q__3.i = q__4.i - w[i__4]
|
||||
.i;
|
||||
c_div(&q__2, &q__3, &d21);
|
||||
q__1.r = t * q__2.r, q__1.i = t * q__2.i;
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
i__2 = j + k * a_dim1;
|
||||
i__3 = j + kw * w_dim1;
|
||||
q__4.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
|
||||
q__4.i = d22.r * w[i__3].i + d22.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + (kw - 1) * w_dim1;
|
||||
q__3.r = q__4.r - w[i__4].r, q__3.i = q__4.i - w[i__4]
|
||||
.i;
|
||||
r_cnjg(&q__5, &d21);
|
||||
c_div(&q__2, &q__3, &q__5);
|
||||
q__1.r = t * q__2.r, q__1.i = t * q__2.i;
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
/* L20: */
|
||||
}
|
||||
}
|
||||
i__1 = k - 1 + (k - 1) * a_dim1;
|
||||
i__2 = k - 1 + (kw - 1) * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k - 1 + k * a_dim1;
|
||||
i__2 = k - 1 + kw * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + kw * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k - 1;
|
||||
clacgv_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
i__1 = k - 2;
|
||||
clacgv_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -p;
|
||||
ipiv[k - 1] = -kp;
|
||||
}
|
||||
k -= kstep;
|
||||
goto L10;
|
||||
L30:
|
||||
j = k + 1;
|
||||
L60:
|
||||
kstep = 1;
|
||||
jp1 = 1;
|
||||
jj = j;
|
||||
jp2 = ipiv[j];
|
||||
if (jp2 < 0) {
|
||||
jp2 = -jp2;
|
||||
++j;
|
||||
jp1 = -ipiv[j];
|
||||
kstep = 2;
|
||||
}
|
||||
++j;
|
||||
if (jp2 != jj && j <= *n) {
|
||||
i__1 = *n - j + 1;
|
||||
cswap_(&i__1, &a[jp2 + j * a_dim1], lda, &a[jj + j * a_dim1], lda)
|
||||
;
|
||||
}
|
||||
++jj;
|
||||
if (kstep == 2 && jp1 != jj && j <= *n) {
|
||||
i__1 = *n - j + 1;
|
||||
cswap_(&i__1, &a[jp1 + j * a_dim1], lda, &a[jj + j * a_dim1], lda)
|
||||
;
|
||||
}
|
||||
if (j < *n) {
|
||||
goto L60;
|
||||
}
|
||||
*kb = *n - k;
|
||||
} else {
|
||||
k = 1;
|
||||
L70:
|
||||
if ((k >= *nb && *nb < *n) || k > *n) {
|
||||
goto L90;
|
||||
}
|
||||
kstep = 1;
|
||||
p = k;
|
||||
i__1 = k + k * w_dim1;
|
||||
i__2 = k + k * a_dim1;
|
||||
r__1 = a[i__2].r;
|
||||
w[i__1].r = r__1, w[i__1].i = 0.f;
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
ccopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &w[k + 1 + k *
|
||||
w_dim1], &c__1);
|
||||
}
|
||||
if (k > 1) {
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
q__1.r = -1.f, q__1.i = -0.f;
|
||||
cgemv_("No transpose", &i__1, &i__2, &q__1, &a[k + a_dim1], lda, &
|
||||
w[k + w_dim1], ldw, &c_b1, &w[k + k * w_dim1], &c__1, (
|
||||
ftnlen)12);
|
||||
i__1 = k + k * w_dim1;
|
||||
i__2 = k + k * w_dim1;
|
||||
r__1 = w[i__2].r;
|
||||
w[i__1].r = r__1, w[i__1].i = 0.f;
|
||||
}
|
||||
i__1 = k + k * w_dim1;
|
||||
absakk = (r__1 = w[i__1].r, dabs(r__1));
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
imax = k + icamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
i__1 = imax + k * w_dim1;
|
||||
colmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[imax
|
||||
+ k * w_dim1]), dabs(r__2));
|
||||
} else {
|
||||
colmax = 0.f;
|
||||
}
|
||||
if (dmax(absakk,colmax) == 0.f) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + k * w_dim1;
|
||||
r__1 = w[i__2].r;
|
||||
a[i__1].r = r__1, a[i__1].i = 0.f;
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
ccopy_(&i__1, &w[k + 1 + k * w_dim1], &c__1, &a[k + 1 + k *
|
||||
a_dim1], &c__1);
|
||||
}
|
||||
} else {
|
||||
if (! (absakk < alpha * colmax)) {
|
||||
kp = k;
|
||||
} else {
|
||||
done = FALSE_;
|
||||
L72:
|
||||
i__1 = imax - k;
|
||||
ccopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = imax - k;
|
||||
clacgv_(&i__1, &w[k + (k + 1) * w_dim1], &c__1);
|
||||
i__1 = imax + (k + 1) * w_dim1;
|
||||
i__2 = imax + imax * a_dim1;
|
||||
r__1 = a[i__2].r;
|
||||
w[i__1].r = r__1, w[i__1].i = 0.f;
|
||||
if (imax < *n) {
|
||||
i__1 = *n - imax;
|
||||
ccopy_(&i__1, &a[imax + 1 + imax * a_dim1], &c__1, &w[
|
||||
imax + 1 + (k + 1) * w_dim1], &c__1);
|
||||
}
|
||||
if (k > 1) {
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
q__1.r = -1.f, q__1.i = -0.f;
|
||||
cgemv_("No transpose", &i__1, &i__2, &q__1, &a[k + a_dim1]
|
||||
, lda, &w[imax + w_dim1], ldw, &c_b1, &w[k + (k +
|
||||
1) * w_dim1], &c__1, (ftnlen)12);
|
||||
i__1 = imax + (k + 1) * w_dim1;
|
||||
i__2 = imax + (k + 1) * w_dim1;
|
||||
r__1 = w[i__2].r;
|
||||
w[i__1].r = r__1, w[i__1].i = 0.f;
|
||||
}
|
||||
if (imax != k) {
|
||||
i__1 = imax - k;
|
||||
jmax = k - 1 + icamax_(&i__1, &w[k + (k + 1) * w_dim1], &
|
||||
c__1);
|
||||
i__1 = jmax + (k + 1) * w_dim1;
|
||||
rowmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&
|
||||
w[jmax + (k + 1) * w_dim1]), dabs(r__2));
|
||||
} else {
|
||||
rowmax = 0.f;
|
||||
}
|
||||
if (imax < *n) {
|
||||
i__1 = *n - imax;
|
||||
itemp = imax + icamax_(&i__1, &w[imax + 1 + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = itemp + (k + 1) * w_dim1;
|
||||
stemp = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&
|
||||
w[itemp + (k + 1) * w_dim1]), dabs(r__2));
|
||||
if (stemp > rowmax) {
|
||||
rowmax = stemp;
|
||||
jmax = itemp;
|
||||
}
|
||||
}
|
||||
i__1 = imax + (k + 1) * w_dim1;
|
||||
if (! ((r__1 = w[i__1].r, dabs(r__1)) < alpha * rowmax)) {
|
||||
kp = imax;
|
||||
i__1 = *n - k + 1;
|
||||
ccopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k *
|
||||
w_dim1], &c__1);
|
||||
done = TRUE_;
|
||||
} else if (p == jmax || rowmax <= colmax) {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
done = TRUE_;
|
||||
} else {
|
||||
p = imax;
|
||||
colmax = rowmax;
|
||||
imax = jmax;
|
||||
i__1 = *n - k + 1;
|
||||
ccopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k *
|
||||
w_dim1], &c__1);
|
||||
}
|
||||
if (! done) {
|
||||
goto L72;
|
||||
}
|
||||
}
|
||||
kk = k + kstep - 1;
|
||||
if (kstep == 2 && p != k) {
|
||||
i__1 = p + p * a_dim1;
|
||||
i__2 = k + k * a_dim1;
|
||||
r__1 = a[i__2].r;
|
||||
a[i__1].r = r__1, a[i__1].i = 0.f;
|
||||
i__1 = p - k - 1;
|
||||
ccopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &a[p + (k + 1) *
|
||||
a_dim1], lda);
|
||||
i__1 = p - k - 1;
|
||||
clacgv_(&i__1, &a[p + (k + 1) * a_dim1], lda);
|
||||
if (p < *n) {
|
||||
i__1 = *n - p;
|
||||
ccopy_(&i__1, &a[p + 1 + k * a_dim1], &c__1, &a[p + 1 + p
|
||||
* a_dim1], &c__1);
|
||||
}
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
cswap_(&i__1, &a[k + a_dim1], lda, &a[p + a_dim1], lda);
|
||||
}
|
||||
cswap_(&kk, &w[k + w_dim1], ldw, &w[p + w_dim1], ldw);
|
||||
}
|
||||
if (kp != kk) {
|
||||
i__1 = kp + kp * a_dim1;
|
||||
i__2 = kk + kk * a_dim1;
|
||||
r__1 = a[i__2].r;
|
||||
a[i__1].r = r__1, a[i__1].i = 0.f;
|
||||
i__1 = kp - kk - 1;
|
||||
ccopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk +
|
||||
1) * a_dim1], lda);
|
||||
i__1 = kp - kk - 1;
|
||||
clacgv_(&i__1, &a[kp + (kk + 1) * a_dim1], lda);
|
||||
if (kp < *n) {
|
||||
i__1 = *n - kp;
|
||||
ccopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1
|
||||
+ kp * a_dim1], &c__1);
|
||||
}
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
cswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda);
|
||||
}
|
||||
cswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
i__1 = *n - k + 1;
|
||||
ccopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
|
||||
c__1);
|
||||
if (k < *n) {
|
||||
i__1 = k + k * a_dim1;
|
||||
t = a[i__1].r;
|
||||
if (dabs(t) >= sfmin) {
|
||||
r1 = 1.f / t;
|
||||
i__1 = *n - k;
|
||||
csscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1);
|
||||
} else {
|
||||
i__1 = *n;
|
||||
for (ii = k + 1; ii <= i__1; ++ii) {
|
||||
i__2 = ii + k * a_dim1;
|
||||
i__3 = ii + k * a_dim1;
|
||||
q__1.r = a[i__3].r / t, q__1.i = a[i__3].i / t;
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
/* L74: */
|
||||
}
|
||||
}
|
||||
i__1 = *n - k;
|
||||
clacgv_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
}
|
||||
} else {
|
||||
if (k < *n - 1) {
|
||||
i__1 = k + 1 + k * w_dim1;
|
||||
d21.r = w[i__1].r, d21.i = w[i__1].i;
|
||||
c_div(&q__1, &w[k + 1 + (k + 1) * w_dim1], &d21);
|
||||
d11.r = q__1.r, d11.i = q__1.i;
|
||||
r_cnjg(&q__2, &d21);
|
||||
c_div(&q__1, &w[k + k * w_dim1], &q__2);
|
||||
d22.r = q__1.r, d22.i = q__1.i;
|
||||
q__1.r = d11.r * d22.r - d11.i * d22.i, q__1.i = d11.r *
|
||||
d22.i + d11.i * d22.r;
|
||||
t = 1.f / (q__1.r - 1.f);
|
||||
i__1 = *n;
|
||||
for (j = k + 2; j <= i__1; ++j) {
|
||||
i__2 = j + k * a_dim1;
|
||||
i__3 = j + k * w_dim1;
|
||||
q__4.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
|
||||
q__4.i = d11.r * w[i__3].i + d11.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + (k + 1) * w_dim1;
|
||||
q__3.r = q__4.r - w[i__4].r, q__3.i = q__4.i - w[i__4]
|
||||
.i;
|
||||
r_cnjg(&q__5, &d21);
|
||||
c_div(&q__2, &q__3, &q__5);
|
||||
q__1.r = t * q__2.r, q__1.i = t * q__2.i;
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
i__2 = j + (k + 1) * a_dim1;
|
||||
i__3 = j + (k + 1) * w_dim1;
|
||||
q__4.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
|
||||
q__4.i = d22.r * w[i__3].i + d22.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + k * w_dim1;
|
||||
q__3.r = q__4.r - w[i__4].r, q__3.i = q__4.i - w[i__4]
|
||||
.i;
|
||||
c_div(&q__2, &q__3, &d21);
|
||||
q__1.r = t * q__2.r, q__1.i = t * q__2.i;
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
/* L80: */
|
||||
}
|
||||
}
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + k * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + 1 + k * a_dim1;
|
||||
i__2 = k + 1 + k * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + 1 + (k + 1) * a_dim1;
|
||||
i__2 = k + 1 + (k + 1) * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = *n - k;
|
||||
clacgv_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
i__1 = *n - k - 1;
|
||||
clacgv_(&i__1, &w[k + 2 + (k + 1) * w_dim1], &c__1);
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -p;
|
||||
ipiv[k + 1] = -kp;
|
||||
}
|
||||
k += kstep;
|
||||
goto L70;
|
||||
L90:
|
||||
j = k - 1;
|
||||
L120:
|
||||
kstep = 1;
|
||||
jp1 = 1;
|
||||
jj = j;
|
||||
jp2 = ipiv[j];
|
||||
if (jp2 < 0) {
|
||||
jp2 = -jp2;
|
||||
--j;
|
||||
jp1 = -ipiv[j];
|
||||
kstep = 2;
|
||||
}
|
||||
--j;
|
||||
if (jp2 != jj && j >= 1) {
|
||||
cswap_(&j, &a[jp2 + a_dim1], lda, &a[jj + a_dim1], lda);
|
||||
}
|
||||
--jj;
|
||||
if (kstep == 2 && jp1 != jj && j >= 1) {
|
||||
cswap_(&j, &a[jp1 + a_dim1], lda, &a[jj + a_dim1], lda);
|
||||
}
|
||||
if (j > 1) {
|
||||
goto L120;
|
||||
}
|
||||
*kb = k - 1;
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_clauum_rec(const char *, const int *, float *,
|
||||
const int *, int *);
|
||||
|
||||
|
||||
/** CLAUUM computes the product U * U**H or L**H * L, where the triangular factor U or L is stored in the upper or lower triangular part of the array A.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's clauum.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d2/d36/clauum_8f.html
|
||||
* */
|
||||
void RELAPACK_clauum(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("CLAUUM", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_clauum_rec(&cleanuplo, n, A, ldA, info);
|
||||
}
|
||||
|
||||
|
||||
/** clauum's recursive compute kernel */
|
||||
static void RELAPACK_clauum_rec(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_CLAUUM, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(clauu2)(uplo, n, A, ldA, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1., 0. };
|
||||
|
||||
// Splitting
|
||||
const int n1 = CREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
float *const A_TL = A;
|
||||
float *const A_TR = A + 2 * *ldA * n1;
|
||||
float *const A_BL = A + 2 * n1;
|
||||
float *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
|
||||
// recursion(A_TL)
|
||||
RELAPACK_clauum_rec(uplo, &n1, A_TL, ldA, info);
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// A_TL = A_TL + A_BL' * A_BL
|
||||
BLAS(cherk)("L", "C", &n1, &n2, ONE, A_BL, ldA, ONE, A_TL, ldA);
|
||||
// A_BL = A_BR' * A_BL
|
||||
BLAS(ctrmm)("L", "L", "C", "N", &n2, &n1, ONE, A_BR, ldA, A_BL, ldA);
|
||||
} else {
|
||||
// A_TL = A_TL + A_TR * A_TR'
|
||||
BLAS(cherk)("U", "N", &n1, &n2, ONE, A_TR, ldA, ONE, A_TL, ldA);
|
||||
// A_TR = A_TR * A_BR'
|
||||
BLAS(ctrmm)("R", "U", "C", "N", &n1, &n2, ONE, A_BR, ldA, A_TR, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR)
|
||||
RELAPACK_clauum_rec(uplo, &n2, A_BR, ldA, info);
|
||||
}
|
|
@ -0,0 +1,157 @@
|
|||
#include "relapack.h"
|
||||
#include "stdlib.h"
|
||||
|
||||
static void RELAPACK_cpbtrf_rec(const char *, const int *, const int *,
|
||||
float *, const int *, float *, const int *, int *);
|
||||
|
||||
|
||||
/** CPBTRF computes the Cholesky factorization of a complex Hermitian positive definite band matrix A.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's cpbtrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/de/d2d/cpbtrf_8f.html
|
||||
* */
|
||||
void RELAPACK_cpbtrf(
|
||||
const char *uplo, const int *n, const int *kd,
|
||||
float *Ab, const int *ldAb,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*kd < 0)
|
||||
*info = -3;
|
||||
else if (*ldAb < *kd + 1)
|
||||
*info = -5;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("CPBTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Constant
|
||||
const float ZERO[] = { 0., 0. };
|
||||
|
||||
// Allocate work space
|
||||
const int n1 = CREC_SPLIT(*n);
|
||||
const int mWork = (*kd > n1) ? (lower ? *n - *kd : n1) : *kd;
|
||||
const int nWork = (*kd > n1) ? (lower ? n1 : *n - *kd) : *kd;
|
||||
float *Work = malloc(mWork * nWork * 2 * sizeof(float));
|
||||
LAPACK(claset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork);
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_cpbtrf_rec(&cleanuplo, n, kd, Ab, ldAb, Work, &mWork, info);
|
||||
|
||||
// Free work space
|
||||
free(Work);
|
||||
}
|
||||
|
||||
|
||||
/** cpbtrf's recursive compute kernel */
|
||||
static void RELAPACK_cpbtrf_rec(
|
||||
const char *uplo, const int *n, const int *kd,
|
||||
float *Ab, const int *ldAb,
|
||||
float *Work, const int *ldWork,
|
||||
int *info
|
||||
){
|
||||
|
||||
if (*n <= MAX(CROSSOVER_CPBTRF, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(cpbtf2)(uplo, n, kd, Ab, ldAb, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1., 0. };
|
||||
const float MONE[] = { -1., 0. };
|
||||
|
||||
// Unskew A
|
||||
const int ldA[] = { *ldAb - 1 };
|
||||
float *const A = Ab + 2 * ((*uplo == 'L') ? 0 : *kd);
|
||||
|
||||
// Splitting
|
||||
const int n1 = MIN(CREC_SPLIT(*n), *kd);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// * *
|
||||
// * Ab_BR
|
||||
float *const Ab_BR = Ab + 2 * *ldAb * n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
float *const A_TL = A;
|
||||
float *const A_TR = A + 2 * *ldA * n1;
|
||||
float *const A_BL = A + 2 * n1;
|
||||
float *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
|
||||
// recursion(A_TL)
|
||||
RELAPACK_cpotrf(uplo, &n1, A_TL, ldA, info);
|
||||
if (*info)
|
||||
return;
|
||||
|
||||
// Banded splitting
|
||||
const int n21 = MIN(n2, *kd - n1);
|
||||
const int n22 = MIN(n2 - n21, *kd);
|
||||
|
||||
// n1 n21 n22
|
||||
// n1 * A_TRl A_TRr
|
||||
// n21 A_BLt A_BRtl A_BRtr
|
||||
// n22 A_BLb A_BRbl A_BRbr
|
||||
float *const A_TRl = A_TR;
|
||||
float *const A_TRr = A_TR + 2 * *ldA * n21;
|
||||
float *const A_BLt = A_BL;
|
||||
float *const A_BLb = A_BL + 2 * n21;
|
||||
float *const A_BRtl = A_BR;
|
||||
float *const A_BRtr = A_BR + 2 * *ldA * n21;
|
||||
float *const A_BRbl = A_BR + 2 * n21;
|
||||
float *const A_BRbr = A_BR + 2 * *ldA * n21 + 2 * n21;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// A_BLt = ABLt / A_TL'
|
||||
BLAS(ctrsm)("R", "L", "C", "N", &n21, &n1, ONE, A_TL, ldA, A_BLt, ldA);
|
||||
// A_BRtl = A_BRtl - A_BLt * A_BLt'
|
||||
BLAS(cherk)("L", "N", &n21, &n1, MONE, A_BLt, ldA, ONE, A_BRtl, ldA);
|
||||
// Work = A_BLb
|
||||
LAPACK(clacpy)("U", &n22, &n1, A_BLb, ldA, Work, ldWork);
|
||||
// Work = Work / A_TL'
|
||||
BLAS(ctrsm)("R", "L", "C", "N", &n22, &n1, ONE, A_TL, ldA, Work, ldWork);
|
||||
// A_BRbl = A_BRbl - Work * A_BLt'
|
||||
BLAS(cgemm)("N", "C", &n22, &n21, &n1, MONE, Work, ldWork, A_BLt, ldA, ONE, A_BRbl, ldA);
|
||||
// A_BRbr = A_BRbr - Work * Work'
|
||||
BLAS(cherk)("L", "N", &n22, &n1, MONE, Work, ldWork, ONE, A_BRbr, ldA);
|
||||
// A_BLb = Work
|
||||
LAPACK(clacpy)("U", &n22, &n1, Work, ldWork, A_BLb, ldA);
|
||||
} else {
|
||||
// A_TRl = A_TL' \ A_TRl
|
||||
BLAS(ctrsm)("L", "U", "C", "N", &n1, &n21, ONE, A_TL, ldA, A_TRl, ldA);
|
||||
// A_BRtl = A_BRtl - A_TRl' * A_TRl
|
||||
BLAS(cherk)("U", "C", &n21, &n1, MONE, A_TRl, ldA, ONE, A_BRtl, ldA);
|
||||
// Work = A_TRr
|
||||
LAPACK(clacpy)("L", &n1, &n22, A_TRr, ldA, Work, ldWork);
|
||||
// Work = A_TL' \ Work
|
||||
BLAS(ctrsm)("L", "U", "C", "N", &n1, &n22, ONE, A_TL, ldA, Work, ldWork);
|
||||
// A_BRtr = A_BRtr - A_TRl' * Work
|
||||
BLAS(cgemm)("C", "N", &n21, &n22, &n1, MONE, A_TRl, ldA, Work, ldWork, ONE, A_BRtr, ldA);
|
||||
// A_BRbr = A_BRbr - Work' * Work
|
||||
BLAS(cherk)("U", "C", &n22, &n1, MONE, Work, ldWork, ONE, A_BRbr, ldA);
|
||||
// A_TRr = Work
|
||||
LAPACK(clacpy)("L", &n1, &n22, Work, ldWork, A_TRr, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR)
|
||||
if (*kd > n1)
|
||||
RELAPACK_cpotrf(uplo, &n2, A_BR, ldA, info);
|
||||
else
|
||||
RELAPACK_cpbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
}
|
|
@ -0,0 +1,92 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_cpotrf_rec(const char *, const int *, float *,
|
||||
const int *, int *);
|
||||
|
||||
|
||||
/** CPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix A.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's cpotrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/dd/dce/cpotrf_8f.html
|
||||
* */
|
||||
void RELAPACK_cpotrf(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("CPOTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_cpotrf_rec(&cleanuplo, n, A, ldA, info);
|
||||
}
|
||||
|
||||
|
||||
/** cpotrf's recursive compute kernel */
|
||||
static void RELAPACK_cpotrf_rec(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
){
|
||||
|
||||
if (*n <= MAX(CROSSOVER_CPOTRF, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(cpotf2)(uplo, n, A, ldA, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1., 0. };
|
||||
const float MONE[] = { -1., 0. };
|
||||
|
||||
// Splitting
|
||||
const int n1 = CREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
float *const A_TL = A;
|
||||
float *const A_TR = A + 2 * *ldA * n1;
|
||||
float *const A_BL = A + 2 * n1;
|
||||
float *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
|
||||
// recursion(A_TL)
|
||||
RELAPACK_cpotrf_rec(uplo, &n1, A_TL, ldA, info);
|
||||
if (*info)
|
||||
return;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// A_BL = A_BL / A_TL'
|
||||
BLAS(ctrsm)("R", "L", "C", "N", &n2, &n1, ONE, A_TL, ldA, A_BL, ldA);
|
||||
// A_BR = A_BR - A_BL * A_BL'
|
||||
BLAS(cherk)("L", "N", &n2, &n1, MONE, A_BL, ldA, ONE, A_BR, ldA);
|
||||
} else {
|
||||
// A_TR = A_TL' \ A_TR
|
||||
BLAS(ctrsm)("L", "U", "C", "N", &n1, &n2, ONE, A_TL, ldA, A_TR, ldA);
|
||||
// A_BR = A_BR - A_TR' * A_TR
|
||||
BLAS(cherk)("U", "C", &n2, &n1, MONE, A_TR, ldA, ONE, A_BR, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR)
|
||||
RELAPACK_cpotrf_rec(uplo, &n2, A_BR, ldA, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
}
|
|
@ -0,0 +1,238 @@
|
|||
#include "relapack.h"
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
static void RELAPACK_csytrf_rec(const char *, const int *, const int *, int *,
|
||||
float *, const int *, int *, float *, const int *, int *);
|
||||
|
||||
|
||||
/** CSYTRF computes the factorization of a complex symmetric matrix A using the Bunch-Kaufman diagonal pivoting method.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's csytrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d5/d21/csytrf_8f.html
|
||||
* */
|
||||
void RELAPACK_csytrf(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
|
||||
// Required work size
|
||||
const int cleanlWork = *n * (*n / 2);
|
||||
int minlWork = cleanlWork;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
minlWork = 1;
|
||||
#endif
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
else if (*lWork < minlWork && *lWork != -1)
|
||||
*info = -7;
|
||||
else if (*lWork == -1) {
|
||||
// Work size query
|
||||
*Work = cleanlWork;
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure Work size
|
||||
float *cleanWork = Work;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (!*info && *lWork < cleanlWork) {
|
||||
cleanWork = malloc(cleanlWork * 2 * sizeof(float));
|
||||
if (!cleanWork)
|
||||
*info = -7;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("CSYTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Dummy arguments
|
||||
int nout;
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_csytrf_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info);
|
||||
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (cleanWork != Work)
|
||||
free(cleanWork);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** csytrf's recursive compute kernel */
|
||||
static void RELAPACK_csytrf_rec(
|
||||
const char *uplo, const int *n_full, const int *n, int *n_out,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *ldWork, int *info
|
||||
) {
|
||||
|
||||
// top recursion level?
|
||||
const int top = *n_full == *n;
|
||||
|
||||
if (*n <= MAX(CROSSOVER_CSYTRF, 3)) {
|
||||
// Unblocked
|
||||
if (top) {
|
||||
LAPACK(csytf2)(uplo, n, A, ldA, ipiv, info);
|
||||
*n_out = *n;
|
||||
} else
|
||||
RELAPACK_csytrf_rec2(uplo, n_full, n, n_out, A, ldA, ipiv, Work, ldWork, info);
|
||||
return;
|
||||
}
|
||||
|
||||
int info1, info2;
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1., 0. };
|
||||
const float MONE[] = { -1., 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Loop iterator
|
||||
int i;
|
||||
|
||||
const int n_rest = *n_full - *n;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// Splitting (setup)
|
||||
int n1 = CREC_SPLIT(*n);
|
||||
int n2 = *n - n1;
|
||||
|
||||
// Work_L *
|
||||
float *const Work_L = Work;
|
||||
|
||||
// recursion(A_L)
|
||||
int n1_out;
|
||||
RELAPACK_csytrf_rec(uplo, n_full, &n1, &n1_out, A, ldA, ipiv, Work_L, ldWork, &info1);
|
||||
n1 = n1_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n2 = *n - n1;
|
||||
const int n_full2 = *n_full - n1;
|
||||
|
||||
// * *
|
||||
// A_BL A_BR
|
||||
// A_BL_B A_BR_B
|
||||
float *const A_BL = A + 2 * n1;
|
||||
float *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
float *const A_BL_B = A + 2 * *n;
|
||||
float *const A_BR_B = A + 2 * *ldA * n1 + 2 * *n;
|
||||
|
||||
// * *
|
||||
// Work_BL Work_BR
|
||||
// * *
|
||||
// (top recursion level: use Work as Work_BR)
|
||||
float *const Work_BL = Work + 2 * n1;
|
||||
float *const Work_BR = top ? Work : Work + 2 * *ldWork * n1 + 2 * n1;
|
||||
const int ldWork_BR = top ? n2 : *ldWork;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// A_BR = A_BR - A_BL Work_BL'
|
||||
RELAPACK_cgemmt(uplo, "N", "T", &n2, &n1, MONE, A_BL, ldA, Work_BL, ldWork, ONE, A_BR, ldA);
|
||||
BLAS(cgemm)("N", "T", &n_rest, &n2, &n1, MONE, A_BL_B, ldA, Work_BL, ldWork, ONE, A_BR_B, ldA);
|
||||
|
||||
// recursion(A_BR)
|
||||
int n2_out;
|
||||
RELAPACK_csytrf_rec(uplo, &n_full2, &n2, &n2_out, A_BR, ldA, ipiv_B, Work_BR, &ldWork_BR, &info2);
|
||||
|
||||
if (n2_out != n2) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// last column of A_BR
|
||||
float *const A_BR_r = A_BR + 2 * *ldA * n2_out + 2 * n2_out;
|
||||
|
||||
// last row of A_BL
|
||||
float *const A_BL_b = A_BL + 2 * n2_out;
|
||||
|
||||
// last row of Work_BL
|
||||
float *const Work_BL_b = Work_BL + 2 * n2_out;
|
||||
|
||||
// A_BR_r = A_BR_r + A_BL_b Work_BL_b'
|
||||
BLAS(cgemv)("N", &n_restp1, &n1, ONE, A_BL_b, ldA, Work_BL_b, ldWork, ONE, A_BR_r, iONE);
|
||||
}
|
||||
n2 = n2_out;
|
||||
|
||||
// shift pivots
|
||||
for (i = 0; i < n2; i++)
|
||||
if (ipiv_B[i] > 0)
|
||||
ipiv_B[i] += n1;
|
||||
else
|
||||
ipiv_B[i] -= n1;
|
||||
|
||||
*info = info1 || info2;
|
||||
*n_out = n1 + n2;
|
||||
} else {
|
||||
// Splitting (setup)
|
||||
int n2 = CREC_SPLIT(*n);
|
||||
int n1 = *n - n2;
|
||||
|
||||
// * Work_R
|
||||
// (top recursion level: use Work as Work_R)
|
||||
float *const Work_R = top ? Work : Work + 2 * *ldWork * n1;
|
||||
|
||||
// recursion(A_R)
|
||||
int n2_out;
|
||||
RELAPACK_csytrf_rec(uplo, n_full, &n2, &n2_out, A, ldA, ipiv, Work_R, ldWork, &info2);
|
||||
const int n2_diff = n2 - n2_out;
|
||||
n2 = n2_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n1 = *n - n2;
|
||||
const int n_full1 = *n_full - n2;
|
||||
|
||||
// * A_TL_T A_TR_T
|
||||
// * A_TL A_TR
|
||||
// * * *
|
||||
float *const A_TL_T = A + 2 * *ldA * n_rest;
|
||||
float *const A_TR_T = A + 2 * *ldA * (n_rest + n1);
|
||||
float *const A_TL = A + 2 * *ldA * n_rest + 2 * n_rest;
|
||||
float *const A_TR = A + 2 * *ldA * (n_rest + n1) + 2 * n_rest;
|
||||
|
||||
// Work_L *
|
||||
// * Work_TR
|
||||
// * *
|
||||
// (top recursion level: Work_R was Work)
|
||||
float *const Work_L = Work;
|
||||
float *const Work_TR = Work + 2 * *ldWork * (top ? n2_diff : n1) + 2 * n_rest;
|
||||
const int ldWork_L = top ? n1 : *ldWork;
|
||||
|
||||
// A_TL = A_TL - A_TR Work_TR'
|
||||
RELAPACK_cgemmt(uplo, "N", "T", &n1, &n2, MONE, A_TR, ldA, Work_TR, ldWork, ONE, A_TL, ldA);
|
||||
BLAS(cgemm)("N", "T", &n_rest, &n1, &n2, MONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, ldA);
|
||||
|
||||
// recursion(A_TL)
|
||||
int n1_out;
|
||||
RELAPACK_csytrf_rec(uplo, &n_full1, &n1, &n1_out, A, ldA, ipiv, Work_L, &ldWork_L, &info1);
|
||||
|
||||
if (n1_out != n1) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// A_TL_T_l = A_TL_T_l + A_TR_T Work_TR_t'
|
||||
BLAS(cgemv)("N", &n_restp1, &n2, ONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, iONE);
|
||||
}
|
||||
n1 = n1_out;
|
||||
|
||||
*info = info2 || info1;
|
||||
*n_out = n1 + n2;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,451 @@
|
|||
/* -- translated by f2c (version 20100827).
|
||||
You must link the resulting object file with libf2c:
|
||||
on Microsoft Windows system, link with libf2c.lib;
|
||||
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
|
||||
or, if you install libf2c.a in a standard place, with -lf2c -lm
|
||||
-- in that order, at the end of the command line, as in
|
||||
cc *.o -lf2c -lm
|
||||
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
|
||||
|
||||
http://www.netlib.org/f2c/libf2c.zip
|
||||
*/
|
||||
|
||||
#include "f2c.h"
|
||||
|
||||
/* Table of constant values */
|
||||
|
||||
static complex c_b1 = {1.f,0.f};
|
||||
static int c__1 = 1;
|
||||
|
||||
/** CSYTRF_REC2 computes a partial factorization of a complex symmetric matrix using the Bunch-Kaufman diagon al pivoting method.
|
||||
*
|
||||
* This routine is a minor modification of LAPACK's clasyf.
|
||||
* It serves as an unblocked kernel in the recursive algorithms.
|
||||
* The blocked BLAS Level 3 updates were removed and moved to the
|
||||
* recursive algorithm.
|
||||
* */
|
||||
/* Subroutine */ void RELAPACK_csytrf_rec2(char *uplo, int *n, int *
|
||||
nb, int *kb, complex *a, int *lda, int *ipiv, complex *w,
|
||||
int *ldw, int *info, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
int a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4;
|
||||
float r__1, r__2, r__3, r__4;
|
||||
complex q__1, q__2, q__3;
|
||||
|
||||
/* Builtin functions */
|
||||
double sqrt(double), r_imag(complex *);
|
||||
void c_div(complex *, complex *, complex *);
|
||||
|
||||
/* Local variables */
|
||||
static int j, k;
|
||||
static complex t, r1, d11, d21, d22;
|
||||
static int jj, kk, jp, kp, kw, kkw, imax, jmax;
|
||||
static float alpha;
|
||||
extern /* Subroutine */ int cscal_(int *, complex *, complex *,
|
||||
int *);
|
||||
extern logical lsame_(char *, char *, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ int cgemv_(char *, int *, int *, complex *
|
||||
, complex *, int *, complex *, int *, complex *, complex *
|
||||
, int *, ftnlen), ccopy_(int *, complex *, int *,
|
||||
complex *, int *), cswap_(int *, complex *, int *,
|
||||
complex *, int *);
|
||||
static int kstep;
|
||||
static float absakk;
|
||||
extern int icamax_(int *, complex *, int *);
|
||||
static float colmax, rowmax;
|
||||
|
||||
/* Parameter adjustments */
|
||||
a_dim1 = *lda;
|
||||
a_offset = 1 + a_dim1;
|
||||
a -= a_offset;
|
||||
--ipiv;
|
||||
w_dim1 = *ldw;
|
||||
w_offset = 1 + w_dim1;
|
||||
w -= w_offset;
|
||||
|
||||
/* Function Body */
|
||||
*info = 0;
|
||||
alpha = (sqrt(17.f) + 1.f) / 8.f;
|
||||
if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
|
||||
k = *n;
|
||||
L10:
|
||||
kw = *nb + k - *n;
|
||||
if ((k <= *n - *nb + 1 && *nb < *n) || k < 1) {
|
||||
goto L30;
|
||||
}
|
||||
ccopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
q__1.r = -1.f, q__1.i = -0.f;
|
||||
cgemv_("No transpose", &k, &i__1, &q__1, &a[(k + 1) * a_dim1 + 1],
|
||||
lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b1, &w[kw *
|
||||
w_dim1 + 1], &c__1, (ftnlen)12);
|
||||
}
|
||||
kstep = 1;
|
||||
i__1 = k + kw * w_dim1;
|
||||
absakk = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[k + kw *
|
||||
w_dim1]), dabs(r__2));
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
imax = icamax_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
i__1 = imax + kw * w_dim1;
|
||||
colmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[imax
|
||||
+ kw * w_dim1]), dabs(r__2));
|
||||
} else {
|
||||
colmax = 0.f;
|
||||
}
|
||||
if (dmax(absakk,colmax) == 0.f) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
} else {
|
||||
if (absakk >= alpha * colmax) {
|
||||
kp = k;
|
||||
} else {
|
||||
ccopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) *
|
||||
w_dim1 + 1], &c__1);
|
||||
i__1 = k - imax;
|
||||
ccopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax +
|
||||
1 + (kw - 1) * w_dim1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
q__1.r = -1.f, q__1.i = -0.f;
|
||||
cgemv_("No transpose", &k, &i__1, &q__1, &a[(k + 1) *
|
||||
a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1],
|
||||
ldw, &c_b1, &w[(kw - 1) * w_dim1 + 1], &c__1, (
|
||||
ftnlen)12);
|
||||
}
|
||||
i__1 = k - imax;
|
||||
jmax = imax + icamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1],
|
||||
&c__1);
|
||||
i__1 = jmax + (kw - 1) * w_dim1;
|
||||
rowmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[
|
||||
jmax + (kw - 1) * w_dim1]), dabs(r__2));
|
||||
if (imax > 1) {
|
||||
i__1 = imax - 1;
|
||||
jmax = icamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
|
||||
/* Computing MAX */
|
||||
i__1 = jmax + (kw - 1) * w_dim1;
|
||||
r__3 = rowmax, r__4 = (r__1 = w[i__1].r, dabs(r__1)) + (
|
||||
r__2 = r_imag(&w[jmax + (kw - 1) * w_dim1]), dabs(
|
||||
r__2));
|
||||
rowmax = dmax(r__3,r__4);
|
||||
}
|
||||
if (absakk >= alpha * colmax * (colmax / rowmax)) {
|
||||
kp = k;
|
||||
} else /* if(complicated condition) */ {
|
||||
i__1 = imax + (kw - 1) * w_dim1;
|
||||
if ((r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[
|
||||
imax + (kw - 1) * w_dim1]), dabs(r__2)) >= alpha *
|
||||
rowmax) {
|
||||
kp = imax;
|
||||
ccopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
|
||||
w_dim1 + 1], &c__1);
|
||||
} else {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
kk = k - kstep + 1;
|
||||
kkw = *nb + kk - *n;
|
||||
if (kp != kk) {
|
||||
i__1 = kp + kp * a_dim1;
|
||||
i__2 = kk + kk * a_dim1;
|
||||
a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i;
|
||||
i__1 = kk - 1 - kp;
|
||||
ccopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp +
|
||||
1) * a_dim1], lda);
|
||||
if (kp > 1) {
|
||||
i__1 = kp - 1;
|
||||
ccopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1
|
||||
+ 1], &c__1);
|
||||
}
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
cswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k
|
||||
+ 1) * a_dim1], lda);
|
||||
}
|
||||
i__1 = *n - kk + 1;
|
||||
cswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw *
|
||||
w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ccopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &
|
||||
c__1);
|
||||
c_div(&q__1, &c_b1, &a[k + k * a_dim1]);
|
||||
r1.r = q__1.r, r1.i = q__1.i;
|
||||
i__1 = k - 1;
|
||||
cscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1);
|
||||
} else {
|
||||
if (k > 2) {
|
||||
i__1 = k - 1 + kw * w_dim1;
|
||||
d21.r = w[i__1].r, d21.i = w[i__1].i;
|
||||
c_div(&q__1, &w[k + kw * w_dim1], &d21);
|
||||
d11.r = q__1.r, d11.i = q__1.i;
|
||||
c_div(&q__1, &w[k - 1 + (kw - 1) * w_dim1], &d21);
|
||||
d22.r = q__1.r, d22.i = q__1.i;
|
||||
q__3.r = d11.r * d22.r - d11.i * d22.i, q__3.i = d11.r *
|
||||
d22.i + d11.i * d22.r;
|
||||
q__2.r = q__3.r - 1.f, q__2.i = q__3.i - 0.f;
|
||||
c_div(&q__1, &c_b1, &q__2);
|
||||
t.r = q__1.r, t.i = q__1.i;
|
||||
c_div(&q__1, &t, &d21);
|
||||
d21.r = q__1.r, d21.i = q__1.i;
|
||||
i__1 = k - 2;
|
||||
for (j = 1; j <= i__1; ++j) {
|
||||
i__2 = j + (k - 1) * a_dim1;
|
||||
i__3 = j + (kw - 1) * w_dim1;
|
||||
q__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
|
||||
q__3.i = d11.r * w[i__3].i + d11.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + kw * w_dim1;
|
||||
q__2.r = q__3.r - w[i__4].r, q__2.i = q__3.i - w[i__4]
|
||||
.i;
|
||||
q__1.r = d21.r * q__2.r - d21.i * q__2.i, q__1.i =
|
||||
d21.r * q__2.i + d21.i * q__2.r;
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
i__2 = j + k * a_dim1;
|
||||
i__3 = j + kw * w_dim1;
|
||||
q__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
|
||||
q__3.i = d22.r * w[i__3].i + d22.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + (kw - 1) * w_dim1;
|
||||
q__2.r = q__3.r - w[i__4].r, q__2.i = q__3.i - w[i__4]
|
||||
.i;
|
||||
q__1.r = d21.r * q__2.r - d21.i * q__2.i, q__1.i =
|
||||
d21.r * q__2.i + d21.i * q__2.r;
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
/* L20: */
|
||||
}
|
||||
}
|
||||
i__1 = k - 1 + (k - 1) * a_dim1;
|
||||
i__2 = k - 1 + (kw - 1) * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k - 1 + k * a_dim1;
|
||||
i__2 = k - 1 + kw * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + kw * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -kp;
|
||||
ipiv[k - 1] = -kp;
|
||||
}
|
||||
k -= kstep;
|
||||
goto L10;
|
||||
L30:
|
||||
j = k + 1;
|
||||
L60:
|
||||
jj = j;
|
||||
jp = ipiv[j];
|
||||
if (jp < 0) {
|
||||
jp = -jp;
|
||||
++j;
|
||||
}
|
||||
++j;
|
||||
if (jp != jj && j <= *n) {
|
||||
i__1 = *n - j + 1;
|
||||
cswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda);
|
||||
}
|
||||
if (j < *n) {
|
||||
goto L60;
|
||||
}
|
||||
*kb = *n - k;
|
||||
} else {
|
||||
k = 1;
|
||||
L70:
|
||||
if ((k >= *nb && *nb < *n) || k > *n) {
|
||||
goto L90;
|
||||
}
|
||||
i__1 = *n - k + 1;
|
||||
ccopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1);
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
q__1.r = -1.f, q__1.i = -0.f;
|
||||
cgemv_("No transpose", &i__1, &i__2, &q__1, &a[k + a_dim1], lda, &w[k
|
||||
+ w_dim1], ldw, &c_b1, &w[k + k * w_dim1], &c__1, (ftnlen)12);
|
||||
kstep = 1;
|
||||
i__1 = k + k * w_dim1;
|
||||
absakk = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[k + k *
|
||||
w_dim1]), dabs(r__2));
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
imax = k + icamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
i__1 = imax + k * w_dim1;
|
||||
colmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[imax
|
||||
+ k * w_dim1]), dabs(r__2));
|
||||
} else {
|
||||
colmax = 0.f;
|
||||
}
|
||||
if (dmax(absakk,colmax) == 0.f) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
} else {
|
||||
if (absakk >= alpha * colmax) {
|
||||
kp = k;
|
||||
} else {
|
||||
i__1 = imax - k;
|
||||
ccopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = *n - imax + 1;
|
||||
ccopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k +
|
||||
1) * w_dim1], &c__1);
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
q__1.r = -1.f, q__1.i = -0.f;
|
||||
cgemv_("No transpose", &i__1, &i__2, &q__1, &a[k + a_dim1],
|
||||
lda, &w[imax + w_dim1], ldw, &c_b1, &w[k + (k + 1) *
|
||||
w_dim1], &c__1, (ftnlen)12);
|
||||
i__1 = imax - k;
|
||||
jmax = k - 1 + icamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1)
|
||||
;
|
||||
i__1 = jmax + (k + 1) * w_dim1;
|
||||
rowmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[
|
||||
jmax + (k + 1) * w_dim1]), dabs(r__2));
|
||||
if (imax < *n) {
|
||||
i__1 = *n - imax;
|
||||
jmax = imax + icamax_(&i__1, &w[imax + 1 + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
/* Computing MAX */
|
||||
i__1 = jmax + (k + 1) * w_dim1;
|
||||
r__3 = rowmax, r__4 = (r__1 = w[i__1].r, dabs(r__1)) + (
|
||||
r__2 = r_imag(&w[jmax + (k + 1) * w_dim1]), dabs(
|
||||
r__2));
|
||||
rowmax = dmax(r__3,r__4);
|
||||
}
|
||||
if (absakk >= alpha * colmax * (colmax / rowmax)) {
|
||||
kp = k;
|
||||
} else /* if(complicated condition) */ {
|
||||
i__1 = imax + (k + 1) * w_dim1;
|
||||
if ((r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[
|
||||
imax + (k + 1) * w_dim1]), dabs(r__2)) >= alpha *
|
||||
rowmax) {
|
||||
kp = imax;
|
||||
i__1 = *n - k + 1;
|
||||
ccopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k +
|
||||
k * w_dim1], &c__1);
|
||||
} else {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
kk = k + kstep - 1;
|
||||
if (kp != kk) {
|
||||
i__1 = kp + kp * a_dim1;
|
||||
i__2 = kk + kk * a_dim1;
|
||||
a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i;
|
||||
i__1 = kp - kk - 1;
|
||||
ccopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk +
|
||||
1) * a_dim1], lda);
|
||||
if (kp < *n) {
|
||||
i__1 = *n - kp;
|
||||
ccopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1
|
||||
+ kp * a_dim1], &c__1);
|
||||
}
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
cswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda);
|
||||
}
|
||||
cswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
i__1 = *n - k + 1;
|
||||
ccopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
|
||||
c__1);
|
||||
if (k < *n) {
|
||||
c_div(&q__1, &c_b1, &a[k + k * a_dim1]);
|
||||
r1.r = q__1.r, r1.i = q__1.i;
|
||||
i__1 = *n - k;
|
||||
cscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1);
|
||||
}
|
||||
} else {
|
||||
if (k < *n - 1) {
|
||||
i__1 = k + 1 + k * w_dim1;
|
||||
d21.r = w[i__1].r, d21.i = w[i__1].i;
|
||||
c_div(&q__1, &w[k + 1 + (k + 1) * w_dim1], &d21);
|
||||
d11.r = q__1.r, d11.i = q__1.i;
|
||||
c_div(&q__1, &w[k + k * w_dim1], &d21);
|
||||
d22.r = q__1.r, d22.i = q__1.i;
|
||||
q__3.r = d11.r * d22.r - d11.i * d22.i, q__3.i = d11.r *
|
||||
d22.i + d11.i * d22.r;
|
||||
q__2.r = q__3.r - 1.f, q__2.i = q__3.i - 0.f;
|
||||
c_div(&q__1, &c_b1, &q__2);
|
||||
t.r = q__1.r, t.i = q__1.i;
|
||||
c_div(&q__1, &t, &d21);
|
||||
d21.r = q__1.r, d21.i = q__1.i;
|
||||
i__1 = *n;
|
||||
for (j = k + 2; j <= i__1; ++j) {
|
||||
i__2 = j + k * a_dim1;
|
||||
i__3 = j + k * w_dim1;
|
||||
q__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
|
||||
q__3.i = d11.r * w[i__3].i + d11.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + (k + 1) * w_dim1;
|
||||
q__2.r = q__3.r - w[i__4].r, q__2.i = q__3.i - w[i__4]
|
||||
.i;
|
||||
q__1.r = d21.r * q__2.r - d21.i * q__2.i, q__1.i =
|
||||
d21.r * q__2.i + d21.i * q__2.r;
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
i__2 = j + (k + 1) * a_dim1;
|
||||
i__3 = j + (k + 1) * w_dim1;
|
||||
q__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
|
||||
q__3.i = d22.r * w[i__3].i + d22.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + k * w_dim1;
|
||||
q__2.r = q__3.r - w[i__4].r, q__2.i = q__3.i - w[i__4]
|
||||
.i;
|
||||
q__1.r = d21.r * q__2.r - d21.i * q__2.i, q__1.i =
|
||||
d21.r * q__2.i + d21.i * q__2.r;
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
/* L80: */
|
||||
}
|
||||
}
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + k * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + 1 + k * a_dim1;
|
||||
i__2 = k + 1 + k * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + 1 + (k + 1) * a_dim1;
|
||||
i__2 = k + 1 + (k + 1) * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -kp;
|
||||
ipiv[k + 1] = -kp;
|
||||
}
|
||||
k += kstep;
|
||||
goto L70;
|
||||
L90:
|
||||
j = k - 1;
|
||||
L120:
|
||||
jj = j;
|
||||
jp = ipiv[j];
|
||||
if (jp < 0) {
|
||||
jp = -jp;
|
||||
--j;
|
||||
}
|
||||
--j;
|
||||
if (jp != jj && j >= 1) {
|
||||
cswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda);
|
||||
}
|
||||
if (j > 1) {
|
||||
goto L120;
|
||||
}
|
||||
*kb = k - 1;
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,236 @@
|
|||
#include "relapack.h"
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
static void RELAPACK_csytrf_rook_rec(const char *, const int *, const int *, int *,
|
||||
float *, const int *, int *, float *, const int *, int *);
|
||||
|
||||
|
||||
/** CSYTRF_ROOK computes the factorization of a complex symmetric matrix A using the bounded Bunch-Kaufman ("rook") diagonal pivoting method.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's csytrf_rook.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d8/dc8/csytrf__rook_8f.html
|
||||
* */
|
||||
void RELAPACK_csytrf_rook(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
|
||||
// Required work size
|
||||
const int cleanlWork = *n * (*n / 2);
|
||||
int minlWork = cleanlWork;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
minlWork = 1;
|
||||
#endif
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
else if (*lWork < minlWork && *lWork != -1)
|
||||
*info = -7;
|
||||
else if (*lWork == -1) {
|
||||
// Work size query
|
||||
*Work = cleanlWork;
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure Work size
|
||||
float *cleanWork = Work;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (!*info && *lWork < cleanlWork) {
|
||||
cleanWork = malloc(cleanlWork * 2 * sizeof(float));
|
||||
if (!cleanWork)
|
||||
*info = -7;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("CSYTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Dummy argument
|
||||
int nout;
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_csytrf_rook_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info);
|
||||
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (cleanWork != Work)
|
||||
free(cleanWork);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** csytrf_rook's recursive compute kernel */
|
||||
static void RELAPACK_csytrf_rook_rec(
|
||||
const char *uplo, const int *n_full, const int *n, int *n_out,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *ldWork, int *info
|
||||
) {
|
||||
|
||||
// top recursion level?
|
||||
const int top = *n_full == *n;
|
||||
|
||||
if (*n <= MAX(CROSSOVER_CSYTRF_ROOK, 3)) {
|
||||
// Unblocked
|
||||
if (top) {
|
||||
LAPACK(csytf2)(uplo, n, A, ldA, ipiv, info);
|
||||
*n_out = *n;
|
||||
} else
|
||||
RELAPACK_csytrf_rook_rec2(uplo, n_full, n, n_out, A, ldA, ipiv, Work, ldWork, info);
|
||||
return;
|
||||
}
|
||||
|
||||
int info1, info2;
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1., 0. };
|
||||
const float MONE[] = { -1., 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
const int n_rest = *n_full - *n;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// Splitting (setup)
|
||||
int n1 = CREC_SPLIT(*n);
|
||||
int n2 = *n - n1;
|
||||
|
||||
// Work_L *
|
||||
float *const Work_L = Work;
|
||||
|
||||
// recursion(A_L)
|
||||
int n1_out;
|
||||
RELAPACK_csytrf_rook_rec(uplo, n_full, &n1, &n1_out, A, ldA, ipiv, Work_L, ldWork, &info1);
|
||||
n1 = n1_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n2 = *n - n1;
|
||||
const int n_full2 = *n_full - n1;
|
||||
|
||||
// * *
|
||||
// A_BL A_BR
|
||||
// A_BL_B A_BR_B
|
||||
float *const A_BL = A + 2 * n1;
|
||||
float *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
float *const A_BL_B = A + 2 * *n;
|
||||
float *const A_BR_B = A + 2 * *ldA * n1 + 2 * *n;
|
||||
|
||||
// * *
|
||||
// Work_BL Work_BR
|
||||
// * *
|
||||
// (top recursion level: use Work as Work_BR)
|
||||
float *const Work_BL = Work + 2 * n1;
|
||||
float *const Work_BR = top ? Work : Work + 2 * *ldWork * n1 + 2 * n1;
|
||||
const int ldWork_BR = top ? n2 : *ldWork;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// A_BR = A_BR - A_BL Work_BL'
|
||||
RELAPACK_cgemmt(uplo, "N", "T", &n2, &n1, MONE, A_BL, ldA, Work_BL, ldWork, ONE, A_BR, ldA);
|
||||
BLAS(cgemm)("N", "T", &n_rest, &n2, &n1, MONE, A_BL_B, ldA, Work_BL, ldWork, ONE, A_BR_B, ldA);
|
||||
|
||||
// recursion(A_BR)
|
||||
int n2_out;
|
||||
RELAPACK_csytrf_rook_rec(uplo, &n_full2, &n2, &n2_out, A_BR, ldA, ipiv_B, Work_BR, &ldWork_BR, &info2);
|
||||
|
||||
if (n2_out != n2) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// last column of A_BR
|
||||
float *const A_BR_r = A_BR + 2 * *ldA * n2_out + 2 * n2_out;
|
||||
|
||||
// last row of A_BL
|
||||
float *const A_BL_b = A_BL + 2 * n2_out;
|
||||
|
||||
// last row of Work_BL
|
||||
float *const Work_BL_b = Work_BL + 2 * n2_out;
|
||||
|
||||
// A_BR_r = A_BR_r + A_BL_b Work_BL_b'
|
||||
BLAS(cgemv)("N", &n_restp1, &n1, ONE, A_BL_b, ldA, Work_BL_b, ldWork, ONE, A_BR_r, iONE);
|
||||
}
|
||||
n2 = n2_out;
|
||||
|
||||
// shift pivots
|
||||
int i;
|
||||
for (i = 0; i < n2; i++)
|
||||
if (ipiv_B[i] > 0)
|
||||
ipiv_B[i] += n1;
|
||||
else
|
||||
ipiv_B[i] -= n1;
|
||||
|
||||
*info = info1 || info2;
|
||||
*n_out = n1 + n2;
|
||||
} else {
|
||||
// Splitting (setup)
|
||||
int n2 = CREC_SPLIT(*n);
|
||||
int n1 = *n - n2;
|
||||
|
||||
// * Work_R
|
||||
// (top recursion level: use Work as Work_R)
|
||||
float *const Work_R = top ? Work : Work + 2 * *ldWork * n1;
|
||||
|
||||
// recursion(A_R)
|
||||
int n2_out;
|
||||
RELAPACK_csytrf_rook_rec(uplo, n_full, &n2, &n2_out, A, ldA, ipiv, Work_R, ldWork, &info2);
|
||||
const int n2_diff = n2 - n2_out;
|
||||
n2 = n2_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n1 = *n - n2;
|
||||
const int n_full1 = *n_full - n2;
|
||||
|
||||
// * A_TL_T A_TR_T
|
||||
// * A_TL A_TR
|
||||
// * * *
|
||||
float *const A_TL_T = A + 2 * *ldA * n_rest;
|
||||
float *const A_TR_T = A + 2 * *ldA * (n_rest + n1);
|
||||
float *const A_TL = A + 2 * *ldA * n_rest + 2 * n_rest;
|
||||
float *const A_TR = A + 2 * *ldA * (n_rest + n1) + 2 * n_rest;
|
||||
|
||||
// Work_L *
|
||||
// * Work_TR
|
||||
// * *
|
||||
// (top recursion level: Work_R was Work)
|
||||
float *const Work_L = Work;
|
||||
float *const Work_TR = Work + 2 * *ldWork * (top ? n2_diff : n1) + 2 * n_rest;
|
||||
const int ldWork_L = top ? n1 : *ldWork;
|
||||
|
||||
// A_TL = A_TL - A_TR Work_TR'
|
||||
RELAPACK_cgemmt(uplo, "N", "T", &n1, &n2, MONE, A_TR, ldA, Work_TR, ldWork, ONE, A_TL, ldA);
|
||||
BLAS(cgemm)("N", "T", &n_rest, &n1, &n2, MONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, ldA);
|
||||
|
||||
// recursion(A_TL)
|
||||
int n1_out;
|
||||
RELAPACK_csytrf_rook_rec(uplo, &n_full1, &n1, &n1_out, A, ldA, ipiv, Work_L, &ldWork_L, &info1);
|
||||
|
||||
if (n1_out != n1) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// A_TL_T_l = A_TL_T_l + A_TR_T Work_TR_t'
|
||||
BLAS(cgemv)("N", &n_restp1, &n2, ONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, iONE);
|
||||
}
|
||||
n1 = n1_out;
|
||||
|
||||
*info = info2 || info1;
|
||||
*n_out = n1 + n2;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,565 @@
|
|||
/* -- translated by f2c (version 20100827).
|
||||
You must link the resulting object file with libf2c:
|
||||
on Microsoft Windows system, link with libf2c.lib;
|
||||
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
|
||||
or, if you install libf2c.a in a standard place, with -lf2c -lm
|
||||
-- in that order, at the end of the command line, as in
|
||||
cc *.o -lf2c -lm
|
||||
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
|
||||
|
||||
http://www.netlib.org/f2c/libf2c.zip
|
||||
*/
|
||||
|
||||
#include "f2c.h"
|
||||
|
||||
/* Table of constant values */
|
||||
|
||||
static complex c_b1 = {1.f,0.f};
|
||||
static int c__1 = 1;
|
||||
|
||||
/** CSYTRF_ROOK_REC2 computes a partial factorization of a complex symmetric matrix using the bounded Bunch-K aufman ("rook") diagonal pivoting method.
|
||||
*
|
||||
* This routine is a minor modification of LAPACK's clasyf_rook.
|
||||
* It serves as an unblocked kernel in the recursive algorithms.
|
||||
* The blocked BLAS Level 3 updates were removed and moved to the
|
||||
* recursive algorithm.
|
||||
* */
|
||||
/* Subroutine */ void RELAPACK_csytrf_rook_rec2(char *uplo, int *n,
|
||||
int *nb, int *kb, complex *a, int *lda, int *ipiv,
|
||||
complex *w, int *ldw, int *info, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
int a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4;
|
||||
float r__1, r__2;
|
||||
complex q__1, q__2, q__3, q__4;
|
||||
|
||||
/* Builtin functions */
|
||||
double sqrt(double), r_imag(complex *);
|
||||
void c_div(complex *, complex *, complex *);
|
||||
|
||||
/* Local variables */
|
||||
static int j, k, p;
|
||||
static complex t, r1, d11, d12, d21, d22;
|
||||
static int ii, jj, kk, kp, kw, jp1, jp2, kkw;
|
||||
static logical done;
|
||||
static int imax, jmax;
|
||||
static float alpha;
|
||||
extern /* Subroutine */ int cscal_(int *, complex *, complex *,
|
||||
int *);
|
||||
extern logical lsame_(char *, char *, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ int cgemv_(char *, int *, int *, complex *
|
||||
, complex *, int *, complex *, int *, complex *, complex *
|
||||
, int *, ftnlen);
|
||||
static float sfmin;
|
||||
extern /* Subroutine */ int ccopy_(int *, complex *, int *,
|
||||
complex *, int *);
|
||||
static int itemp;
|
||||
extern /* Subroutine */ int cswap_(int *, complex *, int *,
|
||||
complex *, int *);
|
||||
static int kstep;
|
||||
static float stemp, absakk;
|
||||
extern int icamax_(int *, complex *, int *);
|
||||
extern double slamch_(char *, ftnlen);
|
||||
static float colmax, rowmax;
|
||||
|
||||
/* Parameter adjustments */
|
||||
a_dim1 = *lda;
|
||||
a_offset = 1 + a_dim1;
|
||||
a -= a_offset;
|
||||
--ipiv;
|
||||
w_dim1 = *ldw;
|
||||
w_offset = 1 + w_dim1;
|
||||
w -= w_offset;
|
||||
|
||||
/* Function Body */
|
||||
*info = 0;
|
||||
alpha = (sqrt(17.f) + 1.f) / 8.f;
|
||||
sfmin = slamch_("S", (ftnlen)1);
|
||||
if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
|
||||
k = *n;
|
||||
L10:
|
||||
kw = *nb + k - *n;
|
||||
if ((k <= *n - *nb + 1 && *nb < *n) || k < 1) {
|
||||
goto L30;
|
||||
}
|
||||
kstep = 1;
|
||||
p = k;
|
||||
ccopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
q__1.r = -1.f, q__1.i = -0.f;
|
||||
cgemv_("No transpose", &k, &i__1, &q__1, &a[(k + 1) * a_dim1 + 1],
|
||||
lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b1, &w[kw *
|
||||
w_dim1 + 1], &c__1, (ftnlen)12);
|
||||
}
|
||||
i__1 = k + kw * w_dim1;
|
||||
absakk = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[k + kw *
|
||||
w_dim1]), dabs(r__2));
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
imax = icamax_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
i__1 = imax + kw * w_dim1;
|
||||
colmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[imax
|
||||
+ kw * w_dim1]), dabs(r__2));
|
||||
} else {
|
||||
colmax = 0.f;
|
||||
}
|
||||
if (dmax(absakk,colmax) == 0.f) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
ccopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1);
|
||||
} else {
|
||||
if (! (absakk < alpha * colmax)) {
|
||||
kp = k;
|
||||
} else {
|
||||
done = FALSE_;
|
||||
L12:
|
||||
ccopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) *
|
||||
w_dim1 + 1], &c__1);
|
||||
i__1 = k - imax;
|
||||
ccopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax +
|
||||
1 + (kw - 1) * w_dim1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
q__1.r = -1.f, q__1.i = -0.f;
|
||||
cgemv_("No transpose", &k, &i__1, &q__1, &a[(k + 1) *
|
||||
a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1],
|
||||
ldw, &c_b1, &w[(kw - 1) * w_dim1 + 1], &c__1, (
|
||||
ftnlen)12);
|
||||
}
|
||||
if (imax != k) {
|
||||
i__1 = k - imax;
|
||||
jmax = imax + icamax_(&i__1, &w[imax + 1 + (kw - 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = jmax + (kw - 1) * w_dim1;
|
||||
rowmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&
|
||||
w[jmax + (kw - 1) * w_dim1]), dabs(r__2));
|
||||
} else {
|
||||
rowmax = 0.f;
|
||||
}
|
||||
if (imax > 1) {
|
||||
i__1 = imax - 1;
|
||||
itemp = icamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
|
||||
i__1 = itemp + (kw - 1) * w_dim1;
|
||||
stemp = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&
|
||||
w[itemp + (kw - 1) * w_dim1]), dabs(r__2));
|
||||
if (stemp > rowmax) {
|
||||
rowmax = stemp;
|
||||
jmax = itemp;
|
||||
}
|
||||
}
|
||||
i__1 = imax + (kw - 1) * w_dim1;
|
||||
if (! ((r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[
|
||||
imax + (kw - 1) * w_dim1]), dabs(r__2)) < alpha *
|
||||
rowmax)) {
|
||||
kp = imax;
|
||||
ccopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
|
||||
w_dim1 + 1], &c__1);
|
||||
done = TRUE_;
|
||||
} else if (p == jmax || rowmax <= colmax) {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
done = TRUE_;
|
||||
} else {
|
||||
p = imax;
|
||||
colmax = rowmax;
|
||||
imax = jmax;
|
||||
ccopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
|
||||
w_dim1 + 1], &c__1);
|
||||
}
|
||||
if (! done) {
|
||||
goto L12;
|
||||
}
|
||||
}
|
||||
kk = k - kstep + 1;
|
||||
kkw = *nb + kk - *n;
|
||||
if (kstep == 2 && p != k) {
|
||||
i__1 = k - p;
|
||||
ccopy_(&i__1, &a[p + 1 + k * a_dim1], &c__1, &a[p + (p + 1) *
|
||||
a_dim1], lda);
|
||||
ccopy_(&p, &a[k * a_dim1 + 1], &c__1, &a[p * a_dim1 + 1], &
|
||||
c__1);
|
||||
i__1 = *n - k + 1;
|
||||
cswap_(&i__1, &a[k + k * a_dim1], lda, &a[p + k * a_dim1],
|
||||
lda);
|
||||
i__1 = *n - kk + 1;
|
||||
cswap_(&i__1, &w[k + kkw * w_dim1], ldw, &w[p + kkw * w_dim1],
|
||||
ldw);
|
||||
}
|
||||
if (kp != kk) {
|
||||
i__1 = kp + k * a_dim1;
|
||||
i__2 = kk + k * a_dim1;
|
||||
a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i;
|
||||
i__1 = k - 1 - kp;
|
||||
ccopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp +
|
||||
1) * a_dim1], lda);
|
||||
ccopy_(&kp, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &
|
||||
c__1);
|
||||
i__1 = *n - kk + 1;
|
||||
cswap_(&i__1, &a[kk + kk * a_dim1], lda, &a[kp + kk * a_dim1],
|
||||
lda);
|
||||
i__1 = *n - kk + 1;
|
||||
cswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw *
|
||||
w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ccopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &
|
||||
c__1);
|
||||
if (k > 1) {
|
||||
i__1 = k + k * a_dim1;
|
||||
if ((r__1 = a[i__1].r, dabs(r__1)) + (r__2 = r_imag(&a[k
|
||||
+ k * a_dim1]), dabs(r__2)) >= sfmin) {
|
||||
c_div(&q__1, &c_b1, &a[k + k * a_dim1]);
|
||||
r1.r = q__1.r, r1.i = q__1.i;
|
||||
i__1 = k - 1;
|
||||
cscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1);
|
||||
} else /* if(complicated condition) */ {
|
||||
i__1 = k + k * a_dim1;
|
||||
if (a[i__1].r != 0.f || a[i__1].i != 0.f) {
|
||||
i__1 = k - 1;
|
||||
for (ii = 1; ii <= i__1; ++ii) {
|
||||
i__2 = ii + k * a_dim1;
|
||||
c_div(&q__1, &a[ii + k * a_dim1], &a[k + k *
|
||||
a_dim1]);
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
/* L14: */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (k > 2) {
|
||||
i__1 = k - 1 + kw * w_dim1;
|
||||
d12.r = w[i__1].r, d12.i = w[i__1].i;
|
||||
c_div(&q__1, &w[k + kw * w_dim1], &d12);
|
||||
d11.r = q__1.r, d11.i = q__1.i;
|
||||
c_div(&q__1, &w[k - 1 + (kw - 1) * w_dim1], &d12);
|
||||
d22.r = q__1.r, d22.i = q__1.i;
|
||||
q__3.r = d11.r * d22.r - d11.i * d22.i, q__3.i = d11.r *
|
||||
d22.i + d11.i * d22.r;
|
||||
q__2.r = q__3.r - 1.f, q__2.i = q__3.i - 0.f;
|
||||
c_div(&q__1, &c_b1, &q__2);
|
||||
t.r = q__1.r, t.i = q__1.i;
|
||||
i__1 = k - 2;
|
||||
for (j = 1; j <= i__1; ++j) {
|
||||
i__2 = j + (k - 1) * a_dim1;
|
||||
i__3 = j + (kw - 1) * w_dim1;
|
||||
q__4.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
|
||||
q__4.i = d11.r * w[i__3].i + d11.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + kw * w_dim1;
|
||||
q__3.r = q__4.r - w[i__4].r, q__3.i = q__4.i - w[i__4]
|
||||
.i;
|
||||
c_div(&q__2, &q__3, &d12);
|
||||
q__1.r = t.r * q__2.r - t.i * q__2.i, q__1.i = t.r *
|
||||
q__2.i + t.i * q__2.r;
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
i__2 = j + k * a_dim1;
|
||||
i__3 = j + kw * w_dim1;
|
||||
q__4.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
|
||||
q__4.i = d22.r * w[i__3].i + d22.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + (kw - 1) * w_dim1;
|
||||
q__3.r = q__4.r - w[i__4].r, q__3.i = q__4.i - w[i__4]
|
||||
.i;
|
||||
c_div(&q__2, &q__3, &d12);
|
||||
q__1.r = t.r * q__2.r - t.i * q__2.i, q__1.i = t.r *
|
||||
q__2.i + t.i * q__2.r;
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
/* L20: */
|
||||
}
|
||||
}
|
||||
i__1 = k - 1 + (k - 1) * a_dim1;
|
||||
i__2 = k - 1 + (kw - 1) * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k - 1 + k * a_dim1;
|
||||
i__2 = k - 1 + kw * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + kw * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -p;
|
||||
ipiv[k - 1] = -kp;
|
||||
}
|
||||
k -= kstep;
|
||||
goto L10;
|
||||
L30:
|
||||
j = k + 1;
|
||||
L60:
|
||||
kstep = 1;
|
||||
jp1 = 1;
|
||||
jj = j;
|
||||
jp2 = ipiv[j];
|
||||
if (jp2 < 0) {
|
||||
jp2 = -jp2;
|
||||
++j;
|
||||
jp1 = -ipiv[j];
|
||||
kstep = 2;
|
||||
}
|
||||
++j;
|
||||
if (jp2 != jj && j <= *n) {
|
||||
i__1 = *n - j + 1;
|
||||
cswap_(&i__1, &a[jp2 + j * a_dim1], lda, &a[jj + j * a_dim1], lda)
|
||||
;
|
||||
}
|
||||
jj = j - 1;
|
||||
if (jp1 != jj && kstep == 2) {
|
||||
i__1 = *n - j + 1;
|
||||
cswap_(&i__1, &a[jp1 + j * a_dim1], lda, &a[jj + j * a_dim1], lda)
|
||||
;
|
||||
}
|
||||
if (j <= *n) {
|
||||
goto L60;
|
||||
}
|
||||
*kb = *n - k;
|
||||
} else {
|
||||
k = 1;
|
||||
L70:
|
||||
if ((k >= *nb && *nb < *n) || k > *n) {
|
||||
goto L90;
|
||||
}
|
||||
kstep = 1;
|
||||
p = k;
|
||||
i__1 = *n - k + 1;
|
||||
ccopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1);
|
||||
if (k > 1) {
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
q__1.r = -1.f, q__1.i = -0.f;
|
||||
cgemv_("No transpose", &i__1, &i__2, &q__1, &a[k + a_dim1], lda, &
|
||||
w[k + w_dim1], ldw, &c_b1, &w[k + k * w_dim1], &c__1, (
|
||||
ftnlen)12);
|
||||
}
|
||||
i__1 = k + k * w_dim1;
|
||||
absakk = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[k + k *
|
||||
w_dim1]), dabs(r__2));
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
imax = k + icamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
i__1 = imax + k * w_dim1;
|
||||
colmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[imax
|
||||
+ k * w_dim1]), dabs(r__2));
|
||||
} else {
|
||||
colmax = 0.f;
|
||||
}
|
||||
if (dmax(absakk,colmax) == 0.f) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
i__1 = *n - k + 1;
|
||||
ccopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
|
||||
c__1);
|
||||
} else {
|
||||
if (! (absakk < alpha * colmax)) {
|
||||
kp = k;
|
||||
} else {
|
||||
done = FALSE_;
|
||||
L72:
|
||||
i__1 = imax - k;
|
||||
ccopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = *n - imax + 1;
|
||||
ccopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k +
|
||||
1) * w_dim1], &c__1);
|
||||
if (k > 1) {
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
q__1.r = -1.f, q__1.i = -0.f;
|
||||
cgemv_("No transpose", &i__1, &i__2, &q__1, &a[k + a_dim1]
|
||||
, lda, &w[imax + w_dim1], ldw, &c_b1, &w[k + (k +
|
||||
1) * w_dim1], &c__1, (ftnlen)12);
|
||||
}
|
||||
if (imax != k) {
|
||||
i__1 = imax - k;
|
||||
jmax = k - 1 + icamax_(&i__1, &w[k + (k + 1) * w_dim1], &
|
||||
c__1);
|
||||
i__1 = jmax + (k + 1) * w_dim1;
|
||||
rowmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&
|
||||
w[jmax + (k + 1) * w_dim1]), dabs(r__2));
|
||||
} else {
|
||||
rowmax = 0.f;
|
||||
}
|
||||
if (imax < *n) {
|
||||
i__1 = *n - imax;
|
||||
itemp = imax + icamax_(&i__1, &w[imax + 1 + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = itemp + (k + 1) * w_dim1;
|
||||
stemp = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&
|
||||
w[itemp + (k + 1) * w_dim1]), dabs(r__2));
|
||||
if (stemp > rowmax) {
|
||||
rowmax = stemp;
|
||||
jmax = itemp;
|
||||
}
|
||||
}
|
||||
i__1 = imax + (k + 1) * w_dim1;
|
||||
if (! ((r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[
|
||||
imax + (k + 1) * w_dim1]), dabs(r__2)) < alpha *
|
||||
rowmax)) {
|
||||
kp = imax;
|
||||
i__1 = *n - k + 1;
|
||||
ccopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k *
|
||||
w_dim1], &c__1);
|
||||
done = TRUE_;
|
||||
} else if (p == jmax || rowmax <= colmax) {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
done = TRUE_;
|
||||
} else {
|
||||
p = imax;
|
||||
colmax = rowmax;
|
||||
imax = jmax;
|
||||
i__1 = *n - k + 1;
|
||||
ccopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k *
|
||||
w_dim1], &c__1);
|
||||
}
|
||||
if (! done) {
|
||||
goto L72;
|
||||
}
|
||||
}
|
||||
kk = k + kstep - 1;
|
||||
if (kstep == 2 && p != k) {
|
||||
i__1 = p - k;
|
||||
ccopy_(&i__1, &a[k + k * a_dim1], &c__1, &a[p + k * a_dim1],
|
||||
lda);
|
||||
i__1 = *n - p + 1;
|
||||
ccopy_(&i__1, &a[p + k * a_dim1], &c__1, &a[p + p * a_dim1], &
|
||||
c__1);
|
||||
cswap_(&k, &a[k + a_dim1], lda, &a[p + a_dim1], lda);
|
||||
cswap_(&kk, &w[k + w_dim1], ldw, &w[p + w_dim1], ldw);
|
||||
}
|
||||
if (kp != kk) {
|
||||
i__1 = kp + k * a_dim1;
|
||||
i__2 = kk + k * a_dim1;
|
||||
a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i;
|
||||
i__1 = kp - k - 1;
|
||||
ccopy_(&i__1, &a[k + 1 + kk * a_dim1], &c__1, &a[kp + (k + 1)
|
||||
* a_dim1], lda);
|
||||
i__1 = *n - kp + 1;
|
||||
ccopy_(&i__1, &a[kp + kk * a_dim1], &c__1, &a[kp + kp *
|
||||
a_dim1], &c__1);
|
||||
cswap_(&kk, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda);
|
||||
cswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
i__1 = *n - k + 1;
|
||||
ccopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
|
||||
c__1);
|
||||
if (k < *n) {
|
||||
i__1 = k + k * a_dim1;
|
||||
if ((r__1 = a[i__1].r, dabs(r__1)) + (r__2 = r_imag(&a[k
|
||||
+ k * a_dim1]), dabs(r__2)) >= sfmin) {
|
||||
c_div(&q__1, &c_b1, &a[k + k * a_dim1]);
|
||||
r1.r = q__1.r, r1.i = q__1.i;
|
||||
i__1 = *n - k;
|
||||
cscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1);
|
||||
} else /* if(complicated condition) */ {
|
||||
i__1 = k + k * a_dim1;
|
||||
if (a[i__1].r != 0.f || a[i__1].i != 0.f) {
|
||||
i__1 = *n;
|
||||
for (ii = k + 1; ii <= i__1; ++ii) {
|
||||
i__2 = ii + k * a_dim1;
|
||||
c_div(&q__1, &a[ii + k * a_dim1], &a[k + k *
|
||||
a_dim1]);
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
/* L74: */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (k < *n - 1) {
|
||||
i__1 = k + 1 + k * w_dim1;
|
||||
d21.r = w[i__1].r, d21.i = w[i__1].i;
|
||||
c_div(&q__1, &w[k + 1 + (k + 1) * w_dim1], &d21);
|
||||
d11.r = q__1.r, d11.i = q__1.i;
|
||||
c_div(&q__1, &w[k + k * w_dim1], &d21);
|
||||
d22.r = q__1.r, d22.i = q__1.i;
|
||||
q__3.r = d11.r * d22.r - d11.i * d22.i, q__3.i = d11.r *
|
||||
d22.i + d11.i * d22.r;
|
||||
q__2.r = q__3.r - 1.f, q__2.i = q__3.i - 0.f;
|
||||
c_div(&q__1, &c_b1, &q__2);
|
||||
t.r = q__1.r, t.i = q__1.i;
|
||||
i__1 = *n;
|
||||
for (j = k + 2; j <= i__1; ++j) {
|
||||
i__2 = j + k * a_dim1;
|
||||
i__3 = j + k * w_dim1;
|
||||
q__4.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
|
||||
q__4.i = d11.r * w[i__3].i + d11.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + (k + 1) * w_dim1;
|
||||
q__3.r = q__4.r - w[i__4].r, q__3.i = q__4.i - w[i__4]
|
||||
.i;
|
||||
c_div(&q__2, &q__3, &d21);
|
||||
q__1.r = t.r * q__2.r - t.i * q__2.i, q__1.i = t.r *
|
||||
q__2.i + t.i * q__2.r;
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
i__2 = j + (k + 1) * a_dim1;
|
||||
i__3 = j + (k + 1) * w_dim1;
|
||||
q__4.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
|
||||
q__4.i = d22.r * w[i__3].i + d22.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + k * w_dim1;
|
||||
q__3.r = q__4.r - w[i__4].r, q__3.i = q__4.i - w[i__4]
|
||||
.i;
|
||||
c_div(&q__2, &q__3, &d21);
|
||||
q__1.r = t.r * q__2.r - t.i * q__2.i, q__1.i = t.r *
|
||||
q__2.i + t.i * q__2.r;
|
||||
a[i__2].r = q__1.r, a[i__2].i = q__1.i;
|
||||
/* L80: */
|
||||
}
|
||||
}
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + k * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + 1 + k * a_dim1;
|
||||
i__2 = k + 1 + k * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + 1 + (k + 1) * a_dim1;
|
||||
i__2 = k + 1 + (k + 1) * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -p;
|
||||
ipiv[k + 1] = -kp;
|
||||
}
|
||||
k += kstep;
|
||||
goto L70;
|
||||
L90:
|
||||
j = k - 1;
|
||||
L120:
|
||||
kstep = 1;
|
||||
jp1 = 1;
|
||||
jj = j;
|
||||
jp2 = ipiv[j];
|
||||
if (jp2 < 0) {
|
||||
jp2 = -jp2;
|
||||
--j;
|
||||
jp1 = -ipiv[j];
|
||||
kstep = 2;
|
||||
}
|
||||
--j;
|
||||
if (jp2 != jj && j >= 1) {
|
||||
cswap_(&j, &a[jp2 + a_dim1], lda, &a[jj + a_dim1], lda);
|
||||
}
|
||||
jj = j + 1;
|
||||
if (jp1 != jj && kstep == 2) {
|
||||
cswap_(&j, &a[jp1 + a_dim1], lda, &a[jj + a_dim1], lda);
|
||||
}
|
||||
if (j >= 1) {
|
||||
goto L120;
|
||||
}
|
||||
*kb = k - 1;
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,268 @@
|
|||
#include "relapack.h"
|
||||
#include <math.h>
|
||||
|
||||
static void RELAPACK_ctgsyl_rec(const char *, const int *, const int *,
|
||||
const int *, const float *, const int *, const float *, const int *,
|
||||
float *, const int *, const float *, const int *, const float *,
|
||||
const int *, float *, const int *, float *, float *, float *, int *);
|
||||
|
||||
|
||||
/** CTGSYL solves the generalized Sylvester equation.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's ctgsyl.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d7/de7/ctgsyl_8f.html
|
||||
* */
|
||||
void RELAPACK_ctgsyl(
|
||||
const char *trans, const int *ijob, const int *m, const int *n,
|
||||
const float *A, const int *ldA, const float *B, const int *ldB,
|
||||
float *C, const int *ldC,
|
||||
const float *D, const int *ldD, const float *E, const int *ldE,
|
||||
float *F, const int *ldF,
|
||||
float *scale, float *dif,
|
||||
float *Work, const int *lWork, int *iWork, int *info
|
||||
) {
|
||||
|
||||
// Parse arguments
|
||||
const int notran = LAPACK(lsame)(trans, "N");
|
||||
const int tran = LAPACK(lsame)(trans, "C");
|
||||
|
||||
// Compute work buffer size
|
||||
int lwmin = 1;
|
||||
if (notran && (*ijob == 1 || *ijob == 2))
|
||||
lwmin = MAX(1, 2 * *m * *n);
|
||||
*info = 0;
|
||||
|
||||
// Check arguments
|
||||
if (!tran && !notran)
|
||||
*info = -1;
|
||||
else if (notran && (*ijob < 0 || *ijob > 4))
|
||||
*info = -2;
|
||||
else if (*m <= 0)
|
||||
*info = -3;
|
||||
else if (*n <= 0)
|
||||
*info = -4;
|
||||
else if (*ldA < MAX(1, *m))
|
||||
*info = -6;
|
||||
else if (*ldB < MAX(1, *n))
|
||||
*info = -8;
|
||||
else if (*ldC < MAX(1, *m))
|
||||
*info = -10;
|
||||
else if (*ldD < MAX(1, *m))
|
||||
*info = -12;
|
||||
else if (*ldE < MAX(1, *n))
|
||||
*info = -14;
|
||||
else if (*ldF < MAX(1, *m))
|
||||
*info = -16;
|
||||
else if (*lWork < lwmin && *lWork != -1)
|
||||
*info = -20;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("CTGSYL", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
if (*lWork == -1) {
|
||||
// Work size query
|
||||
*Work = lwmin;
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleantrans = notran ? 'N' : 'C';
|
||||
|
||||
// Constant
|
||||
const float ZERO[] = { 0., 0. };
|
||||
|
||||
int isolve = 1;
|
||||
int ifunc = 0;
|
||||
if (notran) {
|
||||
if (*ijob >= 3) {
|
||||
ifunc = *ijob - 2;
|
||||
LAPACK(claset)("F", m, n, ZERO, ZERO, C, ldC);
|
||||
LAPACK(claset)("F", m, n, ZERO, ZERO, F, ldF);
|
||||
} else if (*ijob >= 1)
|
||||
isolve = 2;
|
||||
}
|
||||
|
||||
float scale2;
|
||||
int iround;
|
||||
for (iround = 1; iround <= isolve; iround++) {
|
||||
*scale = 1;
|
||||
float dscale = 0;
|
||||
float dsum = 1;
|
||||
RELAPACK_ctgsyl_rec(&cleantrans, &ifunc, m, n, A, ldA, B, ldB, C, ldC, D, ldD, E, ldE, F, ldF, scale, &dsum, &dscale, info);
|
||||
if (dscale != 0) {
|
||||
if (*ijob == 1 || *ijob == 3)
|
||||
*dif = sqrt(2 * *m * *n) / (dscale * sqrt(dsum));
|
||||
else
|
||||
*dif = sqrt(*m * *n) / (dscale * sqrt(dsum));
|
||||
}
|
||||
if (isolve == 2) {
|
||||
if (iround == 1) {
|
||||
if (notran)
|
||||
ifunc = *ijob;
|
||||
scale2 = *scale;
|
||||
LAPACK(clacpy)("F", m, n, C, ldC, Work, m);
|
||||
LAPACK(clacpy)("F", m, n, F, ldF, Work + 2 * *m * *n, m);
|
||||
LAPACK(claset)("F", m, n, ZERO, ZERO, C, ldC);
|
||||
LAPACK(claset)("F", m, n, ZERO, ZERO, F, ldF);
|
||||
} else {
|
||||
LAPACK(clacpy)("F", m, n, Work, m, C, ldC);
|
||||
LAPACK(clacpy)("F", m, n, Work + 2 * *m * *n, m, F, ldF);
|
||||
*scale = scale2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** ctgsyl's recursive vompute kernel */
|
||||
static void RELAPACK_ctgsyl_rec(
|
||||
const char *trans, const int *ifunc, const int *m, const int *n,
|
||||
const float *A, const int *ldA, const float *B, const int *ldB,
|
||||
float *C, const int *ldC,
|
||||
const float *D, const int *ldD, const float *E, const int *ldE,
|
||||
float *F, const int *ldF,
|
||||
float *scale, float *dsum, float *dscale,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*m <= MAX(CROSSOVER_CTGSYL, 1) && *n <= MAX(CROSSOVER_CTGSYL, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(ctgsy2)(trans, ifunc, m, n, A, ldA, B, ldB, C, ldC, D, ldD, E, ldE, F, ldF, scale, dsum, dscale, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1., 0. };
|
||||
const float MONE[] = { -1., 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Outputs
|
||||
float scale1[] = { 1., 0. };
|
||||
float scale2[] = { 1., 0. };
|
||||
int info1[] = { 0 };
|
||||
int info2[] = { 0 };
|
||||
|
||||
if (*m > *n) {
|
||||
// Splitting
|
||||
const int m1 = CREC_SPLIT(*m);
|
||||
const int m2 = *m - m1;
|
||||
|
||||
// A_TL A_TR
|
||||
// 0 A_BR
|
||||
const float *const A_TL = A;
|
||||
const float *const A_TR = A + 2 * *ldA * m1;
|
||||
const float *const A_BR = A + 2 * *ldA * m1 + 2 * m1;
|
||||
|
||||
// C_T
|
||||
// C_B
|
||||
float *const C_T = C;
|
||||
float *const C_B = C + 2 * m1;
|
||||
|
||||
// D_TL D_TR
|
||||
// 0 D_BR
|
||||
const float *const D_TL = D;
|
||||
const float *const D_TR = D + 2 * *ldD * m1;
|
||||
const float *const D_BR = D + 2 * *ldD * m1 + 2 * m1;
|
||||
|
||||
// F_T
|
||||
// F_B
|
||||
float *const F_T = F;
|
||||
float *const F_B = F + 2 * m1;
|
||||
|
||||
if (*trans == 'N') {
|
||||
// recursion(A_BR, B, C_B, D_BR, E, F_B)
|
||||
RELAPACK_ctgsyl_rec(trans, ifunc, &m2, n, A_BR, ldA, B, ldB, C_B, ldC, D_BR, ldD, E, ldE, F_B, ldF, scale1, dsum, dscale, info1);
|
||||
// C_T = C_T - A_TR * C_B
|
||||
BLAS(cgemm)("N", "N", &m1, n, &m2, MONE, A_TR, ldA, C_B, ldC, scale1, C_T, ldC);
|
||||
// F_T = F_T - D_TR * C_B
|
||||
BLAS(cgemm)("N", "N", &m1, n, &m2, MONE, D_TR, ldD, C_B, ldC, scale1, F_T, ldF);
|
||||
// recursion(A_TL, B, C_T, D_TL, E, F_T)
|
||||
RELAPACK_ctgsyl_rec(trans, ifunc, &m1, n, A_TL, ldA, B, ldB, C_T, ldC, D_TL, ldD, E, ldE, F_T, ldF, scale2, dsum, dscale, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1) {
|
||||
LAPACK(clascl)("G", iONE, iONE, ONE, scale2, &m2, n, C_B, ldC, info);
|
||||
LAPACK(clascl)("G", iONE, iONE, ONE, scale2, &m2, n, F_B, ldF, info);
|
||||
}
|
||||
} else {
|
||||
// recursion(A_TL, B, C_T, D_TL, E, F_T)
|
||||
RELAPACK_ctgsyl_rec(trans, ifunc, &m1, n, A_TL, ldA, B, ldB, C_T, ldC, D_TL, ldD, E, ldE, F_T, ldF, scale1, dsum, dscale, info1);
|
||||
// apply scale
|
||||
if (scale1[0] != 1)
|
||||
LAPACK(clascl)("G", iONE, iONE, ONE, scale1, &m2, n, F_B, ldF, info);
|
||||
// C_B = C_B - A_TR^H * C_T
|
||||
BLAS(cgemm)("C", "N", &m2, n, &m1, MONE, A_TR, ldA, C_T, ldC, scale1, C_B, ldC);
|
||||
// C_B = C_B - D_TR^H * F_T
|
||||
BLAS(cgemm)("C", "N", &m2, n, &m1, MONE, D_TR, ldD, F_T, ldC, ONE, C_B, ldC);
|
||||
// recursion(A_BR, B, C_B, D_BR, E, F_B)
|
||||
RELAPACK_ctgsyl_rec(trans, ifunc, &m2, n, A_BR, ldA, B, ldB, C_B, ldC, D_BR, ldD, E, ldE, F_B, ldF, scale2, dsum, dscale, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1) {
|
||||
LAPACK(clascl)("G", iONE, iONE, ONE, scale2, &m1, n, C_T, ldC, info);
|
||||
LAPACK(clascl)("G", iONE, iONE, ONE, scale2, &m1, n, F_T, ldF, info);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Splitting
|
||||
const int n1 = CREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// B_TL B_TR
|
||||
// 0 B_BR
|
||||
const float *const B_TL = B;
|
||||
const float *const B_TR = B + 2 * *ldB * n1;
|
||||
const float *const B_BR = B + 2 * *ldB * n1 + 2 * n1;
|
||||
|
||||
// C_L C_R
|
||||
float *const C_L = C;
|
||||
float *const C_R = C + 2 * *ldC * n1;
|
||||
|
||||
// E_TL E_TR
|
||||
// 0 E_BR
|
||||
const float *const E_TL = E;
|
||||
const float *const E_TR = E + 2 * *ldE * n1;
|
||||
const float *const E_BR = E + 2 * *ldE * n1 + 2 * n1;
|
||||
|
||||
// F_L F_R
|
||||
float *const F_L = F;
|
||||
float *const F_R = F + 2 * *ldF * n1;
|
||||
|
||||
if (*trans == 'N') {
|
||||
// recursion(A, B_TL, C_L, D, E_TL, F_L)
|
||||
RELAPACK_ctgsyl_rec(trans, ifunc, m, &n1, A, ldA, B_TL, ldB, C_L, ldC, D, ldD, E_TL, ldE, F_L, ldF, scale1, dsum, dscale, info1);
|
||||
// C_R = C_R + F_L * B_TR
|
||||
BLAS(cgemm)("N", "N", m, &n2, &n1, ONE, F_L, ldF, B_TR, ldB, scale1, C_R, ldC);
|
||||
// F_R = F_R + F_L * E_TR
|
||||
BLAS(cgemm)("N", "N", m, &n2, &n1, ONE, F_L, ldF, E_TR, ldE, scale1, F_R, ldF);
|
||||
// recursion(A, B_BR, C_R, D, E_BR, F_R)
|
||||
RELAPACK_ctgsyl_rec(trans, ifunc, m, &n2, A, ldA, B_BR, ldB, C_R, ldC, D, ldD, E_BR, ldE, F_R, ldF, scale2, dsum, dscale, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1) {
|
||||
LAPACK(clascl)("G", iONE, iONE, ONE, scale2, m, &n1, C_L, ldC, info);
|
||||
LAPACK(clascl)("G", iONE, iONE, ONE, scale2, m, &n1, F_L, ldF, info);
|
||||
}
|
||||
} else {
|
||||
// recursion(A, B_BR, C_R, D, E_BR, F_R)
|
||||
RELAPACK_ctgsyl_rec(trans, ifunc, m, &n2, A, ldA, B_BR, ldB, C_R, ldC, D, ldD, E_BR, ldE, F_R, ldF, scale1, dsum, dscale, info1);
|
||||
// apply scale
|
||||
if (scale1[0] != 1)
|
||||
LAPACK(clascl)("G", iONE, iONE, ONE, scale1, m, &n1, C_L, ldC, info);
|
||||
// F_L = F_L + C_R * B_TR
|
||||
BLAS(cgemm)("N", "C", m, &n1, &n2, ONE, C_R, ldC, B_TR, ldB, scale1, F_L, ldF);
|
||||
// F_L = F_L + F_R * E_TR
|
||||
BLAS(cgemm)("N", "C", m, &n1, &n2, ONE, F_R, ldF, E_TR, ldB, ONE, F_L, ldF);
|
||||
// recursion(A, B_TL, C_L, D, E_TL, F_L)
|
||||
RELAPACK_ctgsyl_rec(trans, ifunc, m, &n1, A, ldA, B_TL, ldB, C_L, ldC, D, ldD, E_TL, ldE, F_L, ldF, scale2, dsum, dscale, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1) {
|
||||
LAPACK(clascl)("G", iONE, iONE, ONE, scale2, m, &n2, C_R, ldC, info);
|
||||
LAPACK(clascl)("G", iONE, iONE, ONE, scale2, m, &n2, F_R, ldF, info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*scale = scale1[0] * scale2[0];
|
||||
*info = info1[0] || info2[0];
|
||||
}
|
|
@ -0,0 +1,163 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_ctrsyl_rec(const char *, const char *, const int *,
|
||||
const int *, const int *, const float *, const int *, const float *,
|
||||
const int *, float *, const int *, float *, int *);
|
||||
|
||||
|
||||
/** CTRSYL solves the complex Sylvester matrix equation.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's ctrsyl.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d8/df4/ctrsyl_8f.html
|
||||
* */
|
||||
void RELAPACK_ctrsyl(
|
||||
const char *tranA, const char *tranB, const int *isgn,
|
||||
const int *m, const int *n,
|
||||
const float *A, const int *ldA, const float *B, const int *ldB,
|
||||
float *C, const int *ldC, float *scale,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int notransA = LAPACK(lsame)(tranA, "N");
|
||||
const int ctransA = LAPACK(lsame)(tranA, "C");
|
||||
const int notransB = LAPACK(lsame)(tranB, "N");
|
||||
const int ctransB = LAPACK(lsame)(tranB, "C");
|
||||
*info = 0;
|
||||
if (!ctransA && !notransA)
|
||||
*info = -1;
|
||||
else if (!ctransB && !notransB)
|
||||
*info = -2;
|
||||
else if (*isgn != 1 && *isgn != -1)
|
||||
*info = -3;
|
||||
else if (*m < 0)
|
||||
*info = -4;
|
||||
else if (*n < 0)
|
||||
*info = -5;
|
||||
else if (*ldA < MAX(1, *m))
|
||||
*info = -7;
|
||||
else if (*ldB < MAX(1, *n))
|
||||
*info = -9;
|
||||
else if (*ldC < MAX(1, *m))
|
||||
*info = -11;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("CTRSYL", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleantranA = notransA ? 'N' : 'C';
|
||||
const char cleantranB = notransB ? 'N' : 'C';
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_ctrsyl_rec(&cleantranA, &cleantranB, isgn, m, n, A, ldA, B, ldB, C, ldC, scale, info);
|
||||
}
|
||||
|
||||
|
||||
/** ctrsyl's recursive compute kernel */
|
||||
static void RELAPACK_ctrsyl_rec(
|
||||
const char *tranA, const char *tranB, const int *isgn,
|
||||
const int *m, const int *n,
|
||||
const float *A, const int *ldA, const float *B, const int *ldB,
|
||||
float *C, const int *ldC, float *scale,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*m <= MAX(CROSSOVER_CTRSYL, 1) && *n <= MAX(CROSSOVER_CTRSYL, 1)) {
|
||||
// Unblocked
|
||||
RELAPACK_ctrsyl_rec2(tranA, tranB, isgn, m, n, A, ldA, B, ldB, C, ldC, scale, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1., 0. };
|
||||
const float MONE[] = { -1., 0. };
|
||||
const float MSGN[] = { -*isgn, 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Outputs
|
||||
float scale1[] = { 1., 0. };
|
||||
float scale2[] = { 1., 0. };
|
||||
int info1[] = { 0 };
|
||||
int info2[] = { 0 };
|
||||
|
||||
if (*m > *n) {
|
||||
// Splitting
|
||||
const int m1 = CREC_SPLIT(*m);
|
||||
const int m2 = *m - m1;
|
||||
|
||||
// A_TL A_TR
|
||||
// 0 A_BR
|
||||
const float *const A_TL = A;
|
||||
const float *const A_TR = A + 2 * *ldA * m1;
|
||||
const float *const A_BR = A + 2 * *ldA * m1 + 2 * m1;
|
||||
|
||||
// C_T
|
||||
// C_B
|
||||
float *const C_T = C;
|
||||
float *const C_B = C + 2 * m1;
|
||||
|
||||
if (*tranA == 'N') {
|
||||
// recusion(A_BR, B, C_B)
|
||||
RELAPACK_ctrsyl_rec(tranA, tranB, isgn, &m2, n, A_BR, ldA, B, ldB, C_B, ldC, scale1, info1);
|
||||
// C_T = C_T - A_TR * C_B
|
||||
BLAS(cgemm)("N", "N", &m1, n, &m2, MONE, A_TR, ldA, C_B, ldC, scale1, C_T, ldC);
|
||||
// recusion(A_TL, B, C_T)
|
||||
RELAPACK_ctrsyl_rec(tranA, tranB, isgn, &m1, n, A_TL, ldA, B, ldB, C_T, ldC, scale2, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1)
|
||||
LAPACK(clascl)("G", iONE, iONE, ONE, scale2, &m2, n, C_B, ldC, info);
|
||||
} else {
|
||||
// recusion(A_TL, B, C_T)
|
||||
RELAPACK_ctrsyl_rec(tranA, tranB, isgn, &m1, n, A_TL, ldA, B, ldB, C_T, ldC, scale1, info1);
|
||||
// C_B = C_B - A_TR' * C_T
|
||||
BLAS(cgemm)("C", "N", &m2, n, &m1, MONE, A_TR, ldA, C_T, ldC, scale1, C_B, ldC);
|
||||
// recusion(A_BR, B, C_B)
|
||||
RELAPACK_ctrsyl_rec(tranA, tranB, isgn, &m2, n, A_BR, ldA, B, ldB, C_B, ldC, scale2, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1)
|
||||
LAPACK(clascl)("G", iONE, iONE, ONE, scale2, &m1, n, C_B, ldC, info);
|
||||
}
|
||||
} else {
|
||||
// Splitting
|
||||
const int n1 = CREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// B_TL B_TR
|
||||
// 0 B_BR
|
||||
const float *const B_TL = B;
|
||||
const float *const B_TR = B + 2 * *ldB * n1;
|
||||
const float *const B_BR = B + 2 * *ldB * n1 + 2 * n1;
|
||||
|
||||
// C_L C_R
|
||||
float *const C_L = C;
|
||||
float *const C_R = C + 2 * *ldC * n1;
|
||||
|
||||
if (*tranB == 'N') {
|
||||
// recusion(A, B_TL, C_L)
|
||||
RELAPACK_ctrsyl_rec(tranA, tranB, isgn, m, &n1, A, ldA, B_TL, ldB, C_L, ldC, scale1, info1);
|
||||
// C_R = C_R -/+ C_L * B_TR
|
||||
BLAS(cgemm)("N", "N", m, &n2, &n1, MSGN, C_L, ldC, B_TR, ldB, scale1, C_R, ldC);
|
||||
// recusion(A, B_BR, C_R)
|
||||
RELAPACK_ctrsyl_rec(tranA, tranB, isgn, m, &n2, A, ldA, B_BR, ldB, C_R, ldC, scale2, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1)
|
||||
LAPACK(clascl)("G", iONE, iONE, ONE, scale2, m, &n1, C_L, ldC, info);
|
||||
} else {
|
||||
// recusion(A, B_BR, C_R)
|
||||
RELAPACK_ctrsyl_rec(tranA, tranB, isgn, m, &n2, A, ldA, B_BR, ldB, C_R, ldC, scale1, info1);
|
||||
// C_L = C_L -/+ C_R * B_TR'
|
||||
BLAS(cgemm)("N", "C", m, &n1, &n2, MSGN, C_R, ldC, B_TR, ldB, scale1, C_L, ldC);
|
||||
// recusion(A, B_TL, C_L)
|
||||
RELAPACK_ctrsyl_rec(tranA, tranB, isgn, m, &n1, A, ldA, B_TL, ldB, C_L, ldC, scale2, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1)
|
||||
LAPACK(clascl)("G", iONE, iONE, ONE, scale2, m, &n2, C_R, ldC, info);
|
||||
}
|
||||
}
|
||||
|
||||
*scale = scale1[0] * scale2[0];
|
||||
*info = info1[0] || info2[0];
|
||||
}
|
|
@ -0,0 +1,392 @@
|
|||
/* -- translated by f2c (version 20100827).
|
||||
You must link the resulting object file with libf2c:
|
||||
on Microsoft Windows system, link with libf2c.lib;
|
||||
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
|
||||
or, if you install libf2c.a in a standard place, with -lf2c -lm
|
||||
-- in that order, at the end of the command line, as in
|
||||
cc *.o -lf2c -lm
|
||||
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
|
||||
|
||||
http://www.netlib.org/f2c/libf2c.zip
|
||||
*/
|
||||
|
||||
#include "../config.h"
|
||||
#include "f2c.h"
|
||||
|
||||
#if BLAS_COMPLEX_FUNCTIONS_AS_ROUTINES
|
||||
complex cdotu_fun(int *n, complex *x, int *incx, complex *y, int *incy) {
|
||||
extern void cdotu_(complex *, int *, complex *, int *, complex *, int *);
|
||||
complex result;
|
||||
cdotu_(&result, n, x, incx, y, incy);
|
||||
return result;
|
||||
}
|
||||
#define cdotu_ cdotu_fun
|
||||
|
||||
complex cdotc_fun(int *n, complex *x, int *incx, complex *y, int *incy) {
|
||||
extern void cdotc_(complex *, int *, complex *, int *, complex *, int *);
|
||||
complex result;
|
||||
cdotc_(&result, n, x, incx, y, incy);
|
||||
return result;
|
||||
}
|
||||
#define cdotc_ cdotc_fun
|
||||
#endif
|
||||
|
||||
#if LAPACK_BLAS_COMPLEX_FUNCTIONS_AS_ROUTINES
|
||||
complex cladiv_fun(complex *a, complex *b) {
|
||||
extern void cladiv_(complex *, complex *, complex *);
|
||||
complex result;
|
||||
cladiv_(&result, a, b);
|
||||
return result;
|
||||
}
|
||||
#define cladiv_ cladiv_fun
|
||||
#endif
|
||||
|
||||
/* Table of constant values */
|
||||
|
||||
static int c__1 = 1;
|
||||
|
||||
/** RELAPACK_CTRSYL_REC2 solves the complex Sylvester matrix equation (unblocked algorithm)
|
||||
*
|
||||
* This routine is an exact copy of LAPACK's ctrsyl.
|
||||
* It serves as an unblocked kernel in the recursive algorithms.
|
||||
* */
|
||||
/* Subroutine */ void RELAPACK_ctrsyl_rec2(char *trana, char *tranb, int
|
||||
*isgn, int *m, int *n, complex *a, int *lda, complex *b,
|
||||
int *ldb, complex *c__, int *ldc, float *scale, int *info,
|
||||
ftnlen trana_len, ftnlen tranb_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
int a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
|
||||
i__3, i__4;
|
||||
float r__1, r__2;
|
||||
complex q__1, q__2, q__3, q__4;
|
||||
|
||||
/* Builtin functions */
|
||||
float r_imag(complex *);
|
||||
void r_cnjg(complex *, complex *);
|
||||
|
||||
/* Local variables */
|
||||
static int j, k, l;
|
||||
static complex a11;
|
||||
static float db;
|
||||
static complex x11;
|
||||
static float da11;
|
||||
static complex vec;
|
||||
static float dum[1], eps, sgn, smin;
|
||||
static complex suml, sumr;
|
||||
/* Complex */ complex cdotc_(int *, complex *, int
|
||||
*, complex *, int *);
|
||||
extern int lsame_(char *, char *, ftnlen, ftnlen);
|
||||
/* Complex */ complex cdotu_(int *, complex *, int
|
||||
*, complex *, int *);
|
||||
extern /* Subroutine */ int slabad_(float *, float *);
|
||||
extern float clange_(char *, int *, int *, complex *,
|
||||
int *, float *, ftnlen);
|
||||
/* Complex */ complex cladiv_(complex *, complex *);
|
||||
static float scaloc;
|
||||
extern float slamch_(char *, ftnlen);
|
||||
extern /* Subroutine */ int csscal_(int *, float *, complex *, int
|
||||
*), xerbla_(char *, int *, ftnlen);
|
||||
static float bignum;
|
||||
static int notrna, notrnb;
|
||||
static float smlnum;
|
||||
|
||||
/* Parameter adjustments */
|
||||
a_dim1 = *lda;
|
||||
a_offset = 1 + a_dim1;
|
||||
a -= a_offset;
|
||||
b_dim1 = *ldb;
|
||||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
c_dim1 = *ldc;
|
||||
c_offset = 1 + c_dim1;
|
||||
c__ -= c_offset;
|
||||
|
||||
/* Function Body */
|
||||
notrna = lsame_(trana, "N", (ftnlen)1, (ftnlen)1);
|
||||
notrnb = lsame_(tranb, "N", (ftnlen)1, (ftnlen)1);
|
||||
*info = 0;
|
||||
if (! notrna && ! lsame_(trana, "C", (ftnlen)1, (ftnlen)1)) {
|
||||
*info = -1;
|
||||
} else if (! notrnb && ! lsame_(tranb, "C", (ftnlen)1, (ftnlen)1)) {
|
||||
*info = -2;
|
||||
} else if (*isgn != 1 && *isgn != -1) {
|
||||
*info = -3;
|
||||
} else if (*m < 0) {
|
||||
*info = -4;
|
||||
} else if (*n < 0) {
|
||||
*info = -5;
|
||||
} else if (*lda < max(1,*m)) {
|
||||
*info = -7;
|
||||
} else if (*ldb < max(1,*n)) {
|
||||
*info = -9;
|
||||
} else if (*ldc < max(1,*m)) {
|
||||
*info = -11;
|
||||
}
|
||||
if (*info != 0) {
|
||||
i__1 = -(*info);
|
||||
xerbla_("CTRSY2", &i__1, (ftnlen)6);
|
||||
return;
|
||||
}
|
||||
*scale = 1.f;
|
||||
if (*m == 0 || *n == 0) {
|
||||
return;
|
||||
}
|
||||
eps = slamch_("P", (ftnlen)1);
|
||||
smlnum = slamch_("S", (ftnlen)1);
|
||||
bignum = 1.f / smlnum;
|
||||
slabad_(&smlnum, &bignum);
|
||||
smlnum = smlnum * (float) (*m * *n) / eps;
|
||||
bignum = 1.f / smlnum;
|
||||
/* Computing MAX */
|
||||
r__1 = smlnum, r__2 = eps * clange_("M", m, m, &a[a_offset], lda, dum, (
|
||||
ftnlen)1), r__1 = max(r__1,r__2), r__2 = eps * clange_("M", n, n,
|
||||
&b[b_offset], ldb, dum, (ftnlen)1);
|
||||
smin = dmax(r__1,r__2);
|
||||
sgn = (float) (*isgn);
|
||||
if (notrna && notrnb) {
|
||||
i__1 = *n;
|
||||
for (l = 1; l <= i__1; ++l) {
|
||||
for (k = *m; k >= 1; --k) {
|
||||
i__2 = *m - k;
|
||||
/* Computing MIN */
|
||||
i__3 = k + 1;
|
||||
/* Computing MIN */
|
||||
i__4 = k + 1;
|
||||
q__1 = cdotu_(&i__2, &a[k + min(i__3,*m) * a_dim1], lda, &c__[
|
||||
min(i__4,*m) + l * c_dim1], &c__1);
|
||||
suml.r = q__1.r, suml.i = q__1.i;
|
||||
i__2 = l - 1;
|
||||
q__1 = cdotu_(&i__2, &c__[k + c_dim1], ldc, &b[l * b_dim1 + 1]
|
||||
, &c__1);
|
||||
sumr.r = q__1.r, sumr.i = q__1.i;
|
||||
i__2 = k + l * c_dim1;
|
||||
q__3.r = sgn * sumr.r, q__3.i = sgn * sumr.i;
|
||||
q__2.r = suml.r + q__3.r, q__2.i = suml.i + q__3.i;
|
||||
q__1.r = c__[i__2].r - q__2.r, q__1.i = c__[i__2].i - q__2.i;
|
||||
vec.r = q__1.r, vec.i = q__1.i;
|
||||
scaloc = 1.f;
|
||||
i__2 = k + k * a_dim1;
|
||||
i__3 = l + l * b_dim1;
|
||||
q__2.r = sgn * b[i__3].r, q__2.i = sgn * b[i__3].i;
|
||||
q__1.r = a[i__2].r + q__2.r, q__1.i = a[i__2].i + q__2.i;
|
||||
a11.r = q__1.r, a11.i = q__1.i;
|
||||
da11 = (r__1 = a11.r, dabs(r__1)) + (r__2 = r_imag(&a11),
|
||||
dabs(r__2));
|
||||
if (da11 <= smin) {
|
||||
a11.r = smin, a11.i = 0.f;
|
||||
da11 = smin;
|
||||
*info = 1;
|
||||
}
|
||||
db = (r__1 = vec.r, dabs(r__1)) + (r__2 = r_imag(&vec), dabs(
|
||||
r__2));
|
||||
if (da11 < 1.f && db > 1.f) {
|
||||
if (db > bignum * da11) {
|
||||
scaloc = 1.f / db;
|
||||
}
|
||||
}
|
||||
q__3.r = scaloc, q__3.i = 0.f;
|
||||
q__2.r = vec.r * q__3.r - vec.i * q__3.i, q__2.i = vec.r *
|
||||
q__3.i + vec.i * q__3.r;
|
||||
q__1 = cladiv_(&q__2, &a11);
|
||||
x11.r = q__1.r, x11.i = q__1.i;
|
||||
if (scaloc != 1.f) {
|
||||
i__2 = *n;
|
||||
for (j = 1; j <= i__2; ++j) {
|
||||
csscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1);
|
||||
/* L10: */
|
||||
}
|
||||
*scale *= scaloc;
|
||||
}
|
||||
i__2 = k + l * c_dim1;
|
||||
c__[i__2].r = x11.r, c__[i__2].i = x11.i;
|
||||
/* L20: */
|
||||
}
|
||||
/* L30: */
|
||||
}
|
||||
} else if (! notrna && notrnb) {
|
||||
i__1 = *n;
|
||||
for (l = 1; l <= i__1; ++l) {
|
||||
i__2 = *m;
|
||||
for (k = 1; k <= i__2; ++k) {
|
||||
i__3 = k - 1;
|
||||
q__1 = cdotc_(&i__3, &a[k * a_dim1 + 1], &c__1, &c__[l *
|
||||
c_dim1 + 1], &c__1);
|
||||
suml.r = q__1.r, suml.i = q__1.i;
|
||||
i__3 = l - 1;
|
||||
q__1 = cdotu_(&i__3, &c__[k + c_dim1], ldc, &b[l * b_dim1 + 1]
|
||||
, &c__1);
|
||||
sumr.r = q__1.r, sumr.i = q__1.i;
|
||||
i__3 = k + l * c_dim1;
|
||||
q__3.r = sgn * sumr.r, q__3.i = sgn * sumr.i;
|
||||
q__2.r = suml.r + q__3.r, q__2.i = suml.i + q__3.i;
|
||||
q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
|
||||
vec.r = q__1.r, vec.i = q__1.i;
|
||||
scaloc = 1.f;
|
||||
r_cnjg(&q__2, &a[k + k * a_dim1]);
|
||||
i__3 = l + l * b_dim1;
|
||||
q__3.r = sgn * b[i__3].r, q__3.i = sgn * b[i__3].i;
|
||||
q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
|
||||
a11.r = q__1.r, a11.i = q__1.i;
|
||||
da11 = (r__1 = a11.r, dabs(r__1)) + (r__2 = r_imag(&a11),
|
||||
dabs(r__2));
|
||||
if (da11 <= smin) {
|
||||
a11.r = smin, a11.i = 0.f;
|
||||
da11 = smin;
|
||||
*info = 1;
|
||||
}
|
||||
db = (r__1 = vec.r, dabs(r__1)) + (r__2 = r_imag(&vec), dabs(
|
||||
r__2));
|
||||
if (da11 < 1.f && db > 1.f) {
|
||||
if (db > bignum * da11) {
|
||||
scaloc = 1.f / db;
|
||||
}
|
||||
}
|
||||
q__3.r = scaloc, q__3.i = 0.f;
|
||||
q__2.r = vec.r * q__3.r - vec.i * q__3.i, q__2.i = vec.r *
|
||||
q__3.i + vec.i * q__3.r;
|
||||
q__1 = cladiv_(&q__2, &a11);
|
||||
x11.r = q__1.r, x11.i = q__1.i;
|
||||
if (scaloc != 1.f) {
|
||||
i__3 = *n;
|
||||
for (j = 1; j <= i__3; ++j) {
|
||||
csscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1);
|
||||
/* L40: */
|
||||
}
|
||||
*scale *= scaloc;
|
||||
}
|
||||
i__3 = k + l * c_dim1;
|
||||
c__[i__3].r = x11.r, c__[i__3].i = x11.i;
|
||||
/* L50: */
|
||||
}
|
||||
/* L60: */
|
||||
}
|
||||
} else if (! notrna && ! notrnb) {
|
||||
for (l = *n; l >= 1; --l) {
|
||||
i__1 = *m;
|
||||
for (k = 1; k <= i__1; ++k) {
|
||||
i__2 = k - 1;
|
||||
q__1 = cdotc_(&i__2, &a[k * a_dim1 + 1], &c__1, &c__[l *
|
||||
c_dim1 + 1], &c__1);
|
||||
suml.r = q__1.r, suml.i = q__1.i;
|
||||
i__2 = *n - l;
|
||||
/* Computing MIN */
|
||||
i__3 = l + 1;
|
||||
/* Computing MIN */
|
||||
i__4 = l + 1;
|
||||
q__1 = cdotc_(&i__2, &c__[k + min(i__3,*n) * c_dim1], ldc, &b[
|
||||
l + min(i__4,*n) * b_dim1], ldb);
|
||||
sumr.r = q__1.r, sumr.i = q__1.i;
|
||||
i__2 = k + l * c_dim1;
|
||||
r_cnjg(&q__4, &sumr);
|
||||
q__3.r = sgn * q__4.r, q__3.i = sgn * q__4.i;
|
||||
q__2.r = suml.r + q__3.r, q__2.i = suml.i + q__3.i;
|
||||
q__1.r = c__[i__2].r - q__2.r, q__1.i = c__[i__2].i - q__2.i;
|
||||
vec.r = q__1.r, vec.i = q__1.i;
|
||||
scaloc = 1.f;
|
||||
i__2 = k + k * a_dim1;
|
||||
i__3 = l + l * b_dim1;
|
||||
q__3.r = sgn * b[i__3].r, q__3.i = sgn * b[i__3].i;
|
||||
q__2.r = a[i__2].r + q__3.r, q__2.i = a[i__2].i + q__3.i;
|
||||
r_cnjg(&q__1, &q__2);
|
||||
a11.r = q__1.r, a11.i = q__1.i;
|
||||
da11 = (r__1 = a11.r, dabs(r__1)) + (r__2 = r_imag(&a11),
|
||||
dabs(r__2));
|
||||
if (da11 <= smin) {
|
||||
a11.r = smin, a11.i = 0.f;
|
||||
da11 = smin;
|
||||
*info = 1;
|
||||
}
|
||||
db = (r__1 = vec.r, dabs(r__1)) + (r__2 = r_imag(&vec), dabs(
|
||||
r__2));
|
||||
if (da11 < 1.f && db > 1.f) {
|
||||
if (db > bignum * da11) {
|
||||
scaloc = 1.f / db;
|
||||
}
|
||||
}
|
||||
q__3.r = scaloc, q__3.i = 0.f;
|
||||
q__2.r = vec.r * q__3.r - vec.i * q__3.i, q__2.i = vec.r *
|
||||
q__3.i + vec.i * q__3.r;
|
||||
q__1 = cladiv_(&q__2, &a11);
|
||||
x11.r = q__1.r, x11.i = q__1.i;
|
||||
if (scaloc != 1.f) {
|
||||
i__2 = *n;
|
||||
for (j = 1; j <= i__2; ++j) {
|
||||
csscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1);
|
||||
/* L70: */
|
||||
}
|
||||
*scale *= scaloc;
|
||||
}
|
||||
i__2 = k + l * c_dim1;
|
||||
c__[i__2].r = x11.r, c__[i__2].i = x11.i;
|
||||
/* L80: */
|
||||
}
|
||||
/* L90: */
|
||||
}
|
||||
} else if (notrna && ! notrnb) {
|
||||
for (l = *n; l >= 1; --l) {
|
||||
for (k = *m; k >= 1; --k) {
|
||||
i__1 = *m - k;
|
||||
/* Computing MIN */
|
||||
i__2 = k + 1;
|
||||
/* Computing MIN */
|
||||
i__3 = k + 1;
|
||||
q__1 = cdotu_(&i__1, &a[k + min(i__2,*m) * a_dim1], lda, &c__[
|
||||
min(i__3,*m) + l * c_dim1], &c__1);
|
||||
suml.r = q__1.r, suml.i = q__1.i;
|
||||
i__1 = *n - l;
|
||||
/* Computing MIN */
|
||||
i__2 = l + 1;
|
||||
/* Computing MIN */
|
||||
i__3 = l + 1;
|
||||
q__1 = cdotc_(&i__1, &c__[k + min(i__2,*n) * c_dim1], ldc, &b[
|
||||
l + min(i__3,*n) * b_dim1], ldb);
|
||||
sumr.r = q__1.r, sumr.i = q__1.i;
|
||||
i__1 = k + l * c_dim1;
|
||||
r_cnjg(&q__4, &sumr);
|
||||
q__3.r = sgn * q__4.r, q__3.i = sgn * q__4.i;
|
||||
q__2.r = suml.r + q__3.r, q__2.i = suml.i + q__3.i;
|
||||
q__1.r = c__[i__1].r - q__2.r, q__1.i = c__[i__1].i - q__2.i;
|
||||
vec.r = q__1.r, vec.i = q__1.i;
|
||||
scaloc = 1.f;
|
||||
i__1 = k + k * a_dim1;
|
||||
r_cnjg(&q__3, &b[l + l * b_dim1]);
|
||||
q__2.r = sgn * q__3.r, q__2.i = sgn * q__3.i;
|
||||
q__1.r = a[i__1].r + q__2.r, q__1.i = a[i__1].i + q__2.i;
|
||||
a11.r = q__1.r, a11.i = q__1.i;
|
||||
da11 = (r__1 = a11.r, dabs(r__1)) + (r__2 = r_imag(&a11),
|
||||
dabs(r__2));
|
||||
if (da11 <= smin) {
|
||||
a11.r = smin, a11.i = 0.f;
|
||||
da11 = smin;
|
||||
*info = 1;
|
||||
}
|
||||
db = (r__1 = vec.r, dabs(r__1)) + (r__2 = r_imag(&vec), dabs(
|
||||
r__2));
|
||||
if (da11 < 1.f && db > 1.f) {
|
||||
if (db > bignum * da11) {
|
||||
scaloc = 1.f / db;
|
||||
}
|
||||
}
|
||||
q__3.r = scaloc, q__3.i = 0.f;
|
||||
q__2.r = vec.r * q__3.r - vec.i * q__3.i, q__2.i = vec.r *
|
||||
q__3.i + vec.i * q__3.r;
|
||||
q__1 = cladiv_(&q__2, &a11);
|
||||
x11.r = q__1.r, x11.i = q__1.i;
|
||||
if (scaloc != 1.f) {
|
||||
i__1 = *n;
|
||||
for (j = 1; j <= i__1; ++j) {
|
||||
csscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1);
|
||||
/* L100: */
|
||||
}
|
||||
*scale *= scaloc;
|
||||
}
|
||||
i__1 = k + l * c_dim1;
|
||||
c__[i__1].r = x11.r, c__[i__1].i = x11.i;
|
||||
/* L110: */
|
||||
}
|
||||
/* L120: */
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,107 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_ctrtri_rec(const char *, const char *, const int *,
|
||||
float *, const int *, int *);
|
||||
|
||||
|
||||
/** CTRTRI computes the inverse of a complex upper or lower triangular matrix A.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's ctrtri.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/df/df8/ctrtri_8f.html
|
||||
* */
|
||||
void RELAPACK_ctrtri(
|
||||
const char *uplo, const char *diag, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
const int nounit = LAPACK(lsame)(diag, "N");
|
||||
const int unit = LAPACK(lsame)(diag, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (!nounit && !unit)
|
||||
*info = -2;
|
||||
else if (*n < 0)
|
||||
*info = -3;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -5;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("CTRTRI", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
const char cleandiag = nounit ? 'N' : 'U';
|
||||
|
||||
// check for singularity
|
||||
if (nounit) {
|
||||
int i;
|
||||
for (i = 0; i < *n; i++)
|
||||
if (A[2 * (i + *ldA * i)] == 0 && A[2 * (i + *ldA * i) + 1] == 0) {
|
||||
*info = i;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_ctrtri_rec(&cleanuplo, &cleandiag, n, A, ldA, info);
|
||||
}
|
||||
|
||||
|
||||
/** ctrtri's recursive compute kernel */
|
||||
static void RELAPACK_ctrtri_rec(
|
||||
const char *uplo, const char *diag, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
){
|
||||
|
||||
if (*n <= MAX(CROSSOVER_CTRTRI, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(ctrti2)(uplo, diag, n, A, ldA, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1., 0. };
|
||||
const float MONE[] = { -1., 0. };
|
||||
|
||||
// Splitting
|
||||
const int n1 = CREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
float *const A_TL = A;
|
||||
float *const A_TR = A + 2 * *ldA * n1;
|
||||
float *const A_BL = A + 2 * n1;
|
||||
float *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
|
||||
// recursion(A_TL)
|
||||
RELAPACK_ctrtri_rec(uplo, diag, &n1, A_TL, ldA, info);
|
||||
if (*info)
|
||||
return;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// A_BL = - A_BL * A_TL
|
||||
BLAS(ctrmm)("R", "L", "N", diag, &n2, &n1, MONE, A_TL, ldA, A_BL, ldA);
|
||||
// A_BL = A_BR \ A_BL
|
||||
BLAS(ctrsm)("L", "L", "N", diag, &n2, &n1, ONE, A_BR, ldA, A_BL, ldA);
|
||||
} else {
|
||||
// A_TR = - A_TL * A_TR
|
||||
BLAS(ctrmm)("L", "U", "N", diag, &n1, &n2, MONE, A_TL, ldA, A_TR, ldA);
|
||||
// A_TR = A_TR / A_BR
|
||||
BLAS(ctrsm)("R", "U", "N", diag, &n1, &n2, ONE, A_BR, ldA, A_TR, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR)
|
||||
RELAPACK_ctrtri_rec(uplo, diag, &n2, A_BR, ldA, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
}
|
|
@ -0,0 +1,227 @@
|
|||
#include "relapack.h"
|
||||
#include "stdlib.h"
|
||||
|
||||
static void RELAPACK_dgbtrf_rec(const int *, const int *, const int *,
|
||||
const int *, double *, const int *, int *, double *, const int *, double *,
|
||||
const int *, int *);
|
||||
|
||||
|
||||
/** DGBTRF computes an LU factorization of a real m-by-n band matrix A using partial pivoting with row interchanges.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's dgbtrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/da/d87/dgbtrf_8f.html
|
||||
* */
|
||||
void RELAPACK_dgbtrf(
|
||||
const int *m, const int *n, const int *kl, const int *ku,
|
||||
double *Ab, const int *ldAb, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
*info = 0;
|
||||
if (*m < 0)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*kl < 0)
|
||||
*info = -3;
|
||||
else if (*ku < 0)
|
||||
*info = -4;
|
||||
else if (*ldAb < 2 * *kl + *ku + 1)
|
||||
*info = -6;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("DGBTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constant
|
||||
const double ZERO[] = { 0. };
|
||||
|
||||
// Result upper band width
|
||||
const int kv = *ku + *kl;
|
||||
|
||||
// Unskew A
|
||||
const int ldA[] = { *ldAb - 1 };
|
||||
double *const A = Ab + kv;
|
||||
|
||||
// Zero upper diagonal fill-in elements
|
||||
int i, j;
|
||||
for (j = 0; j < *n; j++) {
|
||||
double *const A_j = A + *ldA * j;
|
||||
for (i = MAX(0, j - kv); i < j - *ku; i++)
|
||||
A_j[i] = 0.;
|
||||
}
|
||||
|
||||
// Allocate work space
|
||||
const int n1 = DREC_SPLIT(*n);
|
||||
const int mWorkl = (kv > n1) ? MAX(1, *m - *kl) : kv;
|
||||
const int nWorkl = (kv > n1) ? n1 : kv;
|
||||
const int mWorku = (*kl > n1) ? n1 : *kl;
|
||||
const int nWorku = (*kl > n1) ? MAX(0, *n - *kl) : *kl;
|
||||
double *Workl = malloc(mWorkl * nWorkl * sizeof(double));
|
||||
double *Worku = malloc(mWorku * nWorku * sizeof(double));
|
||||
LAPACK(dlaset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl);
|
||||
LAPACK(dlaset)("U", &mWorku, &nWorku, ZERO, ZERO, Worku, &mWorku);
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_dgbtrf_rec(m, n, kl, ku, Ab, ldAb, ipiv, Workl, &mWorkl, Worku, &mWorku, info);
|
||||
|
||||
// Free work space
|
||||
free(Workl);
|
||||
free(Worku);
|
||||
}
|
||||
|
||||
|
||||
/** dgbtrf's recursive compute kernel */
|
||||
static void RELAPACK_dgbtrf_rec(
|
||||
const int *m, const int *n, const int *kl, const int *ku,
|
||||
double *Ab, const int *ldAb, int *ipiv,
|
||||
double *Workl, const int *ldWorkl, double *Worku, const int *ldWorku,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_DGBTRF, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(dgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1. };
|
||||
const double MONE[] = { -1. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Loop iterators
|
||||
int i, j;
|
||||
|
||||
// Output upper band width
|
||||
const int kv = *ku + *kl;
|
||||
|
||||
// Unskew A
|
||||
const int ldA[] = { *ldAb - 1 };
|
||||
double *const A = Ab + kv;
|
||||
|
||||
// Splitting
|
||||
const int n1 = MIN(DREC_SPLIT(*n), *kl);
|
||||
const int n2 = *n - n1;
|
||||
const int m1 = MIN(n1, *m);
|
||||
const int m2 = *m - m1;
|
||||
const int mn1 = MIN(m1, n1);
|
||||
const int mn2 = MIN(m2, n2);
|
||||
|
||||
// Ab_L *
|
||||
// Ab_BR
|
||||
double *const Ab_L = Ab;
|
||||
double *const Ab_BR = Ab + *ldAb * n1;
|
||||
|
||||
// A_L A_R
|
||||
double *const A_L = A;
|
||||
double *const A_R = A + *ldA * n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
double *const A_TL = A;
|
||||
double *const A_TR = A + *ldA * n1;
|
||||
double *const A_BL = A + m1;
|
||||
double *const A_BR = A + *ldA * n1 + m1;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_T = ipiv;
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// Banded splitting
|
||||
const int n21 = MIN(n2, kv - n1);
|
||||
const int n22 = MIN(n2 - n21, n1);
|
||||
const int m21 = MIN(m2, *kl - m1);
|
||||
const int m22 = MIN(m2 - m21, m1);
|
||||
|
||||
// n1 n21 n22
|
||||
// m * A_Rl ARr
|
||||
double *const A_Rl = A_R;
|
||||
double *const A_Rr = A_R + *ldA * n21;
|
||||
|
||||
// n1 n21 n22
|
||||
// m1 * A_TRl A_TRr
|
||||
// m21 A_BLt A_BRtl A_BRtr
|
||||
// m22 A_BLb A_BRbl A_BRbr
|
||||
double *const A_TRl = A_TR;
|
||||
double *const A_TRr = A_TR + *ldA * n21;
|
||||
double *const A_BLt = A_BL;
|
||||
double *const A_BLb = A_BL + m21;
|
||||
double *const A_BRtl = A_BR;
|
||||
double *const A_BRtr = A_BR + *ldA * n21;
|
||||
double *const A_BRbl = A_BR + m21;
|
||||
double *const A_BRbr = A_BR + *ldA * n21 + m21;
|
||||
|
||||
// recursion(Ab_L, ipiv_T)
|
||||
RELAPACK_dgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info);
|
||||
|
||||
// Workl = A_BLb
|
||||
LAPACK(dlacpy)("U", &m22, &n1, A_BLb, ldA, Workl, ldWorkl);
|
||||
|
||||
// partially redo swaps in A_L
|
||||
for (i = 0; i < mn1; i++) {
|
||||
const int ip = ipiv_T[i] - 1;
|
||||
if (ip != i) {
|
||||
if (ip < *kl)
|
||||
BLAS(dswap)(&i, A_L + i, ldA, A_L + ip, ldA);
|
||||
else
|
||||
BLAS(dswap)(&i, A_L + i, ldA, Workl + ip - *kl, ldWorkl);
|
||||
}
|
||||
}
|
||||
|
||||
// apply pivots to A_Rl
|
||||
LAPACK(dlaswp)(&n21, A_Rl, ldA, iONE, &mn1, ipiv_T, iONE);
|
||||
|
||||
// apply pivots to A_Rr columnwise
|
||||
for (j = 0; j < n22; j++) {
|
||||
double *const A_Rrj = A_Rr + *ldA * j;
|
||||
for (i = j; i < mn1; i++) {
|
||||
const int ip = ipiv_T[i] - 1;
|
||||
if (ip != i) {
|
||||
const double tmp = A_Rrj[i];
|
||||
A_Rrj[i] = A_Rr[ip];
|
||||
A_Rrj[ip] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A_TRl = A_TL \ A_TRl
|
||||
BLAS(dtrsm)("L", "L", "N", "U", &m1, &n21, ONE, A_TL, ldA, A_TRl, ldA);
|
||||
// Worku = A_TRr
|
||||
LAPACK(dlacpy)("L", &m1, &n22, A_TRr, ldA, Worku, ldWorku);
|
||||
// Worku = A_TL \ Worku
|
||||
BLAS(dtrsm)("L", "L", "N", "U", &m1, &n22, ONE, A_TL, ldA, Worku, ldWorku);
|
||||
// A_TRr = Worku
|
||||
LAPACK(dlacpy)("L", &m1, &n22, Worku, ldWorku, A_TRr, ldA);
|
||||
// A_BRtl = A_BRtl - A_BLt * A_TRl
|
||||
BLAS(dgemm)("N", "N", &m21, &n21, &n1, MONE, A_BLt, ldA, A_TRl, ldA, ONE, A_BRtl, ldA);
|
||||
// A_BRbl = A_BRbl - Workl * A_TRl
|
||||
BLAS(dgemm)("N", "N", &m22, &n21, &n1, MONE, Workl, ldWorkl, A_TRl, ldA, ONE, A_BRbl, ldA);
|
||||
// A_BRtr = A_BRtr - A_BLt * Worku
|
||||
BLAS(dgemm)("N", "N", &m21, &n22, &n1, MONE, A_BLt, ldA, Worku, ldWorku, ONE, A_BRtr, ldA);
|
||||
// A_BRbr = A_BRbr - Workl * Worku
|
||||
BLAS(dgemm)("N", "N", &m22, &n22, &n1, MONE, Workl, ldWorkl, Worku, ldWorku, ONE, A_BRbr, ldA);
|
||||
|
||||
// partially undo swaps in A_L
|
||||
for (i = mn1 - 1; i >= 0; i--) {
|
||||
const int ip = ipiv_T[i] - 1;
|
||||
if (ip != i) {
|
||||
if (ip < *kl)
|
||||
BLAS(dswap)(&i, A_L + i, ldA, A_L + ip, ldA);
|
||||
else
|
||||
BLAS(dswap)(&i, A_L + i, ldA, Workl + ip - *kl, ldWorkl);
|
||||
}
|
||||
}
|
||||
|
||||
// recursion(Ab_BR, ipiv_B)
|
||||
RELAPACK_dgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
// shift pivots
|
||||
for (i = 0; i < mn2; i++)
|
||||
ipiv_B[i] += n1;
|
||||
}
|
|
@ -0,0 +1,165 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_dgemmt_rec(const char *, const char *, const char *,
|
||||
const int *, const int *, const double *, const double *, const int *,
|
||||
const double *, const int *, const double *, double *, const int *);
|
||||
|
||||
static void RELAPACK_dgemmt_rec2(const char *, const char *, const char *,
|
||||
const int *, const int *, const double *, const double *, const int *,
|
||||
const double *, const int *, const double *, double *, const int *);
|
||||
|
||||
|
||||
/** DGEMMT computes a matrix-matrix product with general matrices but updates
|
||||
* only the upper or lower triangular part of the result matrix.
|
||||
*
|
||||
* This routine performs the same operation as the BLAS routine
|
||||
* dgemm(transA, transB, n, n, k, alpha, A, ldA, B, ldB, beta, C, ldC)
|
||||
* but only updates the triangular part of C specified by uplo:
|
||||
* If (*uplo == 'L'), only the lower triangular part of C is updated,
|
||||
* otherwise the upper triangular part is updated.
|
||||
* */
|
||||
void RELAPACK_dgemmt(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const double *alpha, const double *A, const int *ldA,
|
||||
const double *B, const int *ldB,
|
||||
const double *beta, double *C, const int *ldC
|
||||
) {
|
||||
|
||||
#if HAVE_XGEMMT
|
||||
BLAS(dgemmt)(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, ldC);
|
||||
return;
|
||||
#else
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
const int notransA = LAPACK(lsame)(transA, "N");
|
||||
const int tranA = LAPACK(lsame)(transA, "T");
|
||||
const int notransB = LAPACK(lsame)(transB, "N");
|
||||
const int tranB = LAPACK(lsame)(transB, "T");
|
||||
int info = 0;
|
||||
if (!lower && !upper)
|
||||
info = 1;
|
||||
else if (!tranA && !notransA)
|
||||
info = 2;
|
||||
else if (!tranB && !notransB)
|
||||
info = 3;
|
||||
else if (*n < 0)
|
||||
info = 4;
|
||||
else if (*k < 0)
|
||||
info = 5;
|
||||
else if (*ldA < MAX(1, notransA ? *n : *k))
|
||||
info = 8;
|
||||
else if (*ldB < MAX(1, notransB ? *k : *n))
|
||||
info = 10;
|
||||
else if (*ldC < MAX(1, *n))
|
||||
info = 13;
|
||||
if (info) {
|
||||
LAPACK(xerbla)("DGEMMT", &info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
const char cleantransA = notransA ? 'N' : 'T';
|
||||
const char cleantransB = notransB ? 'N' : 'T';
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_dgemmt_rec(&cleanuplo, &cleantransA, &cleantransB, n, k, alpha, A, ldA, B, ldB, beta, C, ldC);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** dgemmt's recursive compute kernel */
|
||||
static void RELAPACK_dgemmt_rec(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const double *alpha, const double *A, const int *ldA,
|
||||
const double *B, const int *ldB,
|
||||
const double *beta, double *C, const int *ldC
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_DGEMMT, 1)) {
|
||||
// Unblocked
|
||||
RELAPACK_dgemmt_rec2(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, ldC);
|
||||
return;
|
||||
}
|
||||
|
||||
// Splitting
|
||||
const int n1 = DREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_T
|
||||
// A_B
|
||||
const double *const A_T = A;
|
||||
const double *const A_B = A + ((*transA == 'N') ? n1 : *ldA * n1);
|
||||
|
||||
// B_L B_R
|
||||
const double *const B_L = B;
|
||||
const double *const B_R = B + ((*transB == 'N') ? *ldB * n1 : n1);
|
||||
|
||||
// C_TL C_TR
|
||||
// C_BL C_BR
|
||||
double *const C_TL = C;
|
||||
double *const C_TR = C + *ldC * n1;
|
||||
double *const C_BL = C + n1;
|
||||
double *const C_BR = C + *ldC * n1 + n1;
|
||||
|
||||
// recursion(C_TL)
|
||||
RELAPACK_dgemmt_rec(uplo, transA, transB, &n1, k, alpha, A_T, ldA, B_L, ldB, beta, C_TL, ldC);
|
||||
|
||||
if (*uplo == 'L')
|
||||
// C_BL = alpha A_B B_L + beta C_BL
|
||||
BLAS(dgemm)(transA, transB, &n2, &n1, k, alpha, A_B, ldA, B_L, ldB, beta, C_BL, ldC);
|
||||
else
|
||||
// C_TR = alpha A_T B_R + beta C_TR
|
||||
BLAS(dgemm)(transA, transB, &n1, &n2, k, alpha, A_T, ldA, B_R, ldB, beta, C_TR, ldC);
|
||||
|
||||
// recursion(C_BR)
|
||||
RELAPACK_dgemmt_rec(uplo, transA, transB, &n2, k, alpha, A_B, ldA, B_R, ldB, beta, C_BR, ldC);
|
||||
}
|
||||
|
||||
|
||||
/** dgemmt's unblocked compute kernel */
|
||||
static void RELAPACK_dgemmt_rec2(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const double *alpha, const double *A, const int *ldA,
|
||||
const double *B, const int *ldB,
|
||||
const double *beta, double *C, const int *ldC
|
||||
) {
|
||||
|
||||
const int incB = (*transB == 'N') ? 1 : *ldB;
|
||||
const int incC = 1;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < *n; i++) {
|
||||
// A_0
|
||||
// A_i
|
||||
const double *const A_0 = A;
|
||||
const double *const A_i = A + ((*transA == 'N') ? i : *ldA * i);
|
||||
|
||||
// * B_i *
|
||||
const double *const B_i = B + ((*transB == 'N') ? *ldB * i : i);
|
||||
|
||||
// * C_0i *
|
||||
// * C_ii *
|
||||
double *const C_0i = C + *ldC * i;
|
||||
double *const C_ii = C + *ldC * i + i;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
const int nmi = *n - i;
|
||||
if (*transA == 'N')
|
||||
BLAS(dgemv)(transA, &nmi, k, alpha, A_i, ldA, B_i, &incB, beta, C_ii, &incC);
|
||||
else
|
||||
BLAS(dgemv)(transA, k, &nmi, alpha, A_i, ldA, B_i, &incB, beta, C_ii, &incC);
|
||||
} else {
|
||||
const int ip1 = i + 1;
|
||||
if (*transA == 'N')
|
||||
BLAS(dgemv)(transA, &ip1, k, alpha, A_0, ldA, B_i, &incB, beta, C_0i, &incC);
|
||||
else
|
||||
BLAS(dgemv)(transA, k, &ip1, alpha, A_0, ldA, B_i, &incB, beta, C_0i, &incC);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_dgetrf_rec(const int *, const int *, double *,
|
||||
const int *, int *, int *);
|
||||
|
||||
|
||||
/** DGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's dgetrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d3/d6a/dgetrf_8f.html
|
||||
* */
|
||||
void RELAPACK_dgetrf(
|
||||
const int *m, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
*info = 0;
|
||||
if (*m < 0)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("DGETRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
const int sn = MIN(*m, *n);
|
||||
|
||||
RELAPACK_dgetrf_rec(m, &sn, A, ldA, ipiv, info);
|
||||
|
||||
// Right remainder
|
||||
if (*m < *n) {
|
||||
// Constants
|
||||
const double ONE[] = { 1. };
|
||||
const int iONE[] = { 1. };
|
||||
|
||||
// Splitting
|
||||
const int rn = *n - *m;
|
||||
|
||||
// A_L A_R
|
||||
const double *const A_L = A;
|
||||
double *const A_R = A + *ldA * *m;
|
||||
|
||||
// A_R = apply(ipiv, A_R)
|
||||
LAPACK(dlaswp)(&rn, A_R, ldA, iONE, m, ipiv, iONE);
|
||||
// A_R = A_S \ A_R
|
||||
BLAS(dtrsm)("L", "L", "N", "U", m, &rn, ONE, A_L, ldA, A_R, ldA);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** dgetrf's recursive compute kernel */
|
||||
static void RELAPACK_dgetrf_rec(
|
||||
const int *m, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_DGETRF, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(dgetf2)(m, n, A, ldA, ipiv, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1. };
|
||||
const double MONE[] = { -1. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Splitting
|
||||
const int n1 = DREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
const int m2 = *m - n1;
|
||||
|
||||
// A_L A_R
|
||||
double *const A_L = A;
|
||||
double *const A_R = A + *ldA * n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
double *const A_TL = A;
|
||||
double *const A_TR = A + *ldA * n1;
|
||||
double *const A_BL = A + n1;
|
||||
double *const A_BR = A + *ldA * n1 + n1;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_T = ipiv;
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// recursion(A_L, ipiv_T)
|
||||
RELAPACK_dgetrf_rec(m, &n1, A_L, ldA, ipiv_T, info);
|
||||
// apply pivots to A_R
|
||||
LAPACK(dlaswp)(&n2, A_R, ldA, iONE, &n1, ipiv_T, iONE);
|
||||
|
||||
// A_TR = A_TL \ A_TR
|
||||
BLAS(dtrsm)("L", "L", "N", "U", &n1, &n2, ONE, A_TL, ldA, A_TR, ldA);
|
||||
// A_BR = A_BR - A_BL * A_TR
|
||||
BLAS(dgemm)("N", "N", &m2, &n2, &n1, MONE, A_BL, ldA, A_TR, ldA, ONE, A_BR, ldA);
|
||||
|
||||
// recursion(A_BR, ipiv_B)
|
||||
RELAPACK_dgetrf_rec(&m2, &n2, A_BR, ldA, ipiv_B, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
// apply pivots to A_BL
|
||||
LAPACK(dlaswp)(&n1, A_BL, ldA, iONE, &n2, ipiv_B, iONE);
|
||||
// shift pivots
|
||||
int i;
|
||||
for (i = 0; i < n2; i++)
|
||||
ipiv_B[i] += n1;
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_dlauum_rec(const char *, const int *, double *,
|
||||
const int *, int *);
|
||||
|
||||
|
||||
/** DLAUUM computes the product U * U**T or L**T * L, where the triangular factor U or L is stored in the upper or lower triangular part of the array A.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's dlauum.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d0/dc2/dlauum_8f.html
|
||||
* */
|
||||
void RELAPACK_dlauum(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("DLAUUM", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_dlauum_rec(&cleanuplo, n, A, ldA, info);
|
||||
}
|
||||
|
||||
|
||||
/** dlauum's recursive compute kernel */
|
||||
static void RELAPACK_dlauum_rec(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_DLAUUM, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(dlauu2)(uplo, n, A, ldA, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1. };
|
||||
|
||||
// Splitting
|
||||
const int n1 = DREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
double *const A_TL = A;
|
||||
double *const A_TR = A + *ldA * n1;
|
||||
double *const A_BL = A + n1;
|
||||
double *const A_BR = A + *ldA * n1 + n1;
|
||||
|
||||
// recursion(A_TL)
|
||||
RELAPACK_dlauum_rec(uplo, &n1, A_TL, ldA, info);
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// A_TL = A_TL + A_BL' * A_BL
|
||||
BLAS(dsyrk)("L", "T", &n1, &n2, ONE, A_BL, ldA, ONE, A_TL, ldA);
|
||||
// A_BL = A_BR' * A_BL
|
||||
BLAS(dtrmm)("L", "L", "T", "N", &n2, &n1, ONE, A_BR, ldA, A_BL, ldA);
|
||||
} else {
|
||||
// A_TL = A_TL + A_TR * A_TR'
|
||||
BLAS(dsyrk)("U", "N", &n1, &n2, ONE, A_TR, ldA, ONE, A_TL, ldA);
|
||||
// A_TR = A_TR * A_BR'
|
||||
BLAS(dtrmm)("R", "U", "T", "N", &n1, &n2, ONE, A_BR, ldA, A_TR, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR)
|
||||
RELAPACK_dlauum_rec(uplo, &n2, A_BR, ldA, info);
|
||||
}
|
|
@ -0,0 +1,157 @@
|
|||
#include "relapack.h"
|
||||
#include "stdlib.h"
|
||||
|
||||
static void RELAPACK_dpbtrf_rec(const char *, const int *, const int *,
|
||||
double *, const int *, double *, const int *, int *);
|
||||
|
||||
|
||||
/** DPBTRF computes the Cholesky factorization of a real symmetric positive definite band matrix A.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's dpbtrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/df/da9/dpbtrf_8f.html
|
||||
* */
|
||||
void RELAPACK_dpbtrf(
|
||||
const char *uplo, const int *n, const int *kd,
|
||||
double *Ab, const int *ldAb,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*kd < 0)
|
||||
*info = -3;
|
||||
else if (*ldAb < *kd + 1)
|
||||
*info = -5;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("DPBTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Constant
|
||||
const double ZERO[] = { 0. };
|
||||
|
||||
// Allocate work space
|
||||
const int n1 = DREC_SPLIT(*n);
|
||||
const int mWork = (*kd > n1) ? (lower ? *n - *kd : n1) : *kd;
|
||||
const int nWork = (*kd > n1) ? (lower ? n1 : *n - *kd) : *kd;
|
||||
double *Work = malloc(mWork * nWork * sizeof(double));
|
||||
LAPACK(dlaset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork);
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_dpbtrf_rec(&cleanuplo, n, kd, Ab, ldAb, Work, &mWork, info);
|
||||
|
||||
// Free work space
|
||||
free(Work);
|
||||
}
|
||||
|
||||
|
||||
/** dpbtrf's recursive compute kernel */
|
||||
static void RELAPACK_dpbtrf_rec(
|
||||
const char *uplo, const int *n, const int *kd,
|
||||
double *Ab, const int *ldAb,
|
||||
double *Work, const int *ldWork,
|
||||
int *info
|
||||
){
|
||||
|
||||
if (*n <= MAX(CROSSOVER_DPBTRF, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(dpbtf2)(uplo, n, kd, Ab, ldAb, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1. };
|
||||
const double MONE[] = { -1. };
|
||||
|
||||
// Unskew A
|
||||
const int ldA[] = { *ldAb - 1 };
|
||||
double *const A = Ab + ((*uplo == 'L') ? 0 : *kd);
|
||||
|
||||
// Splitting
|
||||
const int n1 = MIN(DREC_SPLIT(*n), *kd);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// * *
|
||||
// * Ab_BR
|
||||
double *const Ab_BR = Ab + *ldAb * n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
double *const A_TL = A;
|
||||
double *const A_TR = A + *ldA * n1;
|
||||
double *const A_BL = A + n1;
|
||||
double *const A_BR = A + *ldA * n1 + n1;
|
||||
|
||||
// recursion(A_TL)
|
||||
RELAPACK_dpotrf(uplo, &n1, A_TL, ldA, info);
|
||||
if (*info)
|
||||
return;
|
||||
|
||||
// Banded splitting
|
||||
const int n21 = MIN(n2, *kd - n1);
|
||||
const int n22 = MIN(n2 - n21, n1);
|
||||
|
||||
// n1 n21 n22
|
||||
// n1 * A_TRl A_TRr
|
||||
// n21 A_BLt A_BRtl A_BRtr
|
||||
// n22 A_BLb A_BRbl A_BRbr
|
||||
double *const A_TRl = A_TR;
|
||||
double *const A_TRr = A_TR + *ldA * n21;
|
||||
double *const A_BLt = A_BL;
|
||||
double *const A_BLb = A_BL + n21;
|
||||
double *const A_BRtl = A_BR;
|
||||
double *const A_BRtr = A_BR + *ldA * n21;
|
||||
double *const A_BRbl = A_BR + n21;
|
||||
double *const A_BRbr = A_BR + *ldA * n21 + n21;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// A_BLt = ABLt / A_TL'
|
||||
BLAS(dtrsm)("R", "L", "T", "N", &n21, &n1, ONE, A_TL, ldA, A_BLt, ldA);
|
||||
// A_BRtl = A_BRtl - A_BLt * A_BLt'
|
||||
BLAS(dsyrk)("L", "N", &n21, &n1, MONE, A_BLt, ldA, ONE, A_BRtl, ldA);
|
||||
// Work = A_BLb
|
||||
LAPACK(dlacpy)("U", &n22, &n1, A_BLb, ldA, Work, ldWork);
|
||||
// Work = Work / A_TL'
|
||||
BLAS(dtrsm)("R", "L", "T", "N", &n22, &n1, ONE, A_TL, ldA, Work, ldWork);
|
||||
// A_BRbl = A_BRbl - Work * A_BLt'
|
||||
BLAS(dgemm)("N", "T", &n22, &n21, &n1, MONE, Work, ldWork, A_BLt, ldA, ONE, A_BRbl, ldA);
|
||||
// A_BRbr = A_BRbr - Work * Work'
|
||||
BLAS(dsyrk)("L", "N", &n22, &n1, MONE, Work, ldWork, ONE, A_BRbr, ldA);
|
||||
// A_BLb = Work
|
||||
LAPACK(dlacpy)("U", &n22, &n1, Work, ldWork, A_BLb, ldA);
|
||||
} else {
|
||||
// A_TRl = A_TL' \ A_TRl
|
||||
BLAS(dtrsm)("L", "U", "T", "N", &n1, &n21, ONE, A_TL, ldA, A_TRl, ldA);
|
||||
// A_BRtl = A_BRtl - A_TRl' * A_TRl
|
||||
BLAS(dsyrk)("U", "T", &n21, &n1, MONE, A_TRl, ldA, ONE, A_BRtl, ldA);
|
||||
// Work = A_TRr
|
||||
LAPACK(dlacpy)("L", &n1, &n22, A_TRr, ldA, Work, ldWork);
|
||||
// Work = A_TL' \ Work
|
||||
BLAS(dtrsm)("L", "U", "T", "N", &n1, &n22, ONE, A_TL, ldA, Work, ldWork);
|
||||
// A_BRtr = A_BRtr - A_TRl' * Work
|
||||
BLAS(dgemm)("T", "N", &n21, &n22, &n1, MONE, A_TRl, ldA, Work, ldWork, ONE, A_BRtr, ldA);
|
||||
// A_BRbr = A_BRbr - Work' * Work
|
||||
BLAS(dsyrk)("U", "T", &n22, &n1, MONE, Work, ldWork, ONE, A_BRbr, ldA);
|
||||
// A_TRr = Work
|
||||
LAPACK(dlacpy)("L", &n1, &n22, Work, ldWork, A_TRr, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR)
|
||||
if (*kd > n1)
|
||||
RELAPACK_dpotrf(uplo, &n2, A_BR, ldA, info);
|
||||
else
|
||||
RELAPACK_dpbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
}
|
|
@ -0,0 +1,92 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_dpotrf_rec(const char *, const int *, double *,
|
||||
const int *, int *);
|
||||
|
||||
|
||||
/** DPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix A.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's dpotrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d0/d8a/dpotrf_8f.html
|
||||
* */
|
||||
void RELAPACK_dpotrf(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("DPOTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_dpotrf_rec(&cleanuplo, n, A, ldA, info);
|
||||
}
|
||||
|
||||
|
||||
/** dpotrf's recursive compute kernel */
|
||||
static void RELAPACK_dpotrf_rec(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
){
|
||||
|
||||
if (*n <= MAX(CROSSOVER_DPOTRF, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(dpotf2)(uplo, n, A, ldA, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1. };
|
||||
const double MONE[] = { -1. };
|
||||
|
||||
// Splitting
|
||||
const int n1 = DREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
double *const A_TL = A;
|
||||
double *const A_TR = A + *ldA * n1;
|
||||
double *const A_BL = A + n1;
|
||||
double *const A_BR = A + *ldA * n1 + n1;
|
||||
|
||||
// recursion(A_TL)
|
||||
RELAPACK_dpotrf_rec(uplo, &n1, A_TL, ldA, info);
|
||||
if (*info)
|
||||
return;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// A_BL = A_BL / A_TL'
|
||||
BLAS(dtrsm)("R", "L", "T", "N", &n2, &n1, ONE, A_TL, ldA, A_BL, ldA);
|
||||
// A_BR = A_BR - A_BL * A_BL'
|
||||
BLAS(dsyrk)("L", "N", &n2, &n1, MONE, A_BL, ldA, ONE, A_BR, ldA);
|
||||
} else {
|
||||
// A_TR = A_TL' \ A_TR
|
||||
BLAS(dtrsm)("L", "U", "T", "N", &n1, &n2, ONE, A_TL, ldA, A_TR, ldA);
|
||||
// A_BR = A_BR - A_TR' * A_TR
|
||||
BLAS(dsyrk)("U", "T", &n2, &n1, MONE, A_TR, ldA, ONE, A_BR, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR)
|
||||
RELAPACK_dpotrf_rec(uplo, &n2, A_BR, ldA, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
}
|
|
@ -0,0 +1,212 @@
|
|||
#include "relapack.h"
|
||||
#if XSYGST_ALLOW_MALLOC
|
||||
#include "stdlib.h"
|
||||
#endif
|
||||
|
||||
static void RELAPACK_dsygst_rec(const int *, const char *, const int *,
|
||||
double *, const int *, const double *, const int *,
|
||||
double *, const int *, int *);
|
||||
|
||||
|
||||
/** DSYGST reduces a real symmetric-definite generalized eigenproblem to standard form.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's dsygst.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/dc/d04/dsygst_8f.html
|
||||
* */
|
||||
void RELAPACK_dsygst(
|
||||
const int *itype, const char *uplo, const int *n,
|
||||
double *A, const int *ldA, const double *B, const int *ldB,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (*itype < 1 || *itype > 3)
|
||||
*info = -1;
|
||||
else if (!lower && !upper)
|
||||
*info = -2;
|
||||
else if (*n < 0)
|
||||
*info = -3;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -5;
|
||||
else if (*ldB < MAX(1, *n))
|
||||
*info = -7;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("DSYGST", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Allocate work space
|
||||
double *Work = NULL;
|
||||
int lWork = 0;
|
||||
#if XSYGST_ALLOW_MALLOC
|
||||
const int n1 = DREC_SPLIT(*n);
|
||||
lWork = n1 * (*n - n1);
|
||||
Work = malloc(lWork * sizeof(double));
|
||||
if (!Work)
|
||||
lWork = 0;
|
||||
#endif
|
||||
|
||||
// recursive kernel
|
||||
RELAPACK_dsygst_rec(itype, &cleanuplo, n, A, ldA, B, ldB, Work, &lWork, info);
|
||||
|
||||
// Free work space
|
||||
#if XSYGST_ALLOW_MALLOC
|
||||
if (Work)
|
||||
free(Work);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** dsygst's recursive compute kernel */
|
||||
static void RELAPACK_dsygst_rec(
|
||||
const int *itype, const char *uplo, const int *n,
|
||||
double *A, const int *ldA, const double *B, const int *ldB,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_SSYGST, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(dsygs2)(itype, uplo, n, A, ldA, B, ldB, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const double ZERO[] = { 0. };
|
||||
const double ONE[] = { 1. };
|
||||
const double MONE[] = { -1. };
|
||||
const double HALF[] = { .5 };
|
||||
const double MHALF[] = { -.5 };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Loop iterator
|
||||
int i;
|
||||
|
||||
// Splitting
|
||||
const int n1 = DREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
double *const A_TL = A;
|
||||
double *const A_TR = A + *ldA * n1;
|
||||
double *const A_BL = A + n1;
|
||||
double *const A_BR = A + *ldA * n1 + n1;
|
||||
|
||||
// B_TL B_TR
|
||||
// B_BL B_BR
|
||||
const double *const B_TL = B;
|
||||
const double *const B_TR = B + *ldB * n1;
|
||||
const double *const B_BL = B + n1;
|
||||
const double *const B_BR = B + *ldB * n1 + n1;
|
||||
|
||||
// recursion(A_TL, B_TL)
|
||||
RELAPACK_dsygst_rec(itype, uplo, &n1, A_TL, ldA, B_TL, ldB, Work, lWork, info);
|
||||
|
||||
if (*itype == 1)
|
||||
if (*uplo == 'L') {
|
||||
// A_BL = A_BL / B_TL'
|
||||
BLAS(dtrsm)("R", "L", "T", "N", &n2, &n1, ONE, B_TL, ldB, A_BL, ldA);
|
||||
if (*lWork > n2 * n1) {
|
||||
// T = -1/2 * B_BL * A_TL
|
||||
BLAS(dsymm)("R", "L", &n2, &n1, MHALF, A_TL, ldA, B_BL, ldB, ZERO, Work, &n2);
|
||||
// A_BL = A_BL + T
|
||||
for (i = 0; i < n1; i++)
|
||||
BLAS(daxpy)(&n2, ONE, Work + n2 * i, iONE, A_BL + *ldA * i, iONE);
|
||||
} else
|
||||
// A_BL = A_BL - 1/2 B_BL * A_TL
|
||||
BLAS(dsymm)("R", "L", &n2, &n1, MHALF, A_TL, ldA, B_BL, ldB, ONE, A_BL, ldA);
|
||||
// A_BR = A_BR - A_BL * B_BL' - B_BL * A_BL'
|
||||
BLAS(dsyr2k)("L", "N", &n2, &n1, MONE, A_BL, ldA, B_BL, ldB, ONE, A_BR, ldA);
|
||||
if (*lWork > n2 * n1)
|
||||
// A_BL = A_BL + T
|
||||
for (i = 0; i < n1; i++)
|
||||
BLAS(daxpy)(&n2, ONE, Work + n2 * i, iONE, A_BL + *ldA * i, iONE);
|
||||
else
|
||||
// A_BL = A_BL - 1/2 B_BL * A_TL
|
||||
BLAS(dsymm)("R", "L", &n2, &n1, MHALF, A_TL, ldA, B_BL, ldB, ONE, A_BL, ldA);
|
||||
// A_BL = B_BR \ A_BL
|
||||
BLAS(dtrsm)("L", "L", "N", "N", &n2, &n1, ONE, B_BR, ldB, A_BL, ldA);
|
||||
} else {
|
||||
// A_TR = B_TL' \ A_TR
|
||||
BLAS(dtrsm)("L", "U", "T", "N", &n1, &n2, ONE, B_TL, ldB, A_TR, ldA);
|
||||
if (*lWork > n2 * n1) {
|
||||
// T = -1/2 * A_TL * B_TR
|
||||
BLAS(dsymm)("L", "U", &n1, &n2, MHALF, A_TL, ldA, B_TR, ldB, ZERO, Work, &n1);
|
||||
// A_TR = A_BL + T
|
||||
for (i = 0; i < n2; i++)
|
||||
BLAS(daxpy)(&n1, ONE, Work + n1 * i, iONE, A_TR + *ldA * i, iONE);
|
||||
} else
|
||||
// A_TR = A_TR - 1/2 A_TL * B_TR
|
||||
BLAS(dsymm)("L", "U", &n1, &n2, MHALF, A_TL, ldA, B_TR, ldB, ONE, A_TR, ldA);
|
||||
// A_BR = A_BR - A_TR' * B_TR - B_TR' * A_TR
|
||||
BLAS(dsyr2k)("U", "T", &n2, &n1, MONE, A_TR, ldA, B_TR, ldB, ONE, A_BR, ldA);
|
||||
if (*lWork > n2 * n1)
|
||||
// A_TR = A_BL + T
|
||||
for (i = 0; i < n2; i++)
|
||||
BLAS(daxpy)(&n1, ONE, Work + n1 * i, iONE, A_TR + *ldA * i, iONE);
|
||||
else
|
||||
// A_TR = A_TR - 1/2 A_TL * B_TR
|
||||
BLAS(dsymm)("L", "U", &n1, &n2, MHALF, A_TL, ldA, B_TR, ldB, ONE, A_TR, ldA);
|
||||
// A_TR = A_TR / B_BR
|
||||
BLAS(dtrsm)("R", "U", "N", "N", &n1, &n2, ONE, B_BR, ldB, A_TR, ldA);
|
||||
}
|
||||
else
|
||||
if (*uplo == 'L') {
|
||||
// A_BL = A_BL * B_TL
|
||||
BLAS(dtrmm)("R", "L", "N", "N", &n2, &n1, ONE, B_TL, ldB, A_BL, ldA);
|
||||
if (*lWork > n2 * n1) {
|
||||
// T = 1/2 * A_BR * B_BL
|
||||
BLAS(dsymm)("L", "L", &n2, &n1, HALF, A_BR, ldA, B_BL, ldB, ZERO, Work, &n2);
|
||||
// A_BL = A_BL + T
|
||||
for (i = 0; i < n1; i++)
|
||||
BLAS(daxpy)(&n2, ONE, Work + n2 * i, iONE, A_BL + *ldA * i, iONE);
|
||||
} else
|
||||
// A_BL = A_BL + 1/2 A_BR * B_BL
|
||||
BLAS(dsymm)("L", "L", &n2, &n1, HALF, A_BR, ldA, B_BL, ldB, ONE, A_BL, ldA);
|
||||
// A_TL = A_TL + A_BL' * B_BL + B_BL' * A_BL
|
||||
BLAS(dsyr2k)("L", "T", &n1, &n2, ONE, A_BL, ldA, B_BL, ldB, ONE, A_TL, ldA);
|
||||
if (*lWork > n2 * n1)
|
||||
// A_BL = A_BL + T
|
||||
for (i = 0; i < n1; i++)
|
||||
BLAS(daxpy)(&n2, ONE, Work + n2 * i, iONE, A_BL + *ldA * i, iONE);
|
||||
else
|
||||
// A_BL = A_BL + 1/2 A_BR * B_BL
|
||||
BLAS(dsymm)("L", "L", &n2, &n1, HALF, A_BR, ldA, B_BL, ldB, ONE, A_BL, ldA);
|
||||
// A_BL = B_BR * A_BL
|
||||
BLAS(dtrmm)("L", "L", "T", "N", &n2, &n1, ONE, B_BR, ldB, A_BL, ldA);
|
||||
} else {
|
||||
// A_TR = B_TL * A_TR
|
||||
BLAS(dtrmm)("L", "U", "N", "N", &n1, &n2, ONE, B_TL, ldB, A_TR, ldA);
|
||||
if (*lWork > n2 * n1) {
|
||||
// T = 1/2 * B_TR * A_BR
|
||||
BLAS(dsymm)("R", "U", &n1, &n2, HALF, A_BR, ldA, B_TR, ldB, ZERO, Work, &n1);
|
||||
// A_TR = A_TR + T
|
||||
for (i = 0; i < n2; i++)
|
||||
BLAS(daxpy)(&n1, ONE, Work + n1 * i, iONE, A_TR + *ldA * i, iONE);
|
||||
} else
|
||||
// A_TR = A_TR + 1/2 B_TR A_BR
|
||||
BLAS(dsymm)("R", "U", &n1, &n2, HALF, A_BR, ldA, B_TR, ldB, ONE, A_TR, ldA);
|
||||
// A_TL = A_TL + A_TR * B_TR' + B_TR * A_TR'
|
||||
BLAS(dsyr2k)("U", "N", &n1, &n2, ONE, A_TR, ldA, B_TR, ldB, ONE, A_TL, ldA);
|
||||
if (*lWork > n2 * n1)
|
||||
// A_TR = A_TR + T
|
||||
for (i = 0; i < n2; i++)
|
||||
BLAS(daxpy)(&n1, ONE, Work + n1 * i, iONE, A_TR + *ldA * i, iONE);
|
||||
else
|
||||
// A_TR = A_TR + 1/2 B_TR * A_BR
|
||||
BLAS(dsymm)("R", "U", &n1, &n2, HALF, A_BR, ldA, B_TR, ldB, ONE, A_TR, ldA);
|
||||
// A_TR = A_TR * B_BR
|
||||
BLAS(dtrmm)("R", "U", "T", "N", &n1, &n2, ONE, B_BR, ldB, A_TR, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR, B_BR)
|
||||
RELAPACK_dsygst_rec(itype, uplo, &n2, A_BR, ldA, B_BR, ldB, Work, lWork, info);
|
||||
}
|
|
@ -0,0 +1,238 @@
|
|||
#include "relapack.h"
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
static void RELAPACK_dsytrf_rec(const char *, const int *, const int *, int *,
|
||||
double *, const int *, int *, double *, const int *, int *);
|
||||
|
||||
|
||||
/** DSYTRF computes the factorization of a complex symmetric matrix A using the Bunch-Kaufman diagonal pivoting method.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's dsytrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/dd/df4/dsytrf_8f.html
|
||||
* */
|
||||
void RELAPACK_dsytrf(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
|
||||
// Required work size
|
||||
const int cleanlWork = *n * (*n / 2);
|
||||
int minlWork = cleanlWork;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
minlWork = 1;
|
||||
#endif
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
else if (*lWork < minlWork && *lWork != -1)
|
||||
*info = -7;
|
||||
else if (*lWork == -1) {
|
||||
// Work size query
|
||||
*Work = cleanlWork;
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure Work size
|
||||
double *cleanWork = Work;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (!*info && *lWork < cleanlWork) {
|
||||
cleanWork = malloc(cleanlWork * sizeof(double));
|
||||
if (!cleanWork)
|
||||
*info = -7;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("DSYTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Dummy arguments
|
||||
int nout;
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_dsytrf_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info);
|
||||
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (cleanWork != Work)
|
||||
free(cleanWork);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** dsytrf's recursive compute kernel */
|
||||
static void RELAPACK_dsytrf_rec(
|
||||
const char *uplo, const int *n_full, const int *n, int *n_out,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *ldWork, int *info
|
||||
) {
|
||||
|
||||
// top recursion level?
|
||||
const int top = *n_full == *n;
|
||||
|
||||
if (*n <= MAX(CROSSOVER_DSYTRF, 3)) {
|
||||
// Unblocked
|
||||
if (top) {
|
||||
LAPACK(dsytf2)(uplo, n, A, ldA, ipiv, info);
|
||||
*n_out = *n;
|
||||
} else
|
||||
RELAPACK_dsytrf_rec2(uplo, n_full, n, n_out, A, ldA, ipiv, Work, ldWork, info);
|
||||
return;
|
||||
}
|
||||
|
||||
int info1, info2;
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1. };
|
||||
const double MONE[] = { -1. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Loop iterator
|
||||
int i;
|
||||
|
||||
const int n_rest = *n_full - *n;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// Splitting (setup)
|
||||
int n1 = DREC_SPLIT(*n);
|
||||
int n2 = *n - n1;
|
||||
|
||||
// Work_L *
|
||||
double *const Work_L = Work;
|
||||
|
||||
// recursion(A_L)
|
||||
int n1_out;
|
||||
RELAPACK_dsytrf_rec(uplo, n_full, &n1, &n1_out, A, ldA, ipiv, Work_L, ldWork, &info1);
|
||||
n1 = n1_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n2 = *n - n1;
|
||||
const int n_full2 = *n_full - n1;
|
||||
|
||||
// * *
|
||||
// A_BL A_BR
|
||||
// A_BL_B A_BR_B
|
||||
double *const A_BL = A + n1;
|
||||
double *const A_BR = A + *ldA * n1 + n1;
|
||||
double *const A_BL_B = A + *n;
|
||||
double *const A_BR_B = A + *ldA * n1 + *n;
|
||||
|
||||
// * *
|
||||
// Work_BL Work_BR
|
||||
// * *
|
||||
// (top recursion level: use Work as Work_BR)
|
||||
double *const Work_BL = Work + n1;
|
||||
double *const Work_BR = top ? Work : Work + *ldWork * n1 + n1;
|
||||
const int ldWork_BR = top ? n2 : *ldWork;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// A_BR = A_BR - A_BL Work_BL'
|
||||
RELAPACK_dgemmt(uplo, "N", "T", &n2, &n1, MONE, A_BL, ldA, Work_BL, ldWork, ONE, A_BR, ldA);
|
||||
BLAS(dgemm)("N", "T", &n_rest, &n2, &n1, MONE, A_BL_B, ldA, Work_BL, ldWork, ONE, A_BR_B, ldA);
|
||||
|
||||
// recursion(A_BR)
|
||||
int n2_out;
|
||||
RELAPACK_dsytrf_rec(uplo, &n_full2, &n2, &n2_out, A_BR, ldA, ipiv_B, Work_BR, &ldWork_BR, &info2);
|
||||
|
||||
if (n2_out != n2) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// last column of A_BR
|
||||
double *const A_BR_r = A_BR + *ldA * n2_out + n2_out;
|
||||
|
||||
// last row of A_BL
|
||||
double *const A_BL_b = A_BL + n2_out;
|
||||
|
||||
// last row of Work_BL
|
||||
double *const Work_BL_b = Work_BL + n2_out;
|
||||
|
||||
// A_BR_r = A_BR_r + A_BL_b Work_BL_b'
|
||||
BLAS(dgemv)("N", &n_restp1, &n1, ONE, A_BL_b, ldA, Work_BL_b, ldWork, ONE, A_BR_r, iONE);
|
||||
}
|
||||
n2 = n2_out;
|
||||
|
||||
// shift pivots
|
||||
for (i = 0; i < n2; i++)
|
||||
if (ipiv_B[i] > 0)
|
||||
ipiv_B[i] += n1;
|
||||
else
|
||||
ipiv_B[i] -= n1;
|
||||
|
||||
*info = info1 || info2;
|
||||
*n_out = n1 + n2;
|
||||
} else {
|
||||
// Splitting (setup)
|
||||
int n2 = DREC_SPLIT(*n);
|
||||
int n1 = *n - n2;
|
||||
|
||||
// * Work_R
|
||||
// (top recursion level: use Work as Work_R)
|
||||
double *const Work_R = top ? Work : Work + *ldWork * n1;
|
||||
|
||||
// recursion(A_R)
|
||||
int n2_out;
|
||||
RELAPACK_dsytrf_rec(uplo, n_full, &n2, &n2_out, A, ldA, ipiv, Work_R, ldWork, &info2);
|
||||
const int n2_diff = n2 - n2_out;
|
||||
n2 = n2_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n1 = *n - n2;
|
||||
const int n_full1 = *n_full - n2;
|
||||
|
||||
// * A_TL_T A_TR_T
|
||||
// * A_TL A_TR
|
||||
// * * *
|
||||
double *const A_TL_T = A + *ldA * n_rest;
|
||||
double *const A_TR_T = A + *ldA * (n_rest + n1);
|
||||
double *const A_TL = A + *ldA * n_rest + n_rest;
|
||||
double *const A_TR = A + *ldA * (n_rest + n1) + n_rest;
|
||||
|
||||
// Work_L *
|
||||
// * Work_TR
|
||||
// * *
|
||||
// (top recursion level: Work_R was Work)
|
||||
double *const Work_L = Work;
|
||||
double *const Work_TR = Work + *ldWork * (top ? n2_diff : n1) + n_rest;
|
||||
const int ldWork_L = top ? n1 : *ldWork;
|
||||
|
||||
// A_TL = A_TL - A_TR Work_TR'
|
||||
RELAPACK_dgemmt(uplo, "N", "T", &n1, &n2, MONE, A_TR, ldA, Work_TR, ldWork, ONE, A_TL, ldA);
|
||||
BLAS(dgemm)("N", "T", &n_rest, &n1, &n2, MONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, ldA);
|
||||
|
||||
// recursion(A_TL)
|
||||
int n1_out;
|
||||
RELAPACK_dsytrf_rec(uplo, &n_full1, &n1, &n1_out, A, ldA, ipiv, Work_L, &ldWork_L, &info1);
|
||||
|
||||
if (n1_out != n1) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// A_TL_T_l = A_TL_T_l + A_TR_T Work_TR_t'
|
||||
BLAS(dgemv)("N", &n_restp1, &n2, ONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, iONE);
|
||||
}
|
||||
n1 = n1_out;
|
||||
|
||||
*info = info2 || info1;
|
||||
*n_out = n1 + n2;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,352 @@
|
|||
/* -- translated by f2c (version 20100827).
|
||||
You must link the resulting object file with libf2c:
|
||||
on Microsoft Windows system, link with libf2c.lib;
|
||||
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
|
||||
or, if you install libf2c.a in a standard place, with -lf2c -lm
|
||||
-- in that order, at the end of the command line, as in
|
||||
cc *.o -lf2c -lm
|
||||
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
|
||||
|
||||
http://www.netlib.org/f2c/libf2c.zip
|
||||
*/
|
||||
|
||||
#include "f2c.h"
|
||||
|
||||
/* Table of constant values */
|
||||
|
||||
static int c__1 = 1;
|
||||
static double c_b8 = -1.;
|
||||
static double c_b9 = 1.;
|
||||
|
||||
/** DSYTRF_REC2 computes a partial factorization of a real symmetric matrix using the Bunch-Kaufman diagon al pivoting method.
|
||||
*
|
||||
* This routine is a minor modification of LAPACK's dlasyf.
|
||||
* It serves as an unblocked kernel in the recursive algorithms.
|
||||
* The blocked BLAS Level 3 updates were removed and moved to the
|
||||
* recursive algorithm.
|
||||
* */
|
||||
/* Subroutine */ void RELAPACK_dsytrf_rec2(char *uplo, int *n, int *
|
||||
nb, int *kb, double *a, int *lda, int *ipiv,
|
||||
double *w, int *ldw, int *info, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
int a_dim1, a_offset, w_dim1, w_offset, i__1, i__2;
|
||||
double d__1, d__2, d__3;
|
||||
|
||||
/* Builtin functions */
|
||||
double sqrt(double);
|
||||
|
||||
/* Local variables */
|
||||
static int j, k;
|
||||
static double t, r1, d11, d21, d22;
|
||||
static int jj, kk, jp, kp, kw, kkw, imax, jmax;
|
||||
static double alpha;
|
||||
extern /* Subroutine */ int dscal_(int *, double *, double *,
|
||||
int *);
|
||||
extern logical lsame_(char *, char *, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ int dgemv_(char *, int *, int *,
|
||||
double *, double *, int *, double *, int *,
|
||||
double *, double *, int *, ftnlen), dcopy_(int *,
|
||||
double *, int *, double *, int *), dswap_(int
|
||||
*, double *, int *, double *, int *);
|
||||
static int kstep;
|
||||
static double absakk;
|
||||
extern int idamax_(int *, double *, int *);
|
||||
static double colmax, rowmax;
|
||||
|
||||
/* Parameter adjustments */
|
||||
a_dim1 = *lda;
|
||||
a_offset = 1 + a_dim1;
|
||||
a -= a_offset;
|
||||
--ipiv;
|
||||
w_dim1 = *ldw;
|
||||
w_offset = 1 + w_dim1;
|
||||
w -= w_offset;
|
||||
|
||||
/* Function Body */
|
||||
*info = 0;
|
||||
alpha = (sqrt(17.) + 1.) / 8.;
|
||||
if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
|
||||
k = *n;
|
||||
L10:
|
||||
kw = *nb + k - *n;
|
||||
if ((k <= *n - *nb + 1 && *nb < *n) || k < 1) {
|
||||
goto L30;
|
||||
}
|
||||
dcopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
dgemv_("No transpose", &k, &i__1, &c_b8, &a[(k + 1) * a_dim1 + 1],
|
||||
lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b9, &w[kw *
|
||||
w_dim1 + 1], &c__1, (ftnlen)12);
|
||||
}
|
||||
kstep = 1;
|
||||
absakk = (d__1 = w[k + kw * w_dim1], abs(d__1));
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
imax = idamax_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
colmax = (d__1 = w[imax + kw * w_dim1], abs(d__1));
|
||||
} else {
|
||||
colmax = 0.;
|
||||
}
|
||||
if (max(absakk,colmax) == 0.) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
} else {
|
||||
if (absakk >= alpha * colmax) {
|
||||
kp = k;
|
||||
} else {
|
||||
dcopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) *
|
||||
w_dim1 + 1], &c__1);
|
||||
i__1 = k - imax;
|
||||
dcopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax +
|
||||
1 + (kw - 1) * w_dim1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
dgemv_("No transpose", &k, &i__1, &c_b8, &a[(k + 1) *
|
||||
a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1],
|
||||
ldw, &c_b9, &w[(kw - 1) * w_dim1 + 1], &c__1, (
|
||||
ftnlen)12);
|
||||
}
|
||||
i__1 = k - imax;
|
||||
jmax = imax + idamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1],
|
||||
&c__1);
|
||||
rowmax = (d__1 = w[jmax + (kw - 1) * w_dim1], abs(d__1));
|
||||
if (imax > 1) {
|
||||
i__1 = imax - 1;
|
||||
jmax = idamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
|
||||
/* Computing MAX */
|
||||
d__2 = rowmax, d__3 = (d__1 = w[jmax + (kw - 1) * w_dim1],
|
||||
abs(d__1));
|
||||
rowmax = max(d__2,d__3);
|
||||
}
|
||||
if (absakk >= alpha * colmax * (colmax / rowmax)) {
|
||||
kp = k;
|
||||
} else if ((d__1 = w[imax + (kw - 1) * w_dim1], abs(d__1)) >=
|
||||
alpha * rowmax) {
|
||||
kp = imax;
|
||||
dcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
|
||||
w_dim1 + 1], &c__1);
|
||||
} else {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
}
|
||||
}
|
||||
kk = k - kstep + 1;
|
||||
kkw = *nb + kk - *n;
|
||||
if (kp != kk) {
|
||||
a[kp + kp * a_dim1] = a[kk + kk * a_dim1];
|
||||
i__1 = kk - 1 - kp;
|
||||
dcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp +
|
||||
1) * a_dim1], lda);
|
||||
if (kp > 1) {
|
||||
i__1 = kp - 1;
|
||||
dcopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1
|
||||
+ 1], &c__1);
|
||||
}
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
dswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k
|
||||
+ 1) * a_dim1], lda);
|
||||
}
|
||||
i__1 = *n - kk + 1;
|
||||
dswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw *
|
||||
w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
dcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &
|
||||
c__1);
|
||||
r1 = 1. / a[k + k * a_dim1];
|
||||
i__1 = k - 1;
|
||||
dscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1);
|
||||
} else {
|
||||
if (k > 2) {
|
||||
d21 = w[k - 1 + kw * w_dim1];
|
||||
d11 = w[k + kw * w_dim1] / d21;
|
||||
d22 = w[k - 1 + (kw - 1) * w_dim1] / d21;
|
||||
t = 1. / (d11 * d22 - 1.);
|
||||
d21 = t / d21;
|
||||
i__1 = k - 2;
|
||||
for (j = 1; j <= i__1; ++j) {
|
||||
a[j + (k - 1) * a_dim1] = d21 * (d11 * w[j + (kw - 1)
|
||||
* w_dim1] - w[j + kw * w_dim1]);
|
||||
a[j + k * a_dim1] = d21 * (d22 * w[j + kw * w_dim1] -
|
||||
w[j + (kw - 1) * w_dim1]);
|
||||
/* L20: */
|
||||
}
|
||||
}
|
||||
a[k - 1 + (k - 1) * a_dim1] = w[k - 1 + (kw - 1) * w_dim1];
|
||||
a[k - 1 + k * a_dim1] = w[k - 1 + kw * w_dim1];
|
||||
a[k + k * a_dim1] = w[k + kw * w_dim1];
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -kp;
|
||||
ipiv[k - 1] = -kp;
|
||||
}
|
||||
k -= kstep;
|
||||
goto L10;
|
||||
L30:
|
||||
j = k + 1;
|
||||
L60:
|
||||
jj = j;
|
||||
jp = ipiv[j];
|
||||
if (jp < 0) {
|
||||
jp = -jp;
|
||||
++j;
|
||||
}
|
||||
++j;
|
||||
if (jp != jj && j <= *n) {
|
||||
i__1 = *n - j + 1;
|
||||
dswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda);
|
||||
}
|
||||
if (j < *n) {
|
||||
goto L60;
|
||||
}
|
||||
*kb = *n - k;
|
||||
} else {
|
||||
k = 1;
|
||||
L70:
|
||||
if ((k >= *nb && *nb < *n) || k > *n) {
|
||||
goto L90;
|
||||
}
|
||||
i__1 = *n - k + 1;
|
||||
dcopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1);
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
dgemv_("No transpose", &i__1, &i__2, &c_b8, &a[k + a_dim1], lda, &w[k
|
||||
+ w_dim1], ldw, &c_b9, &w[k + k * w_dim1], &c__1, (ftnlen)12);
|
||||
kstep = 1;
|
||||
absakk = (d__1 = w[k + k * w_dim1], abs(d__1));
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
imax = k + idamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
colmax = (d__1 = w[imax + k * w_dim1], abs(d__1));
|
||||
} else {
|
||||
colmax = 0.;
|
||||
}
|
||||
if (max(absakk,colmax) == 0.) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
} else {
|
||||
if (absakk >= alpha * colmax) {
|
||||
kp = k;
|
||||
} else {
|
||||
i__1 = imax - k;
|
||||
dcopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = *n - imax + 1;
|
||||
dcopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k +
|
||||
1) * w_dim1], &c__1);
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
dgemv_("No transpose", &i__1, &i__2, &c_b8, &a[k + a_dim1],
|
||||
lda, &w[imax + w_dim1], ldw, &c_b9, &w[k + (k + 1) *
|
||||
w_dim1], &c__1, (ftnlen)12);
|
||||
i__1 = imax - k;
|
||||
jmax = k - 1 + idamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1)
|
||||
;
|
||||
rowmax = (d__1 = w[jmax + (k + 1) * w_dim1], abs(d__1));
|
||||
if (imax < *n) {
|
||||
i__1 = *n - imax;
|
||||
jmax = imax + idamax_(&i__1, &w[imax + 1 + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
/* Computing MAX */
|
||||
d__2 = rowmax, d__3 = (d__1 = w[jmax + (k + 1) * w_dim1],
|
||||
abs(d__1));
|
||||
rowmax = max(d__2,d__3);
|
||||
}
|
||||
if (absakk >= alpha * colmax * (colmax / rowmax)) {
|
||||
kp = k;
|
||||
} else if ((d__1 = w[imax + (k + 1) * w_dim1], abs(d__1)) >=
|
||||
alpha * rowmax) {
|
||||
kp = imax;
|
||||
i__1 = *n - k + 1;
|
||||
dcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k *
|
||||
w_dim1], &c__1);
|
||||
} else {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
}
|
||||
}
|
||||
kk = k + kstep - 1;
|
||||
if (kp != kk) {
|
||||
a[kp + kp * a_dim1] = a[kk + kk * a_dim1];
|
||||
i__1 = kp - kk - 1;
|
||||
dcopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk +
|
||||
1) * a_dim1], lda);
|
||||
if (kp < *n) {
|
||||
i__1 = *n - kp;
|
||||
dcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1
|
||||
+ kp * a_dim1], &c__1);
|
||||
}
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
dswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda);
|
||||
}
|
||||
dswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
i__1 = *n - k + 1;
|
||||
dcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
|
||||
c__1);
|
||||
if (k < *n) {
|
||||
r1 = 1. / a[k + k * a_dim1];
|
||||
i__1 = *n - k;
|
||||
dscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1);
|
||||
}
|
||||
} else {
|
||||
if (k < *n - 1) {
|
||||
d21 = w[k + 1 + k * w_dim1];
|
||||
d11 = w[k + 1 + (k + 1) * w_dim1] / d21;
|
||||
d22 = w[k + k * w_dim1] / d21;
|
||||
t = 1. / (d11 * d22 - 1.);
|
||||
d21 = t / d21;
|
||||
i__1 = *n;
|
||||
for (j = k + 2; j <= i__1; ++j) {
|
||||
a[j + k * a_dim1] = d21 * (d11 * w[j + k * w_dim1] -
|
||||
w[j + (k + 1) * w_dim1]);
|
||||
a[j + (k + 1) * a_dim1] = d21 * (d22 * w[j + (k + 1) *
|
||||
w_dim1] - w[j + k * w_dim1]);
|
||||
/* L80: */
|
||||
}
|
||||
}
|
||||
a[k + k * a_dim1] = w[k + k * w_dim1];
|
||||
a[k + 1 + k * a_dim1] = w[k + 1 + k * w_dim1];
|
||||
a[k + 1 + (k + 1) * a_dim1] = w[k + 1 + (k + 1) * w_dim1];
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -kp;
|
||||
ipiv[k + 1] = -kp;
|
||||
}
|
||||
k += kstep;
|
||||
goto L70;
|
||||
L90:
|
||||
j = k - 1;
|
||||
L120:
|
||||
jj = j;
|
||||
jp = ipiv[j];
|
||||
if (jp < 0) {
|
||||
jp = -jp;
|
||||
--j;
|
||||
}
|
||||
--j;
|
||||
if (jp != jj && j >= 1) {
|
||||
dswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda);
|
||||
}
|
||||
if (j > 1) {
|
||||
goto L120;
|
||||
}
|
||||
*kb = k - 1;
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,236 @@
|
|||
#include "relapack.h"
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
static void RELAPACK_dsytrf_rook_rec(const char *, const int *, const int *, int *,
|
||||
double *, const int *, int *, double *, const int *, int *);
|
||||
|
||||
|
||||
/** DSYTRF_ROOK computes the factorization of a real symmetric matrix A using the bounded Bunch-Kaufman ("rook") diagonal pivoting method.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's dsytrf_rook.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/db/df4/dsytrf__rook_8f.html
|
||||
* */
|
||||
void RELAPACK_dsytrf_rook(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
|
||||
// Required work size
|
||||
const int cleanlWork = *n * (*n / 2);
|
||||
int minlWork = cleanlWork;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
minlWork = 1;
|
||||
#endif
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
else if (*lWork < minlWork && *lWork != -1)
|
||||
*info = -7;
|
||||
else if (*lWork == -1) {
|
||||
// Work size query
|
||||
*Work = cleanlWork;
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure Work size
|
||||
double *cleanWork = Work;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (!*info && *lWork < cleanlWork) {
|
||||
cleanWork = malloc(cleanlWork * sizeof(double));
|
||||
if (!cleanWork)
|
||||
*info = -7;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("DSYTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Dummy argument
|
||||
int nout;
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_dsytrf_rook_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info);
|
||||
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (cleanWork != Work)
|
||||
free(cleanWork);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** dsytrf_rook's recursive compute kernel */
|
||||
static void RELAPACK_dsytrf_rook_rec(
|
||||
const char *uplo, const int *n_full, const int *n, int *n_out,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *ldWork, int *info
|
||||
) {
|
||||
|
||||
// top recursion level?
|
||||
const int top = *n_full == *n;
|
||||
|
||||
if (*n <= MAX(CROSSOVER_DSYTRF_ROOK, 3)) {
|
||||
// Unblocked
|
||||
if (top) {
|
||||
LAPACK(dsytf2)(uplo, n, A, ldA, ipiv, info);
|
||||
*n_out = *n;
|
||||
} else
|
||||
RELAPACK_dsytrf_rook_rec2(uplo, n_full, n, n_out, A, ldA, ipiv, Work, ldWork, info);
|
||||
return;
|
||||
}
|
||||
|
||||
int info1, info2;
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1. };
|
||||
const double MONE[] = { -1. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
const int n_rest = *n_full - *n;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// Splitting (setup)
|
||||
int n1 = DREC_SPLIT(*n);
|
||||
int n2 = *n - n1;
|
||||
|
||||
// Work_L *
|
||||
double *const Work_L = Work;
|
||||
|
||||
// recursion(A_L)
|
||||
int n1_out;
|
||||
RELAPACK_dsytrf_rook_rec(uplo, n_full, &n1, &n1_out, A, ldA, ipiv, Work_L, ldWork, &info1);
|
||||
n1 = n1_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n2 = *n - n1;
|
||||
const int n_full2 = *n_full - n1;
|
||||
|
||||
// * *
|
||||
// A_BL A_BR
|
||||
// A_BL_B A_BR_B
|
||||
double *const A_BL = A + n1;
|
||||
double *const A_BR = A + *ldA * n1 + n1;
|
||||
double *const A_BL_B = A + *n;
|
||||
double *const A_BR_B = A + *ldA * n1 + *n;
|
||||
|
||||
// * *
|
||||
// Work_BL Work_BR
|
||||
// * *
|
||||
// (top recursion level: use Work as Work_BR)
|
||||
double *const Work_BL = Work + n1;
|
||||
double *const Work_BR = top ? Work : Work + *ldWork * n1 + n1;
|
||||
const int ldWork_BR = top ? n2 : *ldWork;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// A_BR = A_BR - A_BL Work_BL'
|
||||
RELAPACK_dgemmt(uplo, "N", "T", &n2, &n1, MONE, A_BL, ldA, Work_BL, ldWork, ONE, A_BR, ldA);
|
||||
BLAS(dgemm)("N", "T", &n_rest, &n2, &n1, MONE, A_BL_B, ldA, Work_BL, ldWork, ONE, A_BR_B, ldA);
|
||||
|
||||
// recursion(A_BR)
|
||||
int n2_out;
|
||||
RELAPACK_dsytrf_rook_rec(uplo, &n_full2, &n2, &n2_out, A_BR, ldA, ipiv_B, Work_BR, &ldWork_BR, &info2);
|
||||
|
||||
if (n2_out != n2) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// last column of A_BR
|
||||
double *const A_BR_r = A_BR + *ldA * n2_out + n2_out;
|
||||
|
||||
// last row of A_BL
|
||||
double *const A_BL_b = A_BL + n2_out;
|
||||
|
||||
// last row of Work_BL
|
||||
double *const Work_BL_b = Work_BL + n2_out;
|
||||
|
||||
// A_BR_r = A_BR_r + A_BL_b Work_BL_b'
|
||||
BLAS(dgemv)("N", &n_restp1, &n1, ONE, A_BL_b, ldA, Work_BL_b, ldWork, ONE, A_BR_r, iONE);
|
||||
}
|
||||
n2 = n2_out;
|
||||
|
||||
// shift pivots
|
||||
int i;
|
||||
for (i = 0; i < n2; i++)
|
||||
if (ipiv_B[i] > 0)
|
||||
ipiv_B[i] += n1;
|
||||
else
|
||||
ipiv_B[i] -= n1;
|
||||
|
||||
*info = info1 || info2;
|
||||
*n_out = n1 + n2;
|
||||
} else {
|
||||
// Splitting (setup)
|
||||
int n2 = DREC_SPLIT(*n);
|
||||
int n1 = *n - n2;
|
||||
|
||||
// * Work_R
|
||||
// (top recursion level: use Work as Work_R)
|
||||
double *const Work_R = top ? Work : Work + *ldWork * n1;
|
||||
|
||||
// recursion(A_R)
|
||||
int n2_out;
|
||||
RELAPACK_dsytrf_rook_rec(uplo, n_full, &n2, &n2_out, A, ldA, ipiv, Work_R, ldWork, &info2);
|
||||
const int n2_diff = n2 - n2_out;
|
||||
n2 = n2_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n1 = *n - n2;
|
||||
const int n_full1 = *n_full - n2;
|
||||
|
||||
// * A_TL_T A_TR_T
|
||||
// * A_TL A_TR
|
||||
// * * *
|
||||
double *const A_TL_T = A + *ldA * n_rest;
|
||||
double *const A_TR_T = A + *ldA * (n_rest + n1);
|
||||
double *const A_TL = A + *ldA * n_rest + n_rest;
|
||||
double *const A_TR = A + *ldA * (n_rest + n1) + n_rest;
|
||||
|
||||
// Work_L *
|
||||
// * Work_TR
|
||||
// * *
|
||||
// (top recursion level: Work_R was Work)
|
||||
double *const Work_L = Work;
|
||||
double *const Work_TR = Work + *ldWork * (top ? n2_diff : n1) + n_rest;
|
||||
const int ldWork_L = top ? n1 : *ldWork;
|
||||
|
||||
// A_TL = A_TL - A_TR Work_TR'
|
||||
RELAPACK_dgemmt(uplo, "N", "T", &n1, &n2, MONE, A_TR, ldA, Work_TR, ldWork, ONE, A_TL, ldA);
|
||||
BLAS(dgemm)("N", "T", &n_rest, &n1, &n2, MONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, ldA);
|
||||
|
||||
// recursion(A_TL)
|
||||
int n1_out;
|
||||
RELAPACK_dsytrf_rook_rec(uplo, &n_full1, &n1, &n1_out, A, ldA, ipiv, Work_L, &ldWork_L, &info1);
|
||||
|
||||
if (n1_out != n1) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// A_TL_T_l = A_TL_T_l + A_TR_T Work_TR_t'
|
||||
BLAS(dgemv)("N", &n_restp1, &n2, ONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, iONE);
|
||||
}
|
||||
n1 = n1_out;
|
||||
|
||||
*info = info2 || info1;
|
||||
*n_out = n1 + n2;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,451 @@
|
|||
/* -- translated by f2c (version 20100827).
|
||||
You must link the resulting object file with libf2c:
|
||||
on Microsoft Windows system, link with libf2c.lib;
|
||||
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
|
||||
or, if you install libf2c.a in a standard place, with -lf2c -lm
|
||||
-- in that order, at the end of the command line, as in
|
||||
cc *.o -lf2c -lm
|
||||
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
|
||||
|
||||
http://www.netlib.org/f2c/libf2c.zip
|
||||
*/
|
||||
|
||||
#include "f2c.h"
|
||||
|
||||
/* Table of constant values */
|
||||
|
||||
static int c__1 = 1;
|
||||
static double c_b9 = -1.;
|
||||
static double c_b10 = 1.;
|
||||
|
||||
/** DSYTRF_ROOK_REC2 computes a partial factorization of a real symmetric matrix using the bounded Bunch-Kaufma n ("rook") diagonal pivoting method.
|
||||
*
|
||||
* This routine is a minor modification of LAPACK's dlasyf.
|
||||
* It serves as an unblocked kernel in the recursive algorithms.
|
||||
* The blocked BLAS Level 3 updates were removed and moved to the
|
||||
* recursive algorithm.
|
||||
* */
|
||||
/* Subroutine */ void RELAPACK_dsytrf_rook_rec2(char *uplo, int *n,
|
||||
int *nb, int *kb, double *a, int *lda, int *ipiv,
|
||||
double *w, int *ldw, int *info, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
int a_dim1, a_offset, w_dim1, w_offset, i__1, i__2;
|
||||
double d__1;
|
||||
|
||||
/* Builtin functions */
|
||||
double sqrt(double);
|
||||
|
||||
/* Local variables */
|
||||
static int j, k, p;
|
||||
static double t, r1, d11, d12, d21, d22;
|
||||
static int ii, jj, kk, kp, kw, jp1, jp2, kkw;
|
||||
static logical done;
|
||||
static int imax, jmax;
|
||||
static double alpha;
|
||||
extern /* Subroutine */ int dscal_(int *, double *, double *,
|
||||
int *);
|
||||
extern logical lsame_(char *, char *, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ int dgemv_(char *, int *, int *,
|
||||
double *, double *, int *, double *, int *,
|
||||
double *, double *, int *, ftnlen);
|
||||
static double dtemp, sfmin;
|
||||
static int itemp;
|
||||
extern /* Subroutine */ int dcopy_(int *, double *, int *,
|
||||
double *, int *), dswap_(int *, double *, int
|
||||
*, double *, int *);
|
||||
static int kstep;
|
||||
extern double dlamch_(char *, ftnlen);
|
||||
static double absakk;
|
||||
extern int idamax_(int *, double *, int *);
|
||||
static double colmax, rowmax;
|
||||
|
||||
/* Parameter adjustments */
|
||||
a_dim1 = *lda;
|
||||
a_offset = 1 + a_dim1;
|
||||
a -= a_offset;
|
||||
--ipiv;
|
||||
w_dim1 = *ldw;
|
||||
w_offset = 1 + w_dim1;
|
||||
w -= w_offset;
|
||||
|
||||
/* Function Body */
|
||||
*info = 0;
|
||||
alpha = (sqrt(17.) + 1.) / 8.;
|
||||
sfmin = dlamch_("S", (ftnlen)1);
|
||||
if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
|
||||
k = *n;
|
||||
L10:
|
||||
kw = *nb + k - *n;
|
||||
if ((k <= *n - *nb + 1 && *nb < *n) || k < 1) {
|
||||
goto L30;
|
||||
}
|
||||
kstep = 1;
|
||||
p = k;
|
||||
dcopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
dgemv_("No transpose", &k, &i__1, &c_b9, &a[(k + 1) * a_dim1 + 1],
|
||||
lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b10, &w[kw *
|
||||
w_dim1 + 1], &c__1, (ftnlen)12);
|
||||
}
|
||||
absakk = (d__1 = w[k + kw * w_dim1], abs(d__1));
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
imax = idamax_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
colmax = (d__1 = w[imax + kw * w_dim1], abs(d__1));
|
||||
} else {
|
||||
colmax = 0.;
|
||||
}
|
||||
if (max(absakk,colmax) == 0.) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
dcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1);
|
||||
} else {
|
||||
if (! (absakk < alpha * colmax)) {
|
||||
kp = k;
|
||||
} else {
|
||||
done = FALSE_;
|
||||
L12:
|
||||
dcopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) *
|
||||
w_dim1 + 1], &c__1);
|
||||
i__1 = k - imax;
|
||||
dcopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax +
|
||||
1 + (kw - 1) * w_dim1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
dgemv_("No transpose", &k, &i__1, &c_b9, &a[(k + 1) *
|
||||
a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1],
|
||||
ldw, &c_b10, &w[(kw - 1) * w_dim1 + 1], &c__1, (
|
||||
ftnlen)12);
|
||||
}
|
||||
if (imax != k) {
|
||||
i__1 = k - imax;
|
||||
jmax = imax + idamax_(&i__1, &w[imax + 1 + (kw - 1) *
|
||||
w_dim1], &c__1);
|
||||
rowmax = (d__1 = w[jmax + (kw - 1) * w_dim1], abs(d__1));
|
||||
} else {
|
||||
rowmax = 0.;
|
||||
}
|
||||
if (imax > 1) {
|
||||
i__1 = imax - 1;
|
||||
itemp = idamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
|
||||
dtemp = (d__1 = w[itemp + (kw - 1) * w_dim1], abs(d__1));
|
||||
if (dtemp > rowmax) {
|
||||
rowmax = dtemp;
|
||||
jmax = itemp;
|
||||
}
|
||||
}
|
||||
if (! ((d__1 = w[imax + (kw - 1) * w_dim1], abs(d__1)) <
|
||||
alpha * rowmax)) {
|
||||
kp = imax;
|
||||
dcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
|
||||
w_dim1 + 1], &c__1);
|
||||
done = TRUE_;
|
||||
} else if (p == jmax || rowmax <= colmax) {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
done = TRUE_;
|
||||
} else {
|
||||
p = imax;
|
||||
colmax = rowmax;
|
||||
imax = jmax;
|
||||
dcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
|
||||
w_dim1 + 1], &c__1);
|
||||
}
|
||||
if (! done) {
|
||||
goto L12;
|
||||
}
|
||||
}
|
||||
kk = k - kstep + 1;
|
||||
kkw = *nb + kk - *n;
|
||||
if (kstep == 2 && p != k) {
|
||||
i__1 = k - p;
|
||||
dcopy_(&i__1, &a[p + 1 + k * a_dim1], &c__1, &a[p + (p + 1) *
|
||||
a_dim1], lda);
|
||||
dcopy_(&p, &a[k * a_dim1 + 1], &c__1, &a[p * a_dim1 + 1], &
|
||||
c__1);
|
||||
i__1 = *n - k + 1;
|
||||
dswap_(&i__1, &a[k + k * a_dim1], lda, &a[p + k * a_dim1],
|
||||
lda);
|
||||
i__1 = *n - kk + 1;
|
||||
dswap_(&i__1, &w[k + kkw * w_dim1], ldw, &w[p + kkw * w_dim1],
|
||||
ldw);
|
||||
}
|
||||
if (kp != kk) {
|
||||
a[kp + k * a_dim1] = a[kk + k * a_dim1];
|
||||
i__1 = k - 1 - kp;
|
||||
dcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp +
|
||||
1) * a_dim1], lda);
|
||||
dcopy_(&kp, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &
|
||||
c__1);
|
||||
i__1 = *n - kk + 1;
|
||||
dswap_(&i__1, &a[kk + kk * a_dim1], lda, &a[kp + kk * a_dim1],
|
||||
lda);
|
||||
i__1 = *n - kk + 1;
|
||||
dswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw *
|
||||
w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
dcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &
|
||||
c__1);
|
||||
if (k > 1) {
|
||||
if ((d__1 = a[k + k * a_dim1], abs(d__1)) >= sfmin) {
|
||||
r1 = 1. / a[k + k * a_dim1];
|
||||
i__1 = k - 1;
|
||||
dscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1);
|
||||
} else if (a[k + k * a_dim1] != 0.) {
|
||||
i__1 = k - 1;
|
||||
for (ii = 1; ii <= i__1; ++ii) {
|
||||
a[ii + k * a_dim1] /= a[k + k * a_dim1];
|
||||
/* L14: */
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (k > 2) {
|
||||
d12 = w[k - 1 + kw * w_dim1];
|
||||
d11 = w[k + kw * w_dim1] / d12;
|
||||
d22 = w[k - 1 + (kw - 1) * w_dim1] / d12;
|
||||
t = 1. / (d11 * d22 - 1.);
|
||||
i__1 = k - 2;
|
||||
for (j = 1; j <= i__1; ++j) {
|
||||
a[j + (k - 1) * a_dim1] = t * ((d11 * w[j + (kw - 1) *
|
||||
w_dim1] - w[j + kw * w_dim1]) / d12);
|
||||
a[j + k * a_dim1] = t * ((d22 * w[j + kw * w_dim1] -
|
||||
w[j + (kw - 1) * w_dim1]) / d12);
|
||||
/* L20: */
|
||||
}
|
||||
}
|
||||
a[k - 1 + (k - 1) * a_dim1] = w[k - 1 + (kw - 1) * w_dim1];
|
||||
a[k - 1 + k * a_dim1] = w[k - 1 + kw * w_dim1];
|
||||
a[k + k * a_dim1] = w[k + kw * w_dim1];
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -p;
|
||||
ipiv[k - 1] = -kp;
|
||||
}
|
||||
k -= kstep;
|
||||
goto L10;
|
||||
L30:
|
||||
j = k + 1;
|
||||
L60:
|
||||
kstep = 1;
|
||||
jp1 = 1;
|
||||
jj = j;
|
||||
jp2 = ipiv[j];
|
||||
if (jp2 < 0) {
|
||||
jp2 = -jp2;
|
||||
++j;
|
||||
jp1 = -ipiv[j];
|
||||
kstep = 2;
|
||||
}
|
||||
++j;
|
||||
if (jp2 != jj && j <= *n) {
|
||||
i__1 = *n - j + 1;
|
||||
dswap_(&i__1, &a[jp2 + j * a_dim1], lda, &a[jj + j * a_dim1], lda)
|
||||
;
|
||||
}
|
||||
jj = j - 1;
|
||||
if (jp1 != jj && kstep == 2) {
|
||||
i__1 = *n - j + 1;
|
||||
dswap_(&i__1, &a[jp1 + j * a_dim1], lda, &a[jj + j * a_dim1], lda)
|
||||
;
|
||||
}
|
||||
if (j <= *n) {
|
||||
goto L60;
|
||||
}
|
||||
*kb = *n - k;
|
||||
} else {
|
||||
k = 1;
|
||||
L70:
|
||||
if ((k >= *nb && *nb < *n) || k > *n) {
|
||||
goto L90;
|
||||
}
|
||||
kstep = 1;
|
||||
p = k;
|
||||
i__1 = *n - k + 1;
|
||||
dcopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1);
|
||||
if (k > 1) {
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
dgemv_("No transpose", &i__1, &i__2, &c_b9, &a[k + a_dim1], lda, &
|
||||
w[k + w_dim1], ldw, &c_b10, &w[k + k * w_dim1], &c__1, (
|
||||
ftnlen)12);
|
||||
}
|
||||
absakk = (d__1 = w[k + k * w_dim1], abs(d__1));
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
imax = k + idamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
colmax = (d__1 = w[imax + k * w_dim1], abs(d__1));
|
||||
} else {
|
||||
colmax = 0.;
|
||||
}
|
||||
if (max(absakk,colmax) == 0.) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
i__1 = *n - k + 1;
|
||||
dcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
|
||||
c__1);
|
||||
} else {
|
||||
if (! (absakk < alpha * colmax)) {
|
||||
kp = k;
|
||||
} else {
|
||||
done = FALSE_;
|
||||
L72:
|
||||
i__1 = imax - k;
|
||||
dcopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = *n - imax + 1;
|
||||
dcopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k +
|
||||
1) * w_dim1], &c__1);
|
||||
if (k > 1) {
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
dgemv_("No transpose", &i__1, &i__2, &c_b9, &a[k + a_dim1]
|
||||
, lda, &w[imax + w_dim1], ldw, &c_b10, &w[k + (k
|
||||
+ 1) * w_dim1], &c__1, (ftnlen)12);
|
||||
}
|
||||
if (imax != k) {
|
||||
i__1 = imax - k;
|
||||
jmax = k - 1 + idamax_(&i__1, &w[k + (k + 1) * w_dim1], &
|
||||
c__1);
|
||||
rowmax = (d__1 = w[jmax + (k + 1) * w_dim1], abs(d__1));
|
||||
} else {
|
||||
rowmax = 0.;
|
||||
}
|
||||
if (imax < *n) {
|
||||
i__1 = *n - imax;
|
||||
itemp = imax + idamax_(&i__1, &w[imax + 1 + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
dtemp = (d__1 = w[itemp + (k + 1) * w_dim1], abs(d__1));
|
||||
if (dtemp > rowmax) {
|
||||
rowmax = dtemp;
|
||||
jmax = itemp;
|
||||
}
|
||||
}
|
||||
if (! ((d__1 = w[imax + (k + 1) * w_dim1], abs(d__1)) < alpha
|
||||
* rowmax)) {
|
||||
kp = imax;
|
||||
i__1 = *n - k + 1;
|
||||
dcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k *
|
||||
w_dim1], &c__1);
|
||||
done = TRUE_;
|
||||
} else if (p == jmax || rowmax <= colmax) {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
done = TRUE_;
|
||||
} else {
|
||||
p = imax;
|
||||
colmax = rowmax;
|
||||
imax = jmax;
|
||||
i__1 = *n - k + 1;
|
||||
dcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k *
|
||||
w_dim1], &c__1);
|
||||
}
|
||||
if (! done) {
|
||||
goto L72;
|
||||
}
|
||||
}
|
||||
kk = k + kstep - 1;
|
||||
if (kstep == 2 && p != k) {
|
||||
i__1 = p - k;
|
||||
dcopy_(&i__1, &a[k + k * a_dim1], &c__1, &a[p + k * a_dim1],
|
||||
lda);
|
||||
i__1 = *n - p + 1;
|
||||
dcopy_(&i__1, &a[p + k * a_dim1], &c__1, &a[p + p * a_dim1], &
|
||||
c__1);
|
||||
dswap_(&k, &a[k + a_dim1], lda, &a[p + a_dim1], lda);
|
||||
dswap_(&kk, &w[k + w_dim1], ldw, &w[p + w_dim1], ldw);
|
||||
}
|
||||
if (kp != kk) {
|
||||
a[kp + k * a_dim1] = a[kk + k * a_dim1];
|
||||
i__1 = kp - k - 1;
|
||||
dcopy_(&i__1, &a[k + 1 + kk * a_dim1], &c__1, &a[kp + (k + 1)
|
||||
* a_dim1], lda);
|
||||
i__1 = *n - kp + 1;
|
||||
dcopy_(&i__1, &a[kp + kk * a_dim1], &c__1, &a[kp + kp *
|
||||
a_dim1], &c__1);
|
||||
dswap_(&kk, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda);
|
||||
dswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
i__1 = *n - k + 1;
|
||||
dcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
|
||||
c__1);
|
||||
if (k < *n) {
|
||||
if ((d__1 = a[k + k * a_dim1], abs(d__1)) >= sfmin) {
|
||||
r1 = 1. / a[k + k * a_dim1];
|
||||
i__1 = *n - k;
|
||||
dscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1);
|
||||
} else if (a[k + k * a_dim1] != 0.) {
|
||||
i__1 = *n;
|
||||
for (ii = k + 1; ii <= i__1; ++ii) {
|
||||
a[ii + k * a_dim1] /= a[k + k * a_dim1];
|
||||
/* L74: */
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (k < *n - 1) {
|
||||
d21 = w[k + 1 + k * w_dim1];
|
||||
d11 = w[k + 1 + (k + 1) * w_dim1] / d21;
|
||||
d22 = w[k + k * w_dim1] / d21;
|
||||
t = 1. / (d11 * d22 - 1.);
|
||||
i__1 = *n;
|
||||
for (j = k + 2; j <= i__1; ++j) {
|
||||
a[j + k * a_dim1] = t * ((d11 * w[j + k * w_dim1] - w[
|
||||
j + (k + 1) * w_dim1]) / d21);
|
||||
a[j + (k + 1) * a_dim1] = t * ((d22 * w[j + (k + 1) *
|
||||
w_dim1] - w[j + k * w_dim1]) / d21);
|
||||
/* L80: */
|
||||
}
|
||||
}
|
||||
a[k + k * a_dim1] = w[k + k * w_dim1];
|
||||
a[k + 1 + k * a_dim1] = w[k + 1 + k * w_dim1];
|
||||
a[k + 1 + (k + 1) * a_dim1] = w[k + 1 + (k + 1) * w_dim1];
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -p;
|
||||
ipiv[k + 1] = -kp;
|
||||
}
|
||||
k += kstep;
|
||||
goto L70;
|
||||
L90:
|
||||
j = k - 1;
|
||||
L120:
|
||||
kstep = 1;
|
||||
jp1 = 1;
|
||||
jj = j;
|
||||
jp2 = ipiv[j];
|
||||
if (jp2 < 0) {
|
||||
jp2 = -jp2;
|
||||
--j;
|
||||
jp1 = -ipiv[j];
|
||||
kstep = 2;
|
||||
}
|
||||
--j;
|
||||
if (jp2 != jj && j >= 1) {
|
||||
dswap_(&j, &a[jp2 + a_dim1], lda, &a[jj + a_dim1], lda);
|
||||
}
|
||||
jj = j + 1;
|
||||
if (jp1 != jj && kstep == 2) {
|
||||
dswap_(&j, &a[jp1 + a_dim1], lda, &a[jj + a_dim1], lda);
|
||||
}
|
||||
if (j >= 1) {
|
||||
goto L120;
|
||||
}
|
||||
*kb = k - 1;
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,274 @@
|
|||
#include "relapack.h"
|
||||
#include <math.h>
|
||||
|
||||
static void RELAPACK_dtgsyl_rec(const char *, const int *, const int *,
|
||||
const int *, const double *, const int *, const double *, const int *,
|
||||
double *, const int *, const double *, const int *, const double *,
|
||||
const int *, double *, const int *, double *, double *, double *, int *,
|
||||
int *, int *);
|
||||
|
||||
|
||||
/** DTGSYL solves the generalized Sylvester equation.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's dtgsyl.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/db/d88/dtgsyl_8f.html
|
||||
* */
|
||||
void RELAPACK_dtgsyl(
|
||||
const char *trans, const int *ijob, const int *m, const int *n,
|
||||
const double *A, const int *ldA, const double *B, const int *ldB,
|
||||
double *C, const int *ldC,
|
||||
const double *D, const int *ldD, const double *E, const int *ldE,
|
||||
double *F, const int *ldF,
|
||||
double *scale, double *dif,
|
||||
double *Work, const int *lWork, int *iWork, int *info
|
||||
) {
|
||||
|
||||
// Parse arguments
|
||||
const int notran = LAPACK(lsame)(trans, "N");
|
||||
const int tran = LAPACK(lsame)(trans, "T");
|
||||
|
||||
// Compute work buffer size
|
||||
int lwmin = 1;
|
||||
if (notran && (*ijob == 1 || *ijob == 2))
|
||||
lwmin = MAX(1, 2 * *m * *n);
|
||||
*info = 0;
|
||||
|
||||
// Check arguments
|
||||
if (!tran && !notran)
|
||||
*info = -1;
|
||||
else if (notran && (*ijob < 0 || *ijob > 4))
|
||||
*info = -2;
|
||||
else if (*m <= 0)
|
||||
*info = -3;
|
||||
else if (*n <= 0)
|
||||
*info = -4;
|
||||
else if (*ldA < MAX(1, *m))
|
||||
*info = -6;
|
||||
else if (*ldB < MAX(1, *n))
|
||||
*info = -8;
|
||||
else if (*ldC < MAX(1, *m))
|
||||
*info = -10;
|
||||
else if (*ldD < MAX(1, *m))
|
||||
*info = -12;
|
||||
else if (*ldE < MAX(1, *n))
|
||||
*info = -14;
|
||||
else if (*ldF < MAX(1, *m))
|
||||
*info = -16;
|
||||
else if (*lWork < lwmin && *lWork != -1)
|
||||
*info = -20;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("DTGSYL", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
if (*lWork == -1) {
|
||||
// Work size query
|
||||
*Work = lwmin;
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleantrans = notran ? 'N' : 'T';
|
||||
|
||||
// Constant
|
||||
const double ZERO[] = { 0. };
|
||||
|
||||
int isolve = 1;
|
||||
int ifunc = 0;
|
||||
if (notran) {
|
||||
if (*ijob >= 3) {
|
||||
ifunc = *ijob - 2;
|
||||
LAPACK(dlaset)("F", m, n, ZERO, ZERO, C, ldC);
|
||||
LAPACK(dlaset)("F", m, n, ZERO, ZERO, F, ldF);
|
||||
} else if (*ijob >= 1)
|
||||
isolve = 2;
|
||||
}
|
||||
|
||||
double scale2;
|
||||
int iround;
|
||||
for (iround = 1; iround <= isolve; iround++) {
|
||||
*scale = 1;
|
||||
double dscale = 0;
|
||||
double dsum = 1;
|
||||
int pq;
|
||||
RELAPACK_dtgsyl_rec(&cleantrans, &ifunc, m, n, A, ldA, B, ldB, C, ldC, D, ldD, E, ldE, F, ldF, scale, &dsum, &dscale, iWork, &pq, info);
|
||||
if (dscale != 0) {
|
||||
if (*ijob == 1 || *ijob == 3)
|
||||
*dif = sqrt(2 * *m * *n) / (dscale * sqrt(dsum));
|
||||
else
|
||||
*dif = sqrt(pq) / (dscale * sqrt(dsum));
|
||||
}
|
||||
if (isolve == 2) {
|
||||
if (iround == 1) {
|
||||
if (notran)
|
||||
ifunc = *ijob;
|
||||
scale2 = *scale;
|
||||
LAPACK(dlacpy)("F", m, n, C, ldC, Work, m);
|
||||
LAPACK(dlacpy)("F", m, n, F, ldF, Work + *m * *n, m);
|
||||
LAPACK(dlaset)("F", m, n, ZERO, ZERO, C, ldC);
|
||||
LAPACK(dlaset)("F", m, n, ZERO, ZERO, F, ldF);
|
||||
} else {
|
||||
LAPACK(dlacpy)("F", m, n, Work, m, C, ldC);
|
||||
LAPACK(dlacpy)("F", m, n, Work + *m * *n, m, F, ldF);
|
||||
*scale = scale2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** dtgsyl's recursive vompute kernel */
|
||||
static void RELAPACK_dtgsyl_rec(
|
||||
const char *trans, const int *ifunc, const int *m, const int *n,
|
||||
const double *A, const int *ldA, const double *B, const int *ldB,
|
||||
double *C, const int *ldC,
|
||||
const double *D, const int *ldD, const double *E, const int *ldE,
|
||||
double *F, const int *ldF,
|
||||
double *scale, double *dsum, double *dscale,
|
||||
int *iWork, int *pq, int *info
|
||||
) {
|
||||
|
||||
if (*m <= MAX(CROSSOVER_DTGSYL, 1) && *n <= MAX(CROSSOVER_DTGSYL, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(dtgsy2)(trans, ifunc, m, n, A, ldA, B, ldB, C, ldC, D, ldD, E, ldE, F, ldF, scale, dsum, dscale, iWork, pq, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1. };
|
||||
const double MONE[] = { -1. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Outputs
|
||||
double scale1[] = { 1. };
|
||||
double scale2[] = { 1. };
|
||||
int info1[] = { 0 };
|
||||
int info2[] = { 0 };
|
||||
|
||||
if (*m > *n) {
|
||||
// Splitting
|
||||
int m1 = DREC_SPLIT(*m);
|
||||
if (A[m1 + *ldA * (m1 - 1)])
|
||||
m1++;
|
||||
const int m2 = *m - m1;
|
||||
|
||||
// A_TL A_TR
|
||||
// 0 A_BR
|
||||
const double *const A_TL = A;
|
||||
const double *const A_TR = A + *ldA * m1;
|
||||
const double *const A_BR = A + *ldA * m1 + m1;
|
||||
|
||||
// C_T
|
||||
// C_B
|
||||
double *const C_T = C;
|
||||
double *const C_B = C + m1;
|
||||
|
||||
// D_TL D_TR
|
||||
// 0 D_BR
|
||||
const double *const D_TL = D;
|
||||
const double *const D_TR = D + *ldD * m1;
|
||||
const double *const D_BR = D + *ldD * m1 + m1;
|
||||
|
||||
// F_T
|
||||
// F_B
|
||||
double *const F_T = F;
|
||||
double *const F_B = F + m1;
|
||||
|
||||
if (*trans == 'N') {
|
||||
// recursion(A_BR, B, C_B, D_BR, E, F_B)
|
||||
RELAPACK_dtgsyl_rec(trans, ifunc, &m2, n, A_BR, ldA, B, ldB, C_B, ldC, D_BR, ldD, E, ldE, F_B, ldF, scale1, dsum, dscale, iWork, pq, info1);
|
||||
// C_T = C_T - A_TR * C_B
|
||||
BLAS(dgemm)("N", "N", &m1, n, &m2, MONE, A_TR, ldA, C_B, ldC, scale1, C_T, ldC);
|
||||
// F_T = F_T - D_TR * C_B
|
||||
BLAS(dgemm)("N", "N", &m1, n, &m2, MONE, D_TR, ldD, C_B, ldC, scale1, F_T, ldF);
|
||||
// recursion(A_TL, B, C_T, D_TL, E, F_T)
|
||||
RELAPACK_dtgsyl_rec(trans, ifunc, &m1, n, A_TL, ldA, B, ldB, C_T, ldC, D_TL, ldD, E, ldE, F_T, ldF, scale2, dsum, dscale, iWork, pq, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1) {
|
||||
LAPACK(dlascl)("G", iONE, iONE, ONE, scale2, &m2, n, C_B, ldC, info);
|
||||
LAPACK(dlascl)("G", iONE, iONE, ONE, scale2, &m2, n, F_B, ldF, info);
|
||||
}
|
||||
} else {
|
||||
// recursion(A_TL, B, C_T, D_TL, E, F_T)
|
||||
RELAPACK_dtgsyl_rec(trans, ifunc, &m1, n, A_TL, ldA, B, ldB, C_T, ldC, D_TL, ldD, E, ldE, F_T, ldF, scale1, dsum, dscale, iWork, pq, info1);
|
||||
// apply scale
|
||||
if (scale1[0] != 1)
|
||||
LAPACK(dlascl)("G", iONE, iONE, ONE, scale1, &m2, n, F_B, ldF, info);
|
||||
// C_B = C_B - A_TR^H * C_T
|
||||
BLAS(dgemm)("T", "N", &m2, n, &m1, MONE, A_TR, ldA, C_T, ldC, scale1, C_B, ldC);
|
||||
// C_B = C_B - D_TR^H * F_T
|
||||
BLAS(dgemm)("T", "N", &m2, n, &m1, MONE, D_TR, ldD, F_T, ldC, ONE, C_B, ldC);
|
||||
// recursion(A_BR, B, C_B, D_BR, E, F_B)
|
||||
RELAPACK_dtgsyl_rec(trans, ifunc, &m2, n, A_BR, ldA, B, ldB, C_B, ldC, D_BR, ldD, E, ldE, F_B, ldF, scale2, dsum, dscale, iWork, pq, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1) {
|
||||
LAPACK(dlascl)("G", iONE, iONE, ONE, scale2, &m1, n, C_T, ldC, info);
|
||||
LAPACK(dlascl)("G", iONE, iONE, ONE, scale2, &m1, n, F_T, ldF, info);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Splitting
|
||||
int n1 = DREC_SPLIT(*n);
|
||||
if (B[n1 + *ldB * (n1 - 1)])
|
||||
n1++;
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// B_TL B_TR
|
||||
// 0 B_BR
|
||||
const double *const B_TL = B;
|
||||
const double *const B_TR = B + *ldB * n1;
|
||||
const double *const B_BR = B + *ldB * n1 + n1;
|
||||
|
||||
// C_L C_R
|
||||
double *const C_L = C;
|
||||
double *const C_R = C + *ldC * n1;
|
||||
|
||||
// E_TL E_TR
|
||||
// 0 E_BR
|
||||
const double *const E_TL = E;
|
||||
const double *const E_TR = E + *ldE * n1;
|
||||
const double *const E_BR = E + *ldE * n1 + n1;
|
||||
|
||||
// F_L F_R
|
||||
double *const F_L = F;
|
||||
double *const F_R = F + *ldF * n1;
|
||||
|
||||
if (*trans == 'N') {
|
||||
// recursion(A, B_TL, C_L, D, E_TL, F_L)
|
||||
RELAPACK_dtgsyl_rec(trans, ifunc, m, &n1, A, ldA, B_TL, ldB, C_L, ldC, D, ldD, E_TL, ldE, F_L, ldF, scale1, dsum, dscale, iWork, pq, info1);
|
||||
// C_R = C_R + F_L * B_TR
|
||||
BLAS(dgemm)("N", "N", m, &n2, &n1, ONE, F_L, ldF, B_TR, ldB, scale1, C_R, ldC);
|
||||
// F_R = F_R + F_L * E_TR
|
||||
BLAS(dgemm)("N", "N", m, &n2, &n1, ONE, F_L, ldF, E_TR, ldE, scale1, F_R, ldF);
|
||||
// recursion(A, B_BR, C_R, D, E_BR, F_R)
|
||||
RELAPACK_dtgsyl_rec(trans, ifunc, m, &n2, A, ldA, B_BR, ldB, C_R, ldC, D, ldD, E_BR, ldE, F_R, ldF, scale2, dsum, dscale, iWork, pq, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1) {
|
||||
LAPACK(dlascl)("G", iONE, iONE, ONE, scale2, m, &n1, C_L, ldC, info);
|
||||
LAPACK(dlascl)("G", iONE, iONE, ONE, scale2, m, &n1, F_L, ldF, info);
|
||||
}
|
||||
} else {
|
||||
// recursion(A, B_BR, C_R, D, E_BR, F_R)
|
||||
RELAPACK_dtgsyl_rec(trans, ifunc, m, &n2, A, ldA, B_BR, ldB, C_R, ldC, D, ldD, E_BR, ldE, F_R, ldF, scale1, dsum, dscale, iWork, pq, info1);
|
||||
// apply scale
|
||||
if (scale1[0] != 1)
|
||||
LAPACK(dlascl)("G", iONE, iONE, ONE, scale1, m, &n1, C_L, ldC, info);
|
||||
// F_L = F_L + C_R * B_TR
|
||||
BLAS(dgemm)("N", "T", m, &n1, &n2, ONE, C_R, ldC, B_TR, ldB, scale1, F_L, ldF);
|
||||
// F_L = F_L + F_R * E_TR
|
||||
BLAS(dgemm)("N", "T", m, &n1, &n2, ONE, F_R, ldF, E_TR, ldB, ONE, F_L, ldF);
|
||||
// recursion(A, B_TL, C_L, D, E_TL, F_L)
|
||||
RELAPACK_dtgsyl_rec(trans, ifunc, m, &n1, A, ldA, B_TL, ldB, C_L, ldC, D, ldD, E_TL, ldE, F_L, ldF, scale2, dsum, dscale, iWork, pq, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1) {
|
||||
LAPACK(dlascl)("G", iONE, iONE, ONE, scale2, m, &n2, C_R, ldC, info);
|
||||
LAPACK(dlascl)("G", iONE, iONE, ONE, scale2, m, &n2, F_R, ldF, info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*scale = scale1[0] * scale2[0];
|
||||
*info = info1[0] || info2[0];
|
||||
}
|
|
@ -0,0 +1,169 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_dtrsyl_rec(const char *, const char *, const int *,
|
||||
const int *, const int *, const double *, const int *, const double *,
|
||||
const int *, double *, const int *, double *, int *);
|
||||
|
||||
|
||||
/** DTRSYL solves the real Sylvester matrix equation.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's dtrsyl.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d6/d43/dtrsyl_8f.html
|
||||
* */
|
||||
void RELAPACK_dtrsyl(
|
||||
const char *tranA, const char *tranB, const int *isgn,
|
||||
const int *m, const int *n,
|
||||
const double *A, const int *ldA, const double *B, const int *ldB,
|
||||
double *C, const int *ldC, double *scale,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int notransA = LAPACK(lsame)(tranA, "N");
|
||||
const int transA = LAPACK(lsame)(tranA, "T");
|
||||
const int ctransA = LAPACK(lsame)(tranA, "C");
|
||||
const int notransB = LAPACK(lsame)(tranB, "N");
|
||||
const int transB = LAPACK(lsame)(tranB, "T");
|
||||
const int ctransB = LAPACK(lsame)(tranB, "C");
|
||||
*info = 0;
|
||||
if (!transA && !ctransA && !notransA)
|
||||
*info = -1;
|
||||
else if (!transB && !ctransB && !notransB)
|
||||
*info = -2;
|
||||
else if (*isgn != 1 && *isgn != -1)
|
||||
*info = -3;
|
||||
else if (*m < 0)
|
||||
*info = -4;
|
||||
else if (*n < 0)
|
||||
*info = -5;
|
||||
else if (*ldA < MAX(1, *m))
|
||||
*info = -7;
|
||||
else if (*ldB < MAX(1, *n))
|
||||
*info = -9;
|
||||
else if (*ldC < MAX(1, *m))
|
||||
*info = -11;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("DTRSYL", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleantranA = notransA ? 'N' : (transA ? 'T' : 'C');
|
||||
const char cleantranB = notransB ? 'N' : (transB ? 'T' : 'C');
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_dtrsyl_rec(&cleantranA, &cleantranB, isgn, m, n, A, ldA, B, ldB, C, ldC, scale, info);
|
||||
}
|
||||
|
||||
|
||||
/** dtrsyl's recursive compute kernel */
|
||||
static void RELAPACK_dtrsyl_rec(
|
||||
const char *tranA, const char *tranB, const int *isgn,
|
||||
const int *m, const int *n,
|
||||
const double *A, const int *ldA, const double *B, const int *ldB,
|
||||
double *C, const int *ldC, double *scale,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*m <= MAX(CROSSOVER_DTRSYL, 1) && *n <= MAX(CROSSOVER_DTRSYL, 1)) {
|
||||
// Unblocked
|
||||
RELAPACK_dtrsyl_rec2(tranA, tranB, isgn, m, n, A, ldA, B, ldB, C, ldC, scale, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1. };
|
||||
const double MONE[] = { -1. };
|
||||
const double MSGN[] = { -*isgn };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Outputs
|
||||
double scale1[] = { 1. };
|
||||
double scale2[] = { 1. };
|
||||
int info1[] = { 0 };
|
||||
int info2[] = { 0 };
|
||||
|
||||
if (*m > *n) {
|
||||
// Splitting
|
||||
int m1 = DREC_SPLIT(*m);
|
||||
if (A[m1 + *ldA * (m1 - 1)])
|
||||
m1++;
|
||||
const int m2 = *m - m1;
|
||||
|
||||
// A_TL A_TR
|
||||
// 0 A_BR
|
||||
const double *const A_TL = A;
|
||||
const double *const A_TR = A + *ldA * m1;
|
||||
const double *const A_BR = A + *ldA * m1 + m1;
|
||||
|
||||
// C_T
|
||||
// C_B
|
||||
double *const C_T = C;
|
||||
double *const C_B = C + m1;
|
||||
|
||||
if (*tranA == 'N') {
|
||||
// recusion(A_BR, B, C_B)
|
||||
RELAPACK_dtrsyl_rec(tranA, tranB, isgn, &m2, n, A_BR, ldA, B, ldB, C_B, ldC, scale1, info1);
|
||||
// C_T = C_T - A_TR * C_B
|
||||
BLAS(dgemm)("N", "N", &m1, n, &m2, MONE, A_TR, ldA, C_B, ldC, scale1, C_T, ldC);
|
||||
// recusion(A_TL, B, C_T)
|
||||
RELAPACK_dtrsyl_rec(tranA, tranB, isgn, &m1, n, A_TL, ldA, B, ldB, C_T, ldC, scale2, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1)
|
||||
LAPACK(dlascl)("G", iONE, iONE, ONE, scale2, &m2, n, C_B, ldC, info);
|
||||
} else {
|
||||
// recusion(A_TL, B, C_T)
|
||||
RELAPACK_dtrsyl_rec(tranA, tranB, isgn, &m1, n, A_TL, ldA, B, ldB, C_T, ldC, scale1, info1);
|
||||
// C_B = C_B - A_TR' * C_T
|
||||
BLAS(dgemm)("C", "N", &m2, n, &m1, MONE, A_TR, ldA, C_T, ldC, scale1, C_B, ldC);
|
||||
// recusion(A_BR, B, C_B)
|
||||
RELAPACK_dtrsyl_rec(tranA, tranB, isgn, &m2, n, A_BR, ldA, B, ldB, C_B, ldC, scale2, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1)
|
||||
LAPACK(dlascl)("G", iONE, iONE, ONE, scale2, &m1, n, C_B, ldC, info);
|
||||
}
|
||||
} else {
|
||||
// Splitting
|
||||
int n1 = DREC_SPLIT(*n);
|
||||
if (B[n1 + *ldB * (n1 - 1)])
|
||||
n1++;
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// B_TL B_TR
|
||||
// 0 B_BR
|
||||
const double *const B_TL = B;
|
||||
const double *const B_TR = B + *ldB * n1;
|
||||
const double *const B_BR = B + *ldB * n1 + n1;
|
||||
|
||||
// C_L C_R
|
||||
double *const C_L = C;
|
||||
double *const C_R = C + *ldC * n1;
|
||||
|
||||
if (*tranB == 'N') {
|
||||
// recusion(A, B_TL, C_L)
|
||||
RELAPACK_dtrsyl_rec(tranA, tranB, isgn, m, &n1, A, ldA, B_TL, ldB, C_L, ldC, scale1, info1);
|
||||
// C_R = C_R -/+ C_L * B_TR
|
||||
BLAS(dgemm)("N", "N", m, &n2, &n1, MSGN, C_L, ldC, B_TR, ldB, scale1, C_R, ldC);
|
||||
// recusion(A, B_BR, C_R)
|
||||
RELAPACK_dtrsyl_rec(tranA, tranB, isgn, m, &n2, A, ldA, B_BR, ldB, C_R, ldC, scale2, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1)
|
||||
LAPACK(dlascl)("G", iONE, iONE, ONE, scale2, m, &n1, C_L, ldC, info);
|
||||
} else {
|
||||
// recusion(A, B_BR, C_R)
|
||||
RELAPACK_dtrsyl_rec(tranA, tranB, isgn, m, &n2, A, ldA, B_BR, ldB, C_R, ldC, scale1, info1);
|
||||
// C_L = C_L -/+ C_R * B_TR'
|
||||
BLAS(dgemm)("N", "C", m, &n1, &n2, MSGN, C_R, ldC, B_TR, ldB, scale1, C_L, ldC);
|
||||
// recusion(A, B_TL, C_L)
|
||||
RELAPACK_dtrsyl_rec(tranA, tranB, isgn, m, &n1, A, ldA, B_TL, ldB, C_L, ldC, scale2, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1)
|
||||
LAPACK(dlascl)("G", iONE, iONE, ONE, scale2, m, &n2, C_R, ldC, info);
|
||||
}
|
||||
}
|
||||
|
||||
*scale = scale1[0] * scale2[0];
|
||||
*info = info1[0] || info2[0];
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,107 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_dtrtri_rec(const char *, const char *, const int *,
|
||||
double *, const int *, int *);
|
||||
|
||||
|
||||
/** DTRTRI computes the inverse of a real upper or lower triangular matrix A.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's dtrtri.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d5/dba/dtrtri_8f.html
|
||||
* */
|
||||
void RELAPACK_dtrtri(
|
||||
const char *uplo, const char *diag, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
const int nounit = LAPACK(lsame)(diag, "N");
|
||||
const int unit = LAPACK(lsame)(diag, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (!nounit && !unit)
|
||||
*info = -2;
|
||||
else if (*n < 0)
|
||||
*info = -3;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -5;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("DTRTRI", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
const char cleandiag = nounit ? 'N' : 'U';
|
||||
|
||||
// check for singularity
|
||||
if (nounit) {
|
||||
int i;
|
||||
for (i = 0; i < *n; i++)
|
||||
if (A[i + *ldA * i] == 0) {
|
||||
*info = i;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_dtrtri_rec(&cleanuplo, &cleandiag, n, A, ldA, info);
|
||||
}
|
||||
|
||||
|
||||
/** dtrtri's recursive compute kernel */
|
||||
static void RELAPACK_dtrtri_rec(
|
||||
const char *uplo, const char *diag, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
){
|
||||
|
||||
if (*n <= MAX(CROSSOVER_DTRTRI, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(dtrti2)(uplo, diag, n, A, ldA, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1. };
|
||||
const double MONE[] = { -1. };
|
||||
|
||||
// Splitting
|
||||
const int n1 = DREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
double *const A_TL = A;
|
||||
double *const A_TR = A + *ldA * n1;
|
||||
double *const A_BL = A + n1;
|
||||
double *const A_BR = A + *ldA * n1 + n1;
|
||||
|
||||
// recursion(A_TL)
|
||||
RELAPACK_dtrtri_rec(uplo, diag, &n1, A_TL, ldA, info);
|
||||
if (*info)
|
||||
return;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// A_BL = - A_BL * A_TL
|
||||
BLAS(dtrmm)("R", "L", "N", diag, &n2, &n1, MONE, A_TL, ldA, A_BL, ldA);
|
||||
// A_BL = A_BR \ A_BL
|
||||
BLAS(dtrsm)("L", "L", "N", diag, &n2, &n1, ONE, A_BR, ldA, A_BL, ldA);
|
||||
} else {
|
||||
// A_TR = - A_TL * A_TR
|
||||
BLAS(dtrmm)("L", "U", "N", diag, &n1, &n2, MONE, A_TL, ldA, A_TR, ldA);
|
||||
// A_TR = A_TR / A_BR
|
||||
BLAS(dtrsm)("R", "U", "N", diag, &n1, &n2, ONE, A_BR, ldA, A_TR, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR)
|
||||
RELAPACK_dtrtri_rec(uplo, diag, &n2, A_BR, ldA, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
}
|
|
@ -0,0 +1,109 @@
|
|||
#include "stdlib.h"
|
||||
#include "stdio.h"
|
||||
#include "signal.h"
|
||||
#include "f2c.h"
|
||||
|
||||
#ifndef SIGIOT
|
||||
#ifdef SIGABRT
|
||||
#define SIGIOT SIGABRT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
void sig_die(const char *s, int kill) {
|
||||
/* print error message, then clear buffers */
|
||||
fprintf(stderr, "%s\n", s);
|
||||
|
||||
if(kill) {
|
||||
fflush(stderr);
|
||||
/* now get a core */
|
||||
signal(SIGIOT, SIG_DFL);
|
||||
abort();
|
||||
} else
|
||||
exit(1);
|
||||
}
|
||||
|
||||
void c_div(complex *c, complex *a, complex *b) {
|
||||
double ratio, den;
|
||||
double abr, abi, cr;
|
||||
|
||||
if( (abr = b->r) < 0.)
|
||||
abr = - abr;
|
||||
if( (abi = b->i) < 0.)
|
||||
abi = - abi;
|
||||
if( abr <= abi ) {
|
||||
if(abi == 0) {
|
||||
#ifdef IEEE_COMPLEX_DIVIDE
|
||||
float af, bf;
|
||||
af = bf = abr;
|
||||
if (a->i != 0 || a->r != 0)
|
||||
af = 1.;
|
||||
c->i = c->r = af / bf;
|
||||
return;
|
||||
#else
|
||||
sig_die("complex division by zero", 1);
|
||||
#endif
|
||||
}
|
||||
ratio = (double)b->r / b->i ;
|
||||
den = b->i * (1 + ratio*ratio);
|
||||
cr = (a->r*ratio + a->i) / den;
|
||||
c->i = (a->i*ratio - a->r) / den;
|
||||
} else {
|
||||
ratio = (double)b->i / b->r ;
|
||||
den = b->r * (1 + ratio*ratio);
|
||||
cr = (a->r + a->i*ratio) / den;
|
||||
c->i = (a->i - a->r*ratio) / den;
|
||||
}
|
||||
c->r = cr;
|
||||
}
|
||||
|
||||
void z_div(doublecomplex *c, doublecomplex *a, doublecomplex *b) {
|
||||
double ratio, den;
|
||||
double abr, abi, cr;
|
||||
|
||||
if( (abr = b->r) < 0.)
|
||||
abr = - abr;
|
||||
if( (abi = b->i) < 0.)
|
||||
abi = - abi;
|
||||
if( abr <= abi ) {
|
||||
if(abi == 0) {
|
||||
#ifdef IEEE_COMPLEX_DIVIDE
|
||||
if (a->i != 0 || a->r != 0)
|
||||
abi = 1.;
|
||||
c->i = c->r = abi / abr;
|
||||
return;
|
||||
#else
|
||||
sig_die("complex division by zero", 1);
|
||||
#endif
|
||||
}
|
||||
ratio = b->r / b->i ;
|
||||
den = b->i * (1 + ratio*ratio);
|
||||
cr = (a->r*ratio + a->i) / den;
|
||||
c->i = (a->i*ratio - a->r) / den;
|
||||
} else {
|
||||
ratio = b->i / b->r ;
|
||||
den = b->r * (1 + ratio*ratio);
|
||||
cr = (a->r + a->i*ratio) / den;
|
||||
c->i = (a->i - a->r*ratio) / den;
|
||||
}
|
||||
c->r = cr;
|
||||
}
|
||||
|
||||
float r_imag(complex *z) {
|
||||
return z->i;
|
||||
}
|
||||
|
||||
void r_cnjg(complex *r, complex *z) {
|
||||
float zi = z->i;
|
||||
r->r = z->r;
|
||||
r->i = -zi;
|
||||
}
|
||||
|
||||
double d_imag(doublecomplex *z) {
|
||||
return z->i;
|
||||
}
|
||||
|
||||
void d_cnjg(doublecomplex *r, doublecomplex *z) {
|
||||
double zi = z->i;
|
||||
r->r = z->r;
|
||||
r->i = -zi;
|
||||
}
|
|
@ -0,0 +1,223 @@
|
|||
/* f2c.h -- Standard Fortran to C header file */
|
||||
|
||||
/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed."
|
||||
|
||||
- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
|
||||
|
||||
#ifndef F2C_INCLUDE
|
||||
#define F2C_INCLUDE
|
||||
|
||||
typedef long int integer;
|
||||
typedef unsigned long int uinteger;
|
||||
typedef char *address;
|
||||
typedef short int shortint;
|
||||
typedef float real;
|
||||
typedef double doublereal;
|
||||
typedef struct { real r, i; } complex;
|
||||
typedef struct { doublereal r, i; } doublecomplex;
|
||||
typedef long int logical;
|
||||
typedef short int shortlogical;
|
||||
typedef char logical1;
|
||||
typedef char integer1;
|
||||
#ifdef INTEGER_STAR_8 /* Adjust for integer*8. */
|
||||
typedef long long longint; /* system-dependent */
|
||||
typedef unsigned long long ulongint; /* system-dependent */
|
||||
#define qbit_clear(a,b) ((a) & ~((ulongint)1 << (b)))
|
||||
#define qbit_set(a,b) ((a) | ((ulongint)1 << (b)))
|
||||
#endif
|
||||
|
||||
#define TRUE_ (1)
|
||||
#define FALSE_ (0)
|
||||
|
||||
/* Extern is for use with -E */
|
||||
#ifndef Extern
|
||||
#define Extern extern
|
||||
#endif
|
||||
|
||||
/* I/O stuff */
|
||||
|
||||
#ifdef f2c_i2
|
||||
/* for -i2 */
|
||||
typedef short flag;
|
||||
typedef short ftnlen;
|
||||
typedef short ftnint;
|
||||
#else
|
||||
typedef long int flag;
|
||||
typedef long int ftnlen;
|
||||
typedef long int ftnint;
|
||||
#endif
|
||||
|
||||
/*external read, write*/
|
||||
typedef struct
|
||||
{ flag cierr;
|
||||
ftnint ciunit;
|
||||
flag ciend;
|
||||
char *cifmt;
|
||||
ftnint cirec;
|
||||
} cilist;
|
||||
|
||||
/*internal read, write*/
|
||||
typedef struct
|
||||
{ flag icierr;
|
||||
char *iciunit;
|
||||
flag iciend;
|
||||
char *icifmt;
|
||||
ftnint icirlen;
|
||||
ftnint icirnum;
|
||||
} icilist;
|
||||
|
||||
/*open*/
|
||||
typedef struct
|
||||
{ flag oerr;
|
||||
ftnint ounit;
|
||||
char *ofnm;
|
||||
ftnlen ofnmlen;
|
||||
char *osta;
|
||||
char *oacc;
|
||||
char *ofm;
|
||||
ftnint orl;
|
||||
char *oblnk;
|
||||
} olist;
|
||||
|
||||
/*close*/
|
||||
typedef struct
|
||||
{ flag cerr;
|
||||
ftnint cunit;
|
||||
char *csta;
|
||||
} cllist;
|
||||
|
||||
/*rewind, backspace, endfile*/
|
||||
typedef struct
|
||||
{ flag aerr;
|
||||
ftnint aunit;
|
||||
} alist;
|
||||
|
||||
/* inquire */
|
||||
typedef struct
|
||||
{ flag inerr;
|
||||
ftnint inunit;
|
||||
char *infile;
|
||||
ftnlen infilen;
|
||||
ftnint *inex; /*parameters in standard's order*/
|
||||
ftnint *inopen;
|
||||
ftnint *innum;
|
||||
ftnint *innamed;
|
||||
char *inname;
|
||||
ftnlen innamlen;
|
||||
char *inacc;
|
||||
ftnlen inacclen;
|
||||
char *inseq;
|
||||
ftnlen inseqlen;
|
||||
char *indir;
|
||||
ftnlen indirlen;
|
||||
char *infmt;
|
||||
ftnlen infmtlen;
|
||||
char *inform;
|
||||
ftnint informlen;
|
||||
char *inunf;
|
||||
ftnlen inunflen;
|
||||
ftnint *inrecl;
|
||||
ftnint *innrec;
|
||||
char *inblank;
|
||||
ftnlen inblanklen;
|
||||
} inlist;
|
||||
|
||||
#define VOID void
|
||||
|
||||
union Multitype { /* for multiple entry points */
|
||||
integer1 g;
|
||||
shortint h;
|
||||
integer i;
|
||||
/* longint j; */
|
||||
real r;
|
||||
doublereal d;
|
||||
complex c;
|
||||
doublecomplex z;
|
||||
};
|
||||
|
||||
typedef union Multitype Multitype;
|
||||
|
||||
/*typedef long int Long;*/ /* No longer used; formerly in Namelist */
|
||||
|
||||
struct Vardesc { /* for Namelist */
|
||||
char *name;
|
||||
char *addr;
|
||||
ftnlen *dims;
|
||||
int type;
|
||||
};
|
||||
typedef struct Vardesc Vardesc;
|
||||
|
||||
struct Namelist {
|
||||
char *name;
|
||||
Vardesc **vars;
|
||||
int nvars;
|
||||
};
|
||||
typedef struct Namelist Namelist;
|
||||
|
||||
#define abs(x) ((x) >= 0 ? (x) : -(x))
|
||||
#define dabs(x) (doublereal)abs(x)
|
||||
#define min(a,b) ((a) <= (b) ? (a) : (b))
|
||||
#define max(a,b) ((a) >= (b) ? (a) : (b))
|
||||
#define dmin(a,b) (doublereal)min(a,b)
|
||||
#define dmax(a,b) (doublereal)max(a,b)
|
||||
#define bit_test(a,b) ((a) >> (b) & 1)
|
||||
#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b)))
|
||||
#define bit_set(a,b) ((a) | ((uinteger)1 << (b)))
|
||||
|
||||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef int /* Unknown procedure type */ (*U_fp)(...);
|
||||
typedef shortint (*J_fp)(...);
|
||||
typedef integer (*I_fp)(...);
|
||||
typedef real (*R_fp)(...);
|
||||
typedef doublereal (*D_fp)(...), (*E_fp)(...);
|
||||
typedef /* Complex */ VOID (*C_fp)(...);
|
||||
typedef /* Double Complex */ VOID (*Z_fp)(...);
|
||||
typedef logical (*L_fp)(...);
|
||||
typedef shortlogical (*K_fp)(...);
|
||||
typedef /* Character */ VOID (*H_fp)(...);
|
||||
typedef /* Subroutine */ int (*S_fp)(...);
|
||||
#else
|
||||
typedef int /* Unknown procedure type */ (*U_fp)();
|
||||
typedef shortint (*J_fp)();
|
||||
typedef integer (*I_fp)();
|
||||
typedef real (*R_fp)();
|
||||
typedef doublereal (*D_fp)(), (*E_fp)();
|
||||
typedef /* Complex */ VOID (*C_fp)();
|
||||
typedef /* Double Complex */ VOID (*Z_fp)();
|
||||
typedef logical (*L_fp)();
|
||||
typedef shortlogical (*K_fp)();
|
||||
typedef /* Character */ VOID (*H_fp)();
|
||||
typedef /* Subroutine */ int (*S_fp)();
|
||||
#endif
|
||||
/* E_fp is for real functions when -R is not specified */
|
||||
typedef VOID C_f; /* complex function */
|
||||
typedef VOID H_f; /* character function */
|
||||
typedef VOID Z_f; /* double complex function */
|
||||
typedef doublereal E_f; /* real function with -R not specified */
|
||||
|
||||
/* undef any lower-case symbols that your C compiler predefines, e.g.: */
|
||||
|
||||
#ifndef Skip_f2c_Undefs
|
||||
#undef cray
|
||||
#undef gcos
|
||||
#undef mc68010
|
||||
#undef mc68020
|
||||
#undef mips
|
||||
#undef pdp11
|
||||
#undef sgi
|
||||
#undef sparc
|
||||
#undef sun
|
||||
#undef sun2
|
||||
#undef sun3
|
||||
#undef sun4
|
||||
#undef u370
|
||||
#undef u3b
|
||||
#undef u3b2
|
||||
#undef u3b5
|
||||
#undef unix
|
||||
#undef vax
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,80 @@
|
|||
#ifndef LAPACK_H
|
||||
#define LAPACK_H
|
||||
|
||||
extern int LAPACK(lsame)(const char *, const char *);
|
||||
extern int LAPACK(xerbla)(const char *, const int *);
|
||||
|
||||
extern void LAPACK(slaswp)(const int *, float *, const int *, const int *, const int *, const int *, const int *);
|
||||
extern void LAPACK(dlaswp)(const int *, double *, const int *, const int *, const int *, const int *, const int *);
|
||||
extern void LAPACK(claswp)(const int *, float *, const int *, const int *, const int *, const int *, const int *);
|
||||
extern void LAPACK(zlaswp)(const int *, double *, const int *, const int *, const int *, const int *, const int *);
|
||||
|
||||
extern void LAPACK(slaset)(const char *, const int *, const int *, const float *, const float *, float *, const int *);
|
||||
extern void LAPACK(dlaset)(const char *, const int *, const int *, const double *, const double *, double *, const int *);
|
||||
extern void LAPACK(claset)(const char *, const int *, const int *, const float *, const float *, float *, const int *);
|
||||
extern void LAPACK(zlaset)(const char *, const int *, const int *, const double *, const double *, double *, const int *);
|
||||
|
||||
extern void LAPACK(slacpy)(const char *, const int *, const int *, const float *, const int *, float *, const int *);
|
||||
extern void LAPACK(dlacpy)(const char *, const int *, const int *, const double *, const int *, double *, const int *);
|
||||
extern void LAPACK(clacpy)(const char *, const int *, const int *, const float *, const int *, float *, const int *);
|
||||
extern void LAPACK(zlacpy)(const char *, const int *, const int *, const double *, const int *, double *, const int *);
|
||||
|
||||
extern void LAPACK(slascl)(const char *, const int *, const int *, const float *, const float *, const int *, const int *, float *, const int *, int *);
|
||||
extern void LAPACK(dlascl)(const char *, const int *, const int *, const double *, const double *, const int *, const int *, double *, const int *, int *);
|
||||
extern void LAPACK(clascl)(const char *, const int *, const int *, const float *, const float *, const int *, const int *, float *, const int *, int *);
|
||||
extern void LAPACK(zlascl)(const char *, const int *, const int *, const double *, const double *, const int *, const int *, double *, const int *, int *);
|
||||
|
||||
extern void LAPACK(slauu2)(const char *, const int *, float *, const int *, int *);
|
||||
extern void LAPACK(dlauu2)(const char *, const int *, double *, const int *, int *);
|
||||
extern void LAPACK(clauu2)(const char *, const int *, float *, const int *, int *);
|
||||
extern void LAPACK(zlauu2)(const char *, const int *, double *, const int *, int *);
|
||||
|
||||
extern void LAPACK(ssygs2)(const int *, const char *, const int *, float *, const int *, const float *, const int *, int *);
|
||||
extern void LAPACK(dsygs2)(const int *, const char *, const int *, double *, const int *, const double *, const int *, int *);
|
||||
extern void LAPACK(chegs2)(const int *, const char *, const int *, float *, const int *, const float *, const int *, int *);
|
||||
extern void LAPACK(zhegs2)(const int *, const char *, const int *, double *, const int *, const double *, const int *, int *);
|
||||
|
||||
extern void LAPACK(strti2)(const char *, const char *, const int *, float *, const int *, int *);
|
||||
extern void LAPACK(dtrti2)(const char *, const char *, const int *, double *, const int *, int *);
|
||||
extern void LAPACK(ctrti2)(const char *, const char *, const int *, float *, const int *, int *);
|
||||
extern void LAPACK(ztrti2)(const char *, const char *, const int *, double *, const int *, int *);
|
||||
|
||||
extern void LAPACK(spotf2)(const char *, const int *, float *, const int *, int *);
|
||||
extern void LAPACK(dpotf2)(const char *, const int *, double *, const int *, int *);
|
||||
extern void LAPACK(cpotf2)(const char *, const int *, float *, const int *, int *);
|
||||
extern void LAPACK(zpotf2)(const char *, const int *, double *, const int *, int *);
|
||||
|
||||
extern void LAPACK(spbtf2)(const char *, const int *, const int *, float *, const int *, int *);
|
||||
extern void LAPACK(dpbtf2)(const char *, const int *, const int *, double *, const int *, int *);
|
||||
extern void LAPACK(cpbtf2)(const char *, const int *, const int *, float *, const int *, int *);
|
||||
extern void LAPACK(zpbtf2)(const char *, const int *, const int *, double *, const int *, int *);
|
||||
|
||||
extern void LAPACK(ssytf2)(const char *, const int *, float *, const int *, int *, int *);
|
||||
extern void LAPACK(dsytf2)(const char *, const int *, double *, const int *, int *, int *);
|
||||
extern void LAPACK(csytf2)(const char *, const int *, float *, const int *, int *, int *);
|
||||
extern void LAPACK(chetf2)(const char *, const int *, float *, const int *, int *, int *);
|
||||
extern void LAPACK(zsytf2)(const char *, const int *, double *, const int *, int *, int *);
|
||||
extern void LAPACK(zhetf2)(const char *, const int *, double *, const int *, int *, int *);
|
||||
extern void LAPACK(ssytf2_rook)(const char *, const int *, float *, const int *, int *, int *);
|
||||
extern void LAPACK(dsytf2_rook)(const char *, const int *, double *, const int *, int *, int *);
|
||||
extern void LAPACK(csytf2_rook)(const char *, const int *, float *, const int *, int *, int *);
|
||||
extern void LAPACK(chetf2_rook)(const char *, const int *, float *, const int *, int *, int *);
|
||||
extern void LAPACK(zsytf2_rook)(const char *, const int *, double *, const int *, int *, int *);
|
||||
extern void LAPACK(zhetf2_rook)(const char *, const int *, double *, const int *, int *, int *);
|
||||
|
||||
extern void LAPACK(sgetf2)(const int *, const int *, float *, const int *, int *, int *);
|
||||
extern void LAPACK(dgetf2)(const int *, const int *, double *, const int *, int *, int *);
|
||||
extern void LAPACK(cgetf2)(const int *, const int *, float *, const int *, int *, int *);
|
||||
extern void LAPACK(zgetf2)(const int *, const int *, double *, const int *, int *, int *);
|
||||
|
||||
extern void LAPACK(sgbtf2)(const int *, const int *, const int *, const int *, float *, const int *, int *, int *);
|
||||
extern void LAPACK(dgbtf2)(const int *, const int *, const int *, const int *, double *, const int *, int *, int *);
|
||||
extern void LAPACK(cgbtf2)(const int *, const int *, const int *, const int *, float *, const int *, int *, int *);
|
||||
extern void LAPACK(zgbtf2)(const int *, const int *, const int *, const int *, double *, const int *, int *, int *);
|
||||
|
||||
extern void LAPACK(stgsy2)(const char *, const int *, const int *, const int *, const float *, const int *, const float *, const int *, float *, const int *, const float *, const int *, const float *, const int *, float *, const int *, float *, float *, float *, int *, int *, int *);
|
||||
extern void LAPACK(dtgsy2)(const char *, const int *, const int *, const int *, const double *, const int *, const double *, const int *, double *, const int *, const double *, const int *, const double *, const int *, double *, const int *, double *, double *, double *, int *, int *, int *);
|
||||
extern void LAPACK(ctgsy2)(const char *, const int *, const int *, const int *, const float *, const int *, const float *, const int *, float *, const int *, const float *, const int *, const float *, const int *, float *, const int *, float *, float *, float *, int *);
|
||||
extern void LAPACK(ztgsy2)(const char *, const int *, const int *, const int *, const double *, const int *, const double *, const int *, double *, const int *, const double *, const int *, const double *, const int *, double *, const int *, double *, double *, double *, int *);
|
||||
|
||||
#endif /* LAPACK_H */
|
|
@ -0,0 +1,607 @@
|
|||
#include "relapack.h"
|
||||
|
||||
////////////
|
||||
// XLAUUM //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_SLAUUM
|
||||
void LAPACK(slauum)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_slauum(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DLAUUM
|
||||
void LAPACK(dlauum)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_dlauum(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CLAUUM
|
||||
void LAPACK(clauum)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_clauum(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZLAUUM
|
||||
void LAPACK(zlauum)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_zlauum(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XSYGST //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_SSYGST
|
||||
void LAPACK(ssygst)(
|
||||
const int *itype, const char *uplo, const int *n,
|
||||
float *A, const int *ldA, const float *B, const int *ldB,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_ssygst(itype, uplo, n, A, ldA, B, ldB, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DSYGST
|
||||
void LAPACK(dsygst)(
|
||||
const int *itype, const char *uplo, const int *n,
|
||||
double *A, const int *ldA, const double *B, const int *ldB,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_dsygst(itype, uplo, n, A, ldA, B, ldB, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CHEGST
|
||||
void LAPACK(chegst)(
|
||||
const int *itype, const char *uplo, const int *n,
|
||||
float *A, const int *ldA, const float *B, const int *ldB,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_chegst(itype, uplo, n, A, ldA, B, ldB, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZHEGST
|
||||
void LAPACK(zhegst)(
|
||||
const int *itype, const char *uplo, const int *n,
|
||||
double *A, const int *ldA, const double *B, const int *ldB,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_zhegst(itype, uplo, n, A, ldA, B, ldB, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XTRTRI //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_STRTRI
|
||||
void LAPACK(strtri)(
|
||||
const char *uplo, const char *diag, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_strtri(uplo, diag, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DTRTRI
|
||||
void LAPACK(dtrtri)(
|
||||
const char *uplo, const char *diag, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_dtrtri(uplo, diag, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CTRTRI
|
||||
void LAPACK(ctrtri)(
|
||||
const char *uplo, const char *diag, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_ctrtri(uplo, diag, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZTRTRI
|
||||
void LAPACK(ztrtri)(
|
||||
const char *uplo, const char *diag, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_ztrtri(uplo, diag, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XPOTRF //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_SPOTRF
|
||||
void LAPACK(spotrf)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_spotrf(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DPOTRF
|
||||
void LAPACK(dpotrf)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_dpotrf(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CPOTRF
|
||||
void LAPACK(cpotrf)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_cpotrf(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZPOTRF
|
||||
void LAPACK(zpotrf)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_zpotrf(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XPBTRF //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_SPBTRF
|
||||
void LAPACK(spbtrf)(
|
||||
const char *uplo, const int *n, const int *kd,
|
||||
float *Ab, const int *ldAb,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_spbtrf(uplo, n, kd, Ab, ldAb, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DPBTRF
|
||||
void LAPACK(dpbtrf)(
|
||||
const char *uplo, const int *n, const int *kd,
|
||||
double *Ab, const int *ldAb,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_dpbtrf(uplo, n, kd, Ab, ldAb, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CPBTRF
|
||||
void LAPACK(cpbtrf)(
|
||||
const char *uplo, const int *n, const int *kd,
|
||||
float *Ab, const int *ldAb,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_cpbtrf(uplo, n, kd, Ab, ldAb, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZPBTRF
|
||||
void LAPACK(zpbtrf)(
|
||||
const char *uplo, const int *n, const int *kd,
|
||||
double *Ab, const int *ldAb,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_zpbtrf(uplo, n, kd, Ab, ldAb, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XSYTRF //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_SSYTRF
|
||||
void LAPACK(ssytrf)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_ssytrf(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DSYTRF
|
||||
void LAPACK(dsytrf)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_dsytrf(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CSYTRF
|
||||
void LAPACK(csytrf)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_csytrf(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZSYTRF
|
||||
void LAPACK(zsytrf)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_zsytrf(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CHETRF
|
||||
void LAPACK(chetrf)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_chetrf(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZHETRF
|
||||
void LAPACK(zhetrf)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_zhetrf(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_SSYTRF_ROOK
|
||||
void LAPACK(ssytrf_rook)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_ssytrf_rook(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DSYTRF_ROOK
|
||||
void LAPACK(dsytrf_rook)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_dsytrf_rook(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CSYTRF_ROOK
|
||||
void LAPACK(csytrf_rook)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_csytrf_rook(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZSYTRF_ROOK
|
||||
void LAPACK(zsytrf_rook)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_zsytrf_rook(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CHETRF_ROOK
|
||||
void LAPACK(chetrf_rook)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_chetrf_rook(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZHETRF_ROOK
|
||||
void LAPACK(zhetrf_rook)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_zhetrf_rook(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XGETRF //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_SGETRF
|
||||
void LAPACK(sgetrf)(
|
||||
const int *m, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_sgetrf(m, n, A, ldA, ipiv, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DGETRF
|
||||
void LAPACK(dgetrf)(
|
||||
const int *m, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_dgetrf(m, n, A, ldA, ipiv, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CGETRF
|
||||
void LAPACK(cgetrf)(
|
||||
const int *m, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_cgetrf(m, n, A, ldA, ipiv, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZGETRF
|
||||
void LAPACK(zgetrf)(
|
||||
const int *m, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_zgetrf(m, n, A, ldA, ipiv, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XGBTRF //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_SGBTRF
|
||||
void LAPACK(sgbtrf)(
|
||||
const int *m, const int *n, const int *kl, const int *ku,
|
||||
float *Ab, const int *ldAb, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_sgbtrf(m, n, kl, ku, Ab, ldAb, ipiv, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DGBTRF
|
||||
void LAPACK(dgbtrf)(
|
||||
const int *m, const int *n, const int *kl, const int *ku,
|
||||
double *Ab, const int *ldAb, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_dgbtrf(m, n, kl, ku, Ab, ldAb, ipiv, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CGBTRF
|
||||
void LAPACK(cgbtrf)(
|
||||
const int *m, const int *n, const int *kl, const int *ku,
|
||||
float *Ab, const int *ldAb, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_cgbtrf(m, n, kl, ku, Ab, ldAb, ipiv, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZGBTRF
|
||||
void LAPACK(zgbtrf)(
|
||||
const int *m, const int *n, const int *kl, const int *ku,
|
||||
double *Ab, const int *ldAb, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_zgbtrf(m, n, kl, ku, Ab, ldAb, ipiv, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XTRSYL //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_STRSYL
|
||||
void LAPACK(strsyl)(
|
||||
const char *tranA, const char *tranB, const int *isgn,
|
||||
const int *m, const int *n,
|
||||
const float *A, const int *ldA, const float *B, const int *ldB,
|
||||
float *C, const int *ldC, float *scale,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_strsyl(tranA, tranB, isgn, m, n, A, ldA, B, ldB, C, ldC, scale, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DTRSYL
|
||||
void LAPACK(dtrsyl)(
|
||||
const char *tranA, const char *tranB, const int *isgn,
|
||||
const int *m, const int *n,
|
||||
const double *A, const int *ldA, const double *B, const int *ldB,
|
||||
double *C, const int *ldC, double *scale,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_dtrsyl(tranA, tranB, isgn, m, n, A, ldA, B, ldB, C, ldC, scale, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CTRSYL
|
||||
void LAPACK(ctrsyl)(
|
||||
const char *tranA, const char *tranB, const int *isgn,
|
||||
const int *m, const int *n,
|
||||
const float *A, const int *ldA, const float *B, const int *ldB,
|
||||
float *C, const int *ldC, float *scale,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_ctrsyl(tranA, tranB, isgn, m, n, A, ldA, B, ldB, C, ldC, scale, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZTRSYL
|
||||
void LAPACK(ztrsyl)(
|
||||
const char *tranA, const char *tranB, const int *isgn,
|
||||
const int *m, const int *n,
|
||||
const double *A, const int *ldA, const double *B, const int *ldB,
|
||||
double *C, const int *ldC, double *scale,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_ztrsyl(tranA, tranB, isgn, m, n, A, ldA, B, ldB, C, ldC, scale, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XTGSYL //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_STGSYL
|
||||
void LAPACK(stgsyl)(
|
||||
const char *trans, const int *ijob, const int *m, const int *n,
|
||||
const float *A, const int *ldA, const float *B, const int *ldB,
|
||||
float *C, const int *ldC,
|
||||
const float *D, const int *ldD, const float *E, const int *ldE,
|
||||
float *F, const int *ldF,
|
||||
float *scale, float *dif,
|
||||
float *Work, const int *lWork, int *iWork, int *info
|
||||
) {
|
||||
RELAPACK_stgsyl(trans, ijob, m, n, A, ldA, B, ldB, C, ldC, D, ldD, E, ldE, F, ldF, scale, dif, Work, lWork, iWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DTGSYL
|
||||
void LAPACK(dtgsyl)(
|
||||
const char *trans, const int *ijob, const int *m, const int *n,
|
||||
const double *A, const int *ldA, const double *B, const int *ldB,
|
||||
double *C, const int *ldC,
|
||||
const double *D, const int *ldD, const double *E, const int *ldE,
|
||||
double *F, const int *ldF,
|
||||
double *scale, double *dif,
|
||||
double *Work, const int *lWork, int *iWork, int *info
|
||||
) {
|
||||
RELAPACK_dtgsyl(trans, ijob, m, n, A, ldA, B, ldB, C, ldC, D, ldD, E, ldE, F, ldF, scale, dif, Work, lWork, iWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CTGSYL
|
||||
void LAPACK(ctgsyl)(
|
||||
const char *trans, const int *ijob, const int *m, const int *n,
|
||||
const float *A, const int *ldA, const float *B, const int *ldB,
|
||||
float *C, const int *ldC,
|
||||
const float *D, const int *ldD, const float *E, const int *ldE,
|
||||
float *F, const int *ldF,
|
||||
float *scale, float *dif,
|
||||
float *Work, const int *lWork, int *iWork, int *info
|
||||
) {
|
||||
RELAPACK_ctgsyl(trans, ijob, m, n, A, ldA, B, ldB, C, ldC, D, ldD, E, ldE, F, ldF, scale, dif, Work, lWork, iWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZTGSYL
|
||||
void LAPACK(ztgsyl)(
|
||||
const char *trans, const int *ijob, const int *m, const int *n,
|
||||
const double *A, const int *ldA, const double *B, const int *ldB,
|
||||
double *C, const int *ldC,
|
||||
const double *D, const int *ldD, const double *E, const int *ldE,
|
||||
double *F, const int *ldF,
|
||||
double *scale, double *dif,
|
||||
double *Work, const int *lWork, int *iWork, int *info
|
||||
) {
|
||||
RELAPACK_ztgsyl(trans, ijob, m, n, A, ldA, B, ldB, C, ldC, D, ldD, E, ldE, F, ldF, scale, dif, Work, lWork, iWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XGEMMT //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_SGEMMT
|
||||
void LAPACK(sgemmt)(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const float *alpha, const float *A, const int *ldA,
|
||||
const float *B, const int *ldB,
|
||||
const float *beta, float *C, const int *ldC
|
||||
) {
|
||||
RELAPACK_sgemmt(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DGEMMT
|
||||
void LAPACK(dgemmt)(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const double *alpha, const double *A, const int *ldA,
|
||||
const double *B, const int *ldB,
|
||||
const double *beta, double *C, const int *ldC
|
||||
) {
|
||||
RELAPACK_dgemmt(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CGEMMT
|
||||
void LAPACK(cgemmt)(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const float *alpha, const float *A, const int *ldA,
|
||||
const float *B, const int *ldB,
|
||||
const float *beta, float *C, const int *ldC
|
||||
) {
|
||||
RELAPACK_cgemmt(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZGEMMT
|
||||
void LAPACK(zgemmt)(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const double *alpha, const double *A, const int *ldA,
|
||||
const double *B, const int *ldB,
|
||||
const double *beta, double *C, const int *ldC
|
||||
) {
|
||||
RELAPACK_zgemmt(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,607 @@
|
|||
#include "relapack.h"
|
||||
|
||||
////////////
|
||||
// XLAUUM //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_SLAUUM
|
||||
void LAPACK(slauum)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_slauum(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DLAUUM
|
||||
void LAPACK(dlauum)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_dlauum(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CLAUUM
|
||||
void LAPACK(clauum)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_clauum(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZLAUUM
|
||||
void LAPACK(zlauum)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_zlauum(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XSYGST //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_SSYGST
|
||||
void LAPACK(ssygst)(
|
||||
const int *itype, const char *uplo, const int *n,
|
||||
float *A, const int *ldA, const float *B, const int *ldB,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_ssygst(itype, uplo, n, A, ldA, B, ldB, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DSYGST
|
||||
void LAPACK(dsygst)(
|
||||
const int *itype, const char *uplo, const int *n,
|
||||
double *A, const int *ldA, const double *B, const int *ldB,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_dsygst(itype, uplo, n, A, ldA, B, ldB, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CSYGST
|
||||
void LAPACK(csygst)(
|
||||
const int *itype, const char *uplo, const int *n,
|
||||
float *A, const int *ldA, const float *B, const int *ldB,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_csygst(itype, uplo, n, A, ldA, B, ldB, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZSYGST
|
||||
void LAPACK(zsygst)(
|
||||
const int *itype, const char *uplo, const int *n,
|
||||
double *A, const int *ldA, const double *B, const int *ldB,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_zsygst(itype, uplo, n, A, ldA, B, ldB, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XTRTRI //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_STRTRI
|
||||
void LAPACK(strtri)(
|
||||
const char *uplo, const char *diag, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_strtri(uplo, diag, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DTRTRI
|
||||
void LAPACK(dtrtri)(
|
||||
const char *uplo, const char *diag, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_dtrtri(uplo, diag, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CTRTRI
|
||||
void LAPACK(ctrtri)(
|
||||
const char *uplo, const char *diag, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_ctrtri(uplo, diag, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZTRTRI
|
||||
void LAPACK(ztrtri)(
|
||||
const char *uplo, const char *diag, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_ztrtri(uplo, diag, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XPOTRF //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_SPOTRF
|
||||
void LAPACK(spotrf)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_spotrf(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DPOTRF
|
||||
void LAPACK(dpotrf)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_dpotrf(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CPOTRF
|
||||
void LAPACK(cpotrf)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_cpotrf(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZPOTRF
|
||||
void LAPACK(zpotrf)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_zpotrf(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XPBTRF //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_SPBTRF
|
||||
void LAPACK(spbtrf)(
|
||||
const char *uplo, const int *n, const int *kd,
|
||||
float *Ab, const int *ldAb,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_spbtrf(uplo, n, kd, Ab, ldAb, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DPBTRF
|
||||
void LAPACK(dpbtrf)(
|
||||
const char *uplo, const int *n, const int *kd,
|
||||
double *Ab, const int *ldAb,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_dpbtrf(uplo, n, kd, Ab, ldAb, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CPBTRF
|
||||
void LAPACK(cpbtrf)(
|
||||
const char *uplo, const int *n, const int *kd,
|
||||
float *Ab, const int *ldAb,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_cpbtrf(uplo, n, kd, Ab, ldAb, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZPBTRF
|
||||
void LAPACK(zpbtrf)(
|
||||
const char *uplo, const int *n, const int *kd,
|
||||
double *Ab, const int *ldAb,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_zpbtrf(uplo, n, kd, Ab, ldAb, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XSYTRF //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_SSYTRF
|
||||
void LAPACK(ssytrf)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_ssytrf(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DSYTRF
|
||||
void LAPACK(dsytrf)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_dsytrf(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CSYTRF
|
||||
void LAPACK(csytrf)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_csytrf(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZSYTRF
|
||||
void LAPACK(zsytrf)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_zsytrf(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CHETRF
|
||||
void LAPACK(chetrf)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_chetrf(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZHETRF
|
||||
void LAPACK(zhetrf)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_zhetrf(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_SSYTRF_ROOK
|
||||
void LAPACK(ssytrf_rook)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_ssytrf_rook(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DSYTRF_ROOK
|
||||
void LAPACK(dsytrf_rook)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_dsytrf_rook(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CSYTRF_ROOK
|
||||
void LAPACK(csytrf_rook)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_csytrf_rook(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZSYTRF_ROOK
|
||||
void LAPACK(zsytrf_rook)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_zsytrf_rook(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CHETRF_ROOK
|
||||
void LAPACK(chetrf_rook)(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_chetrf_rook(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZHETRF_ROOK
|
||||
void LAPACK(zhetrf_rook)(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
RELAPACK_zhetrf_rook(uplo, n, A, ldA, ipiv, Work, lWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XGETRF //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_SGETRF
|
||||
void LAPACK(sgetrf)(
|
||||
const int *m, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_sgetrf(m, n, A, ldA, ipiv, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DGETRF
|
||||
void LAPACK(dgetrf)(
|
||||
const int *m, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_dgetrf(m, n, A, ldA, ipiv, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CGETRF
|
||||
void LAPACK(cgetrf)(
|
||||
const int *m, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_cgetrf(m, n, A, ldA, ipiv, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZGETRF
|
||||
void LAPACK(zgetrf)(
|
||||
const int *m, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_zgetrf(m, n, A, ldA, ipiv, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XGBTRF //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_SGBTRF
|
||||
void LAPACK(sgbtrf)(
|
||||
const int *m, const int *n, const int *kl, const int *ku,
|
||||
float *Ab, const int *ldAb, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_sgbtrf(m, n, kl, ku, Ab, ldAb, ipiv, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DGBTRF
|
||||
void LAPACK(dgbtrf)(
|
||||
const int *m, const int *n, const int *kl, const int *ku,
|
||||
double *Ab, const int *ldAb, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_dgbtrf(m, n, kl, ku, Ab, ldAb, ipiv, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CGBTRF
|
||||
void LAPACK(cgbtrf)(
|
||||
const int *m, const int *n, const int *kl, const int *ku,
|
||||
float *Ab, const int *ldAb, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_cgbtrf(m, n, kl, ku, Ab, ldAb, ipiv, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZGBTRF
|
||||
void LAPACK(zgbtrf)(
|
||||
const int *m, const int *n, const int *kl, const int *ku,
|
||||
double *Ab, const int *ldAb, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_zgbtrf(m, n, kl, ku, Ab, ldAb, ipiv, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XTRSYL //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_STRSYL
|
||||
void LAPACK(strsyl)(
|
||||
const char *tranA, const char *tranB, const int *isgn,
|
||||
const int *m, const int *n,
|
||||
const float *A, const int *ldA, const float *B, const int *ldB,
|
||||
float *C, const int *ldC, float *scale,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_strsyl(tranA, tranB, isgn, m, n, A, ldA, B, ldB, C, ldC, scale, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DTRSYL
|
||||
void LAPACK(dtrsyl)(
|
||||
const char *tranA, const char *tranB, const int *isgn,
|
||||
const int *m, const int *n,
|
||||
const double *A, const int *ldA, const double *B, const int *ldB,
|
||||
double *C, const int *ldC, double *scale,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_dtrsyl(tranA, tranB, isgn, m, n, A, ldA, B, ldB, C, ldC, scale, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CTRSYL
|
||||
void LAPACK(ctrsyl)(
|
||||
const char *tranA, const char *tranB, const int *isgn,
|
||||
const int *m, const int *n,
|
||||
const float *A, const int *ldA, const float *B, const int *ldB,
|
||||
float *C, const int *ldC, float *scale,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_ctrsyl(tranA, tranB, isgn, m, n, A, ldA, B, ldB, C, ldC, scale, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZTRSYL
|
||||
void LAPACK(ztrsyl)(
|
||||
const char *tranA, const char *tranB, const int *isgn,
|
||||
const int *m, const int *n,
|
||||
const double *A, const int *ldA, const double *B, const int *ldB,
|
||||
double *C, const int *ldC, double *scale,
|
||||
int *info
|
||||
) {
|
||||
RELAPACK_ztrsyl(tranA, tranB, isgn, m, n, A, ldA, B, ldB, C, ldC, scale, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XTGSYL //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_STGSYL
|
||||
void LAPACK(stgsyl)(
|
||||
const char *trans, const int *ijob, const int *m, const int *n,
|
||||
const float *A, const int *ldA, const float *B, const int *ldB,
|
||||
float *C, const int *ldC,
|
||||
const float *D, const int *ldD, const float *E, const int *ldE,
|
||||
float *F, const int *ldF,
|
||||
float *scale, float *dif,
|
||||
float *Work, const int *lWork, int *iWork, int *info
|
||||
) {
|
||||
RELAPACK_stgsyl(trans, ijob, m, n, A, ldA, B, ldB, C, ldC, D, ldD, E, ldE, F, ldF, scale, dif, Work, lWork, iWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DTGSYL
|
||||
void LAPACK(dtgsyl)(
|
||||
const char *trans, const int *ijob, const int *m, const int *n,
|
||||
const double *A, const int *ldA, const double *B, const int *ldB,
|
||||
double *C, const int *ldC,
|
||||
const double *D, const int *ldD, const double *E, const int *ldE,
|
||||
double *F, const int *ldF,
|
||||
double *scale, double *dif,
|
||||
double *Work, const int *lWork, int *iWork, int *info
|
||||
) {
|
||||
RELAPACK_dtgsyl(trans, ijob, m, n, A, ldA, B, ldB, C, ldC, D, ldD, E, ldE, F, ldF, scale, dif, Work, lWork, iWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CTGSYL
|
||||
void LAPACK(ctgsyl)(
|
||||
const char *trans, const int *ijob, const int *m, const int *n,
|
||||
const float *A, const int *ldA, const float *B, const int *ldB,
|
||||
float *C, const int *ldC,
|
||||
const float *D, const int *ldD, const float *E, const int *ldE,
|
||||
float *F, const int *ldF,
|
||||
float *scale, float *dif,
|
||||
float *Work, const int *lWork, int *iWork, int *info
|
||||
) {
|
||||
RELAPACK_ctgsyl(trans, ijob, m, n, A, ldA, B, ldB, C, ldC, D, ldD, E, ldE, F, ldF, scale, dif, Work, lWork, iWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZTGSYL
|
||||
void LAPACK(ztgsyl)(
|
||||
const char *trans, const int *ijob, const int *m, const int *n,
|
||||
const double *A, const int *ldA, const double *B, const int *ldB,
|
||||
double *C, const int *ldC,
|
||||
const double *D, const int *ldD, const double *E, const int *ldE,
|
||||
double *F, const int *ldF,
|
||||
double *scale, double *dif,
|
||||
double *Work, const int *lWork, int *iWork, int *info
|
||||
) {
|
||||
RELAPACK_ztgsyl(trans, ijob, m, n, A, ldA, B, ldB, C, ldC, D, ldD, E, ldE, F, ldF, scale, dif, Work, lWork, iWork, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////
|
||||
// XGEMMT //
|
||||
////////////
|
||||
|
||||
#if INCLUDE_SGEMMT
|
||||
void LAPACK(sgemmt)(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const float *alpha, const float *A, const int *ldA,
|
||||
const float *B, const int *ldB,
|
||||
const float *beta, float *C, const int *ldC
|
||||
) {
|
||||
RELAPACK_sgemmt(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_DGEMMT
|
||||
void LAPACK(dgemmt)(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const double *alpha, const double *A, const int *ldA,
|
||||
const double *B, const int *ldB,
|
||||
const double *beta, double *C, const int *ldC
|
||||
) {
|
||||
RELAPACK_dgemmt(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_CGEMMT
|
||||
void LAPACK(cgemmt)(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const float *alpha, const float *A, const int *ldA,
|
||||
const float *B, const int *ldB,
|
||||
const float *beta, float *C, const int *ldC
|
||||
) {
|
||||
RELAPACK_cgemmt(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_ZGEMMT
|
||||
void LAPACK(zgemmt)(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const double *alpha, const double *A, const int *ldA,
|
||||
const double *B, const int *ldB,
|
||||
const double *beta, double *C, const int *ldC
|
||||
) {
|
||||
RELAPACK_zgemmt(uplo, n, A, ldA, info);
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,60 @@
|
|||
#ifndef RELAPACK_INT_H
|
||||
#define RELAPACK_INT_H
|
||||
|
||||
#include "../config.h"
|
||||
|
||||
#include "../inc/relapack.h"
|
||||
|
||||
// add an underscore to BLAS routines (or not)
|
||||
#if BLAS_UNDERSCORE
|
||||
#define BLAS(routine) routine ## _
|
||||
#else
|
||||
#define BLAS(routine) routine
|
||||
#endif
|
||||
|
||||
// add an underscore to LAPACK routines (or not)
|
||||
#if LAPACK_UNDERSCORE
|
||||
#define LAPACK(routine) routine ## _
|
||||
#else
|
||||
#define LAPACK(routine) routine
|
||||
#endif
|
||||
|
||||
// minimum and maximum macros
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
// REC_SPLIT(n) returns how a problem of size n is split recursively.
|
||||
// If n >= 16, we ensure that the size of at least one of the halves is
|
||||
// divisible by 8 (the cache line size in most CPUs), while both halves are
|
||||
// still as close as possible in size.
|
||||
// If n < 16 the problem is simply split in the middle. (Note that the
|
||||
// crossoversize is usually larger than 16.)
|
||||
#define SREC_SPLIT(n) ((n >= 32) ? ((n + 16) / 32) * 16 : n / 2)
|
||||
#define DREC_SPLIT(n) ((n >= 16) ? ((n + 8) / 16) * 8 : n / 2)
|
||||
#define CREC_SPLIT(n) ((n >= 16) ? ((n + 8) / 16) * 8 : n / 2)
|
||||
#define ZREC_SPLIT(n) ((n >= 8) ? ((n + 4) / 8) * 4 : n / 2)
|
||||
|
||||
#include "lapack.h"
|
||||
#include "blas.h"
|
||||
|
||||
// sytrf helper routines
|
||||
void RELAPACK_ssytrf_rec2(const char *, const int *, const int *, int *, float *, const int *, int *, float *, const int *, int *);
|
||||
void RELAPACK_dsytrf_rec2(const char *, const int *, const int *, int *, double *, const int *, int *, double *, const int *, int *);
|
||||
void RELAPACK_csytrf_rec2(const char *, const int *, const int *, int *, float *, const int *, int *, float *, const int *, int *);
|
||||
void RELAPACK_chetrf_rec2(const char *, const int *, const int *, int *, float *, const int *, int *, float *, const int *, int *);
|
||||
void RELAPACK_zsytrf_rec2(const char *, const int *, const int *, int *, double *, const int *, int *, double *, const int *, int *);
|
||||
void RELAPACK_zhetrf_rec2(const char *, const int *, const int *, int *, double *, const int *, int *, double *, const int *, int *);
|
||||
void RELAPACK_ssytrf_rook_rec2(const char *, const int *, const int *, int *, float *, const int *, int *, float *, const int *, int *);
|
||||
void RELAPACK_dsytrf_rook_rec2(const char *, const int *, const int *, int *, double *, const int *, int *, double *, const int *, int *);
|
||||
void RELAPACK_csytrf_rook_rec2(const char *, const int *, const int *, int *, float *, const int *, int *, float *, const int *, int *);
|
||||
void RELAPACK_chetrf_rook_rec2(const char *, const int *, const int *, int *, float *, const int *, int *, float *, const int *, int *);
|
||||
void RELAPACK_zsytrf_rook_rec2(const char *, const int *, const int *, int *, double *, const int *, int *, double *, const int *, int *);
|
||||
void RELAPACK_zhetrf_rook_rec2(const char *, const int *, const int *, int *, double *, const int *, int *, double *, const int *, int *);
|
||||
|
||||
// trsyl helper routines
|
||||
void RELAPACK_strsyl_rec2(const char *, const char *, const int *, const int *, const int *, const float *, const int *, const float *, const int *, float *, const int *, float *, int *);
|
||||
void RELAPACK_dtrsyl_rec2(const char *, const char *, const int *, const int *, const int *, const double *, const int *, const double *, const int *, double *, const int *, double *, int *);
|
||||
void RELAPACK_ctrsyl_rec2(const char *, const char *, const int *, const int *, const int *, const float *, const int *, const float *, const int *, float *, const int *, float *, int *);
|
||||
void RELAPACK_ztrsyl_rec2(const char *, const char *, const int *, const int *, const int *, const double *, const int *, const double *, const int *, double *, const int *, double *, int *);
|
||||
|
||||
#endif /* RELAPACK_INT_H */
|
|
@ -0,0 +1,227 @@
|
|||
#include "relapack.h"
|
||||
#include "stdlib.h"
|
||||
|
||||
static void RELAPACK_sgbtrf_rec(const int *, const int *, const int *,
|
||||
const int *, float *, const int *, int *, float *, const int *, float *,
|
||||
const int *, int *);
|
||||
|
||||
|
||||
/** SGBTRF computes an LU factorization of a real m-by-n band matrix A using partial pivoting with row interchanges.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's sgbtrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d5/d72/sgbtrf_8f.html
|
||||
* */
|
||||
void RELAPACK_sgbtrf(
|
||||
const int *m, const int *n, const int *kl, const int *ku,
|
||||
float *Ab, const int *ldAb, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
*info = 0;
|
||||
if (*m < 0)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*kl < 0)
|
||||
*info = -3;
|
||||
else if (*ku < 0)
|
||||
*info = -4;
|
||||
else if (*ldAb < 2 * *kl + *ku + 1)
|
||||
*info = -6;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("SGBTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constant
|
||||
const float ZERO[] = { 0. };
|
||||
|
||||
// Result upper band width
|
||||
const int kv = *ku + *kl;
|
||||
|
||||
// Unskewg A
|
||||
const int ldA[] = { *ldAb - 1 };
|
||||
float *const A = Ab + kv;
|
||||
|
||||
// Zero upper diagonal fill-in elements
|
||||
int i, j;
|
||||
for (j = 0; j < *n; j++) {
|
||||
float *const A_j = A + *ldA * j;
|
||||
for (i = MAX(0, j - kv); i < j - *ku; i++)
|
||||
A_j[i] = 0.;
|
||||
}
|
||||
|
||||
// Allocate work space
|
||||
const int n1 = SREC_SPLIT(*n);
|
||||
const int mWorkl = (kv > n1) ? MAX(1, *m - *kl) : kv;
|
||||
const int nWorkl = (kv > n1) ? n1 : kv;
|
||||
const int mWorku = (*kl > n1) ? n1 : *kl;
|
||||
const int nWorku = (*kl > n1) ? MAX(0, *n - *kl) : *kl;
|
||||
float *Workl = malloc(mWorkl * nWorkl * sizeof(float));
|
||||
float *Worku = malloc(mWorku * nWorku * sizeof(float));
|
||||
LAPACK(slaset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl);
|
||||
LAPACK(slaset)("U", &mWorku, &nWorku, ZERO, ZERO, Worku, &mWorku);
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_sgbtrf_rec(m, n, kl, ku, Ab, ldAb, ipiv, Workl, &mWorkl, Worku, &mWorku, info);
|
||||
|
||||
// Free work space
|
||||
free(Workl);
|
||||
free(Worku);
|
||||
}
|
||||
|
||||
|
||||
/** sgbtrf's recursive compute kernel */
|
||||
static void RELAPACK_sgbtrf_rec(
|
||||
const int *m, const int *n, const int *kl, const int *ku,
|
||||
float *Ab, const int *ldAb, int *ipiv,
|
||||
float *Workl, const int *ldWorkl, float *Worku, const int *ldWorku,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_SGBTRF, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(sgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1. };
|
||||
const float MONE[] = { -1. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Loop iterators
|
||||
int i, j;
|
||||
|
||||
// Output upper band width
|
||||
const int kv = *ku + *kl;
|
||||
|
||||
// Unskew A
|
||||
const int ldA[] = { *ldAb - 1 };
|
||||
float *const A = Ab + kv;
|
||||
|
||||
// Splitting
|
||||
const int n1 = MIN(SREC_SPLIT(*n), *kl);
|
||||
const int n2 = *n - n1;
|
||||
const int m1 = MIN(n1, *m);
|
||||
const int m2 = *m - m1;
|
||||
const int mn1 = MIN(m1, n1);
|
||||
const int mn2 = MIN(m2, n2);
|
||||
|
||||
// Ab_L *
|
||||
// Ab_BR
|
||||
float *const Ab_L = Ab;
|
||||
float *const Ab_BR = Ab + *ldAb * n1;
|
||||
|
||||
// A_L A_R
|
||||
float *const A_L = A;
|
||||
float *const A_R = A + *ldA * n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
float *const A_TL = A;
|
||||
float *const A_TR = A + *ldA * n1;
|
||||
float *const A_BL = A + m1;
|
||||
float *const A_BR = A + *ldA * n1 + m1;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_T = ipiv;
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// Banded splitting
|
||||
const int n21 = MIN(n2, kv - n1);
|
||||
const int n22 = MIN(n2 - n21, n1);
|
||||
const int m21 = MIN(m2, *kl - m1);
|
||||
const int m22 = MIN(m2 - m21, m1);
|
||||
|
||||
// n1 n21 n22
|
||||
// m * A_Rl ARr
|
||||
float *const A_Rl = A_R;
|
||||
float *const A_Rr = A_R + *ldA * n21;
|
||||
|
||||
// n1 n21 n22
|
||||
// m1 * A_TRl A_TRr
|
||||
// m21 A_BLt A_BRtl A_BRtr
|
||||
// m22 A_BLb A_BRbl A_BRbr
|
||||
float *const A_TRl = A_TR;
|
||||
float *const A_TRr = A_TR + *ldA * n21;
|
||||
float *const A_BLt = A_BL;
|
||||
float *const A_BLb = A_BL + m21;
|
||||
float *const A_BRtl = A_BR;
|
||||
float *const A_BRtr = A_BR + *ldA * n21;
|
||||
float *const A_BRbl = A_BR + m21;
|
||||
float *const A_BRbr = A_BR + *ldA * n21 + m21;
|
||||
|
||||
// recursion(Ab_L, ipiv_T)
|
||||
RELAPACK_sgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info);
|
||||
|
||||
// Workl = A_BLb
|
||||
LAPACK(slacpy)("U", &m22, &n1, A_BLb, ldA, Workl, ldWorkl);
|
||||
|
||||
// partially redo swaps in A_L
|
||||
for (i = 0; i < mn1; i++) {
|
||||
const int ip = ipiv_T[i] - 1;
|
||||
if (ip != i) {
|
||||
if (ip < *kl)
|
||||
BLAS(sswap)(&i, A_L + i, ldA, A_L + ip, ldA);
|
||||
else
|
||||
BLAS(sswap)(&i, A_L + i, ldA, Workl + ip - *kl, ldWorkl);
|
||||
}
|
||||
}
|
||||
|
||||
// apply pivots to A_Rl
|
||||
LAPACK(slaswp)(&n21, A_Rl, ldA, iONE, &mn1, ipiv_T, iONE);
|
||||
|
||||
// apply pivots to A_Rr columnwise
|
||||
for (j = 0; j < n22; j++) {
|
||||
float *const A_Rrj = A_Rr + *ldA * j;
|
||||
for (i = j; i < mn1; i++) {
|
||||
const int ip = ipiv_T[i] - 1;
|
||||
if (ip != i) {
|
||||
const float tmp = A_Rrj[i];
|
||||
A_Rrj[i] = A_Rr[ip];
|
||||
A_Rrj[ip] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A_TRl = A_TL \ A_TRl
|
||||
BLAS(strsm)("L", "L", "N", "U", &m1, &n21, ONE, A_TL, ldA, A_TRl, ldA);
|
||||
// Worku = A_TRr
|
||||
LAPACK(slacpy)("L", &m1, &n22, A_TRr, ldA, Worku, ldWorku);
|
||||
// Worku = A_TL \ Worku
|
||||
BLAS(strsm)("L", "L", "N", "U", &m1, &n22, ONE, A_TL, ldA, Worku, ldWorku);
|
||||
// A_TRr = Worku
|
||||
LAPACK(slacpy)("L", &m1, &n22, Worku, ldWorku, A_TRr, ldA);
|
||||
// A_BRtl = A_BRtl - A_BLt * A_TRl
|
||||
BLAS(sgemm)("N", "N", &m21, &n21, &n1, MONE, A_BLt, ldA, A_TRl, ldA, ONE, A_BRtl, ldA);
|
||||
// A_BRbl = A_BRbl - Workl * A_TRl
|
||||
BLAS(sgemm)("N", "N", &m22, &n21, &n1, MONE, Workl, ldWorkl, A_TRl, ldA, ONE, A_BRbl, ldA);
|
||||
// A_BRtr = A_BRtr - A_BLt * Worku
|
||||
BLAS(sgemm)("N", "N", &m21, &n22, &n1, MONE, A_BLt, ldA, Worku, ldWorku, ONE, A_BRtr, ldA);
|
||||
// A_BRbr = A_BRbr - Workl * Worku
|
||||
BLAS(sgemm)("N", "N", &m22, &n22, &n1, MONE, Workl, ldWorkl, Worku, ldWorku, ONE, A_BRbr, ldA);
|
||||
|
||||
// partially undo swaps in A_L
|
||||
for (i = mn1 - 1; i >= 0; i--) {
|
||||
const int ip = ipiv_T[i] - 1;
|
||||
if (ip != i) {
|
||||
if (ip < *kl)
|
||||
BLAS(sswap)(&i, A_L + i, ldA, A_L + ip, ldA);
|
||||
else
|
||||
BLAS(sswap)(&i, A_L + i, ldA, Workl + ip - *kl, ldWorkl);
|
||||
}
|
||||
}
|
||||
|
||||
// recursion(Ab_BR, ipiv_B)
|
||||
RELAPACK_sgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
// shift pivots
|
||||
for (i = 0; i < mn2; i++)
|
||||
ipiv_B[i] += n1;
|
||||
}
|
|
@ -0,0 +1,165 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_sgemmt_rec(const char *, const char *, const char *,
|
||||
const int *, const int *, const float *, const float *, const int *,
|
||||
const float *, const int *, const float *, float *, const int *);
|
||||
|
||||
static void RELAPACK_sgemmt_rec2(const char *, const char *, const char *,
|
||||
const int *, const int *, const float *, const float *, const int *,
|
||||
const float *, const int *, const float *, float *, const int *);
|
||||
|
||||
|
||||
/** SGEMMT computes a matrix-matrix product with general matrices but updates
|
||||
* only the upper or lower triangular part of the result matrix.
|
||||
*
|
||||
* This routine performs the same operation as the BLAS routine
|
||||
* sgemm(transA, transB, n, n, k, alpha, A, ldA, B, ldB, beta, C, ldC)
|
||||
* but only updates the triangular part of C specified by uplo:
|
||||
* If (*uplo == 'L'), only the lower triangular part of C is updated,
|
||||
* otherwise the upper triangular part is updated.
|
||||
* */
|
||||
void RELAPACK_sgemmt(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const float *alpha, const float *A, const int *ldA,
|
||||
const float *B, const int *ldB,
|
||||
const float *beta, float *C, const int *ldC
|
||||
) {
|
||||
|
||||
#if HAVE_XGEMMT
|
||||
BLAS(sgemmt)(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, ldC);
|
||||
return;
|
||||
#else
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
const int notransA = LAPACK(lsame)(transA, "N");
|
||||
const int tranA = LAPACK(lsame)(transA, "T");
|
||||
const int notransB = LAPACK(lsame)(transB, "N");
|
||||
const int tranB = LAPACK(lsame)(transB, "T");
|
||||
int info = 0;
|
||||
if (!lower && !upper)
|
||||
info = 1;
|
||||
else if (!tranA && !notransA)
|
||||
info = 2;
|
||||
else if (!tranB && !notransB)
|
||||
info = 3;
|
||||
else if (*n < 0)
|
||||
info = 4;
|
||||
else if (*k < 0)
|
||||
info = 5;
|
||||
else if (*ldA < MAX(1, notransA ? *n : *k))
|
||||
info = 8;
|
||||
else if (*ldB < MAX(1, notransB ? *k : *n))
|
||||
info = 10;
|
||||
else if (*ldC < MAX(1, *n))
|
||||
info = 13;
|
||||
if (info) {
|
||||
LAPACK(xerbla)("SGEMMT", &info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
const char cleantransA = notransA ? 'N' : 'T';
|
||||
const char cleantransB = notransB ? 'N' : 'T';
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_sgemmt_rec(&cleanuplo, &cleantransA, &cleantransB, n, k, alpha, A, ldA, B, ldB, beta, C, ldC);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** sgemmt's recursive compute kernel */
|
||||
static void RELAPACK_sgemmt_rec(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const float *alpha, const float *A, const int *ldA,
|
||||
const float *B, const int *ldB,
|
||||
const float *beta, float *C, const int *ldC
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_SGEMMT, 1)) {
|
||||
// Unblocked
|
||||
RELAPACK_sgemmt_rec2(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, ldC);
|
||||
return;
|
||||
}
|
||||
|
||||
// Splitting
|
||||
const int n1 = SREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_T
|
||||
// A_B
|
||||
const float *const A_T = A;
|
||||
const float *const A_B = A + ((*transA == 'N') ? n1 : *ldA * n1);
|
||||
|
||||
// B_L B_R
|
||||
const float *const B_L = B;
|
||||
const float *const B_R = B + ((*transB == 'N') ? *ldB * n1 : n1);
|
||||
|
||||
// C_TL C_TR
|
||||
// C_BL C_BR
|
||||
float *const C_TL = C;
|
||||
float *const C_TR = C + *ldC * n1;
|
||||
float *const C_BL = C + n1;
|
||||
float *const C_BR = C + *ldC * n1 + n1;
|
||||
|
||||
// recursion(C_TL)
|
||||
RELAPACK_sgemmt_rec(uplo, transA, transB, &n1, k, alpha, A_T, ldA, B_L, ldB, beta, C_TL, ldC);
|
||||
|
||||
if (*uplo == 'L')
|
||||
// C_BL = alpha A_B B_L + beta C_BL
|
||||
BLAS(sgemm)(transA, transB, &n2, &n1, k, alpha, A_B, ldA, B_L, ldB, beta, C_BL, ldC);
|
||||
else
|
||||
// C_TR = alpha A_T B_R + beta C_TR
|
||||
BLAS(sgemm)(transA, transB, &n1, &n2, k, alpha, A_T, ldA, B_R, ldB, beta, C_TR, ldC);
|
||||
|
||||
// recursion(C_BR)
|
||||
RELAPACK_sgemmt_rec(uplo, transA, transB, &n2, k, alpha, A_B, ldA, B_R, ldB, beta, C_BR, ldC);
|
||||
}
|
||||
|
||||
|
||||
/** sgemmt's unblocked compute kernel */
|
||||
static void RELAPACK_sgemmt_rec2(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const float *alpha, const float *A, const int *ldA,
|
||||
const float *B, const int *ldB,
|
||||
const float *beta, float *C, const int *ldC
|
||||
) {
|
||||
|
||||
const int incB = (*transB == 'N') ? 1 : *ldB;
|
||||
const int incC = 1;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < *n; i++) {
|
||||
// A_0
|
||||
// A_i
|
||||
const float *const A_0 = A;
|
||||
const float *const A_i = A + ((*transA == 'N') ? i : *ldA * i);
|
||||
|
||||
// * B_i *
|
||||
const float *const B_i = B + ((*transB == 'N') ? *ldB * i : i);
|
||||
|
||||
// * C_0i *
|
||||
// * C_ii *
|
||||
float *const C_0i = C + *ldC * i;
|
||||
float *const C_ii = C + *ldC * i + i;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
const int nmi = *n - i;
|
||||
if (*transA == 'N')
|
||||
BLAS(sgemv)(transA, &nmi, k, alpha, A_i, ldA, B_i, &incB, beta, C_ii, &incC);
|
||||
else
|
||||
BLAS(sgemv)(transA, k, &nmi, alpha, A_i, ldA, B_i, &incB, beta, C_ii, &incC);
|
||||
} else {
|
||||
const int ip1 = i + 1;
|
||||
if (*transA == 'N')
|
||||
BLAS(sgemv)(transA, &ip1, k, alpha, A_0, ldA, B_i, &incB, beta, C_0i, &incC);
|
||||
else
|
||||
BLAS(sgemv)(transA, k, &ip1, alpha, A_0, ldA, B_i, &incB, beta, C_0i, &incC);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_sgetrf_rec(const int *, const int *, float *, const int *,
|
||||
int *, int *);
|
||||
|
||||
|
||||
/** SGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's sgetrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/de/de2/sgetrf_8f.html
|
||||
* */
|
||||
void RELAPACK_sgetrf(
|
||||
const int *m, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
*info = 0;
|
||||
if (*m < 0)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("SGETRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
const int sn = MIN(*m, *n);
|
||||
|
||||
RELAPACK_sgetrf_rec(m, &sn, A, ldA, ipiv, info);
|
||||
|
||||
// Right remainder
|
||||
if (*m < *n) {
|
||||
// Constants
|
||||
const float ONE[] = { 1. };
|
||||
const int iONE[] = { 1. };
|
||||
|
||||
// Splitting
|
||||
const int rn = *n - *m;
|
||||
|
||||
// A_L A_R
|
||||
const float *const A_L = A;
|
||||
float *const A_R = A + *ldA * *m;
|
||||
|
||||
// A_R = apply(ipiv, A_R)
|
||||
LAPACK(slaswp)(&rn, A_R, ldA, iONE, m, ipiv, iONE);
|
||||
// A_R = A_L \ A_R
|
||||
BLAS(strsm)("L", "L", "N", "U", m, &rn, ONE, A_L, ldA, A_R, ldA);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** sgetrf's recursive compute kernel */
|
||||
static void RELAPACK_sgetrf_rec(
|
||||
const int *m, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_SGETRF, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(sgetf2)(m, n, A, ldA, ipiv, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1. };
|
||||
const float MONE[] = { -1. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Splitting
|
||||
const int n1 = SREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
const int m2 = *m - n1;
|
||||
|
||||
// A_L A_R
|
||||
float *const A_L = A;
|
||||
float *const A_R = A + *ldA * n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
float *const A_TL = A;
|
||||
float *const A_TR = A + *ldA * n1;
|
||||
float *const A_BL = A + n1;
|
||||
float *const A_BR = A + *ldA * n1 + n1;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_T = ipiv;
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// recursion(A_L, ipiv_T)
|
||||
RELAPACK_sgetrf_rec(m, &n1, A_L, ldA, ipiv_T, info);
|
||||
// apply pivots to A_R
|
||||
LAPACK(slaswp)(&n2, A_R, ldA, iONE, &n1, ipiv_T, iONE);
|
||||
|
||||
// A_TR = A_TL \ A_TR
|
||||
BLAS(strsm)("L", "L", "N", "U", &n1, &n2, ONE, A_TL, ldA, A_TR, ldA);
|
||||
// A_BR = A_BR - A_BL * A_TR
|
||||
BLAS(sgemm)("N", "N", &m2, &n2, &n1, MONE, A_BL, ldA, A_TR, ldA, ONE, A_BR, ldA);
|
||||
|
||||
// recursion(A_BR, ipiv_B)
|
||||
RELAPACK_sgetrf_rec(&m2, &n2, A_BR, ldA, ipiv_B, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
// apply pivots to A_BL
|
||||
LAPACK(slaswp)(&n1, A_BL, ldA, iONE, &n2, ipiv_B, iONE);
|
||||
// shift pivots
|
||||
int i;
|
||||
for (i = 0; i < n2; i++)
|
||||
ipiv_B[i] += n1;
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_slauum_rec(const char *, const int *, float *,
|
||||
const int *, int *);
|
||||
|
||||
|
||||
/** SLAUUM computes the product U * U**T or L**T * L, where the triangular factor U or L is stored in the upper or lower triangular part of the array A.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's slauum.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/dd/d5a/slauum_8f.html
|
||||
* */
|
||||
void RELAPACK_slauum(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("SLAUUM", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_slauum_rec(&cleanuplo, n, A, ldA, info);
|
||||
}
|
||||
|
||||
|
||||
/** slauum's recursive compute kernel */
|
||||
static void RELAPACK_slauum_rec(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_SLAUUM, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(slauu2)(uplo, n, A, ldA, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1. };
|
||||
|
||||
// Splitting
|
||||
const int n1 = SREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
float *const A_TL = A;
|
||||
float *const A_TR = A + *ldA * n1;
|
||||
float *const A_BL = A + n1;
|
||||
float *const A_BR = A + *ldA * n1 + n1;
|
||||
|
||||
// recursion(A_TL)
|
||||
RELAPACK_slauum_rec(uplo, &n1, A_TL, ldA, info);
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// A_TL = A_TL + A_BL' * A_BL
|
||||
BLAS(ssyrk)("L", "T", &n1, &n2, ONE, A_BL, ldA, ONE, A_TL, ldA);
|
||||
// A_BL = A_BR' * A_BL
|
||||
BLAS(strmm)("L", "L", "T", "N", &n2, &n1, ONE, A_BR, ldA, A_BL, ldA);
|
||||
} else {
|
||||
// A_TL = A_TL + A_TR * A_TR'
|
||||
BLAS(ssyrk)("U", "N", &n1, &n2, ONE, A_TR, ldA, ONE, A_TL, ldA);
|
||||
// A_TR = A_TR * A_BR'
|
||||
BLAS(strmm)("R", "U", "T", "N", &n1, &n2, ONE, A_BR, ldA, A_TR, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR)
|
||||
RELAPACK_slauum_rec(uplo, &n2, A_BR, ldA, info);
|
||||
}
|
|
@ -0,0 +1,157 @@
|
|||
#include "relapack.h"
|
||||
#include "stdlib.h"
|
||||
|
||||
static void RELAPACK_spbtrf_rec(const char *, const int *, const int *,
|
||||
float *, const int *, float *, const int *, int *);
|
||||
|
||||
|
||||
/** SPBTRF computes the Cholesky factorization of a real symmetric positive definite band matrix A.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's spbtrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d1/d22/spbtrf_8f.html
|
||||
* */
|
||||
void RELAPACK_spbtrf(
|
||||
const char *uplo, const int *n, const int *kd,
|
||||
float *Ab, const int *ldAb,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*kd < 0)
|
||||
*info = -3;
|
||||
else if (*ldAb < *kd + 1)
|
||||
*info = -5;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("SPBTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Constant
|
||||
const float ZERO[] = { 0. };
|
||||
|
||||
// Allocate work space
|
||||
const int n1 = SREC_SPLIT(*n);
|
||||
const int mWork = (*kd > n1) ? (lower ? *n - *kd : n1) : *kd;
|
||||
const int nWork = (*kd > n1) ? (lower ? n1 : *n - *kd) : *kd;
|
||||
float *Work = malloc(mWork * nWork * sizeof(float));
|
||||
LAPACK(slaset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork);
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_spbtrf_rec(&cleanuplo, n, kd, Ab, ldAb, Work, &mWork, info);
|
||||
|
||||
// Free work space
|
||||
free(Work);
|
||||
}
|
||||
|
||||
|
||||
/** spbtrf's recursive compute kernel */
|
||||
static void RELAPACK_spbtrf_rec(
|
||||
const char *uplo, const int *n, const int *kd,
|
||||
float *Ab, const int *ldAb,
|
||||
float *Work, const int *ldWork,
|
||||
int *info
|
||||
){
|
||||
|
||||
if (*n <= MAX(CROSSOVER_SPBTRF, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(spbtf2)(uplo, n, kd, Ab, ldAb, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1. };
|
||||
const float MONE[] = { -1. };
|
||||
|
||||
// Unskew A
|
||||
const int ldA[] = { *ldAb - 1 };
|
||||
float *const A = Ab + ((*uplo == 'L') ? 0 : *kd);
|
||||
|
||||
// Splitting
|
||||
const int n1 = MIN(SREC_SPLIT(*n), *kd);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// * *
|
||||
// * Ab_BR
|
||||
float *const Ab_BR = Ab + *ldAb * n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
float *const A_TL = A;
|
||||
float *const A_TR = A + *ldA * n1;
|
||||
float *const A_BL = A + n1;
|
||||
float *const A_BR = A + *ldA * n1 + n1;
|
||||
|
||||
// recursion(A_TL)
|
||||
RELAPACK_spotrf(uplo, &n1, A_TL, ldA, info);
|
||||
if (*info)
|
||||
return;
|
||||
|
||||
// Banded splitting
|
||||
const int n21 = MIN(n2, *kd - n1);
|
||||
const int n22 = MIN(n2 - n21, *kd);
|
||||
|
||||
// n1 n21 n22
|
||||
// n1 * A_TRl A_TRr
|
||||
// n21 A_BLt A_BRtl A_BRtr
|
||||
// n22 A_BLb A_BRbl A_BRbr
|
||||
float *const A_TRl = A_TR;
|
||||
float *const A_TRr = A_TR + *ldA * n21;
|
||||
float *const A_BLt = A_BL;
|
||||
float *const A_BLb = A_BL + n21;
|
||||
float *const A_BRtl = A_BR;
|
||||
float *const A_BRtr = A_BR + *ldA * n21;
|
||||
float *const A_BRbl = A_BR + n21;
|
||||
float *const A_BRbr = A_BR + *ldA * n21 + n21;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// A_BLt = ABLt / A_TL'
|
||||
BLAS(strsm)("R", "L", "T", "N", &n21, &n1, ONE, A_TL, ldA, A_BLt, ldA);
|
||||
// A_BRtl = A_BRtl - A_BLt * A_BLt'
|
||||
BLAS(ssyrk)("L", "N", &n21, &n1, MONE, A_BLt, ldA, ONE, A_BRtl, ldA);
|
||||
// Work = A_BLb
|
||||
LAPACK(slacpy)("U", &n22, &n1, A_BLb, ldA, Work, ldWork);
|
||||
// Work = Work / A_TL'
|
||||
BLAS(strsm)("R", "L", "T", "N", &n22, &n1, ONE, A_TL, ldA, Work, ldWork);
|
||||
// A_BRbl = A_BRbl - Work * A_BLt'
|
||||
BLAS(sgemm)("N", "T", &n22, &n21, &n1, MONE, Work, ldWork, A_BLt, ldA, ONE, A_BRbl, ldA);
|
||||
// A_BRbr = A_BRbr - Work * Work'
|
||||
BLAS(ssyrk)("L", "N", &n22, &n1, MONE, Work, ldWork, ONE, A_BRbr, ldA);
|
||||
// A_BLb = Work
|
||||
LAPACK(slacpy)("U", &n22, &n1, Work, ldWork, A_BLb, ldA);
|
||||
} else {
|
||||
// A_TRl = A_TL' \ A_TRl
|
||||
BLAS(strsm)("L", "U", "T", "N", &n1, &n21, ONE, A_TL, ldA, A_TRl, ldA);
|
||||
// A_BRtl = A_BRtl - A_TRl' * A_TRl
|
||||
BLAS(ssyrk)("U", "T", &n21, &n1, MONE, A_TRl, ldA, ONE, A_BRtl, ldA);
|
||||
// Work = A_TRr
|
||||
LAPACK(slacpy)("L", &n1, &n22, A_TRr, ldA, Work, ldWork);
|
||||
// Work = A_TL' \ Work
|
||||
BLAS(strsm)("L", "U", "T", "N", &n1, &n22, ONE, A_TL, ldA, Work, ldWork);
|
||||
// A_BRtr = A_BRtr - A_TRl' * Work
|
||||
BLAS(sgemm)("T", "N", &n21, &n22, &n1, MONE, A_TRl, ldA, Work, ldWork, ONE, A_BRtr, ldA);
|
||||
// A_BRbr = A_BRbr - Work' * Work
|
||||
BLAS(ssyrk)("U", "T", &n22, &n1, MONE, Work, ldWork, ONE, A_BRbr, ldA);
|
||||
// A_TRr = Work
|
||||
LAPACK(slacpy)("L", &n1, &n22, Work, ldWork, A_TRr, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR)
|
||||
if (*kd > n1)
|
||||
RELAPACK_spotrf(uplo, &n2, A_BR, ldA, info);
|
||||
else
|
||||
RELAPACK_spbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
}
|
|
@ -0,0 +1,92 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_spotrf_rec(const char *, const int *, float *,
|
||||
const int *, int *);
|
||||
|
||||
|
||||
/** SPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix A.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's spotrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d0/da2/spotrf_8f.html
|
||||
* */
|
||||
void RELAPACK_spotrf(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("SPOTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_spotrf_rec(&cleanuplo, n, A, ldA, info);
|
||||
}
|
||||
|
||||
|
||||
/** spotrf's recursive compute kernel */
|
||||
static void RELAPACK_spotrf_rec(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_SPOTRF, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(spotf2)(uplo, n, A, ldA, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1. };
|
||||
const float MONE[] = { -1. };
|
||||
|
||||
// Splitting
|
||||
const int n1 = SREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
float *const A_TL = A;
|
||||
float *const A_TR = A + *ldA * n1;
|
||||
float *const A_BL = A + n1;
|
||||
float *const A_BR = A + *ldA * n1 + n1;
|
||||
|
||||
// recursion(A_TL)
|
||||
RELAPACK_spotrf_rec(uplo, &n1, A_TL, ldA, info);
|
||||
if (*info)
|
||||
return;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// A_BL = A_BL / A_TL'
|
||||
BLAS(strsm)("R", "L", "T", "N", &n2, &n1, ONE, A_TL, ldA, A_BL, ldA);
|
||||
// A_BR = A_BR - A_BL * A_BL'
|
||||
BLAS(ssyrk)("L", "N", &n2, &n1, MONE, A_BL, ldA, ONE, A_BR, ldA);
|
||||
} else {
|
||||
// A_TR = A_TL' \ A_TR
|
||||
BLAS(strsm)("L", "U", "T", "N", &n1, &n2, ONE, A_TL, ldA, A_TR, ldA);
|
||||
// A_BR = A_BR - A_TR' * A_TR
|
||||
BLAS(ssyrk)("U", "T", &n2, &n1, MONE, A_TR, ldA, ONE, A_BR, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR)
|
||||
RELAPACK_spotrf_rec(uplo, &n2, A_BR, ldA, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
}
|
|
@ -0,0 +1,212 @@
|
|||
#include "relapack.h"
|
||||
#if XSYGST_ALLOW_MALLOC
|
||||
#include "stdlib.h"
|
||||
#endif
|
||||
|
||||
static void RELAPACK_ssygst_rec(const int *, const char *, const int *,
|
||||
float *, const int *, const float *, const int *,
|
||||
float *, const int *, int *);
|
||||
|
||||
|
||||
/** SSYGST reduces a real symmetric-definite generalized eigenproblem to standard form.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's ssygst.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d8/d78/ssygst_8f.html
|
||||
* */
|
||||
void RELAPACK_ssygst(
|
||||
const int *itype, const char *uplo, const int *n,
|
||||
float *A, const int *ldA, const float *B, const int *ldB,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (*itype < 1 || *itype > 3)
|
||||
*info = -1;
|
||||
else if (!lower && !upper)
|
||||
*info = -2;
|
||||
else if (*n < 0)
|
||||
*info = -3;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -5;
|
||||
else if (*ldB < MAX(1, *n))
|
||||
*info = -7;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("SSYGST", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Allocate work space
|
||||
float *Work = NULL;
|
||||
int lWork = 0;
|
||||
#if XSYGST_ALLOW_MALLOC
|
||||
const int n1 = SREC_SPLIT(*n);
|
||||
lWork = n1 * (*n - n1);
|
||||
Work = malloc(lWork * sizeof(float));
|
||||
if (!Work)
|
||||
lWork = 0;
|
||||
#endif
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_ssygst_rec(itype, &cleanuplo, n, A, ldA, B, ldB, Work, &lWork, info);
|
||||
|
||||
// Free work space
|
||||
#if XSYGST_ALLOW_MALLOC
|
||||
if (Work)
|
||||
free(Work);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** ssygst's recursive compute kernel */
|
||||
static void RELAPACK_ssygst_rec(
|
||||
const int *itype, const char *uplo, const int *n,
|
||||
float *A, const int *ldA, const float *B, const int *ldB,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_SSYGST, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(ssygs2)(itype, uplo, n, A, ldA, B, ldB, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const float ZERO[] = { 0. };
|
||||
const float ONE[] = { 1. };
|
||||
const float MONE[] = { -1. };
|
||||
const float HALF[] = { .5 };
|
||||
const float MHALF[] = { -.5 };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Loop iterator
|
||||
int i;
|
||||
|
||||
// Splitting
|
||||
const int n1 = SREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
float *const A_TL = A;
|
||||
float *const A_TR = A + *ldA * n1;
|
||||
float *const A_BL = A + n1;
|
||||
float *const A_BR = A + *ldA * n1 + n1;
|
||||
|
||||
// B_TL B_TR
|
||||
// B_BL B_BR
|
||||
const float *const B_TL = B;
|
||||
const float *const B_TR = B + *ldB * n1;
|
||||
const float *const B_BL = B + n1;
|
||||
const float *const B_BR = B + *ldB * n1 + n1;
|
||||
|
||||
// recursion(A_TL, B_TL)
|
||||
RELAPACK_ssygst_rec(itype, uplo, &n1, A_TL, ldA, B_TL, ldB, Work, lWork, info);
|
||||
|
||||
if (*itype == 1)
|
||||
if (*uplo == 'L') {
|
||||
// A_BL = A_BL / B_TL'
|
||||
BLAS(strsm)("R", "L", "T", "N", &n2, &n1, ONE, B_TL, ldB, A_BL, ldA);
|
||||
if (*lWork > n2 * n1) {
|
||||
// T = -1/2 * B_BL * A_TL
|
||||
BLAS(ssymm)("R", "L", &n2, &n1, MHALF, A_TL, ldA, B_BL, ldB, ZERO, Work, &n2);
|
||||
// A_BL = A_BL + T
|
||||
for (i = 0; i < n1; i++)
|
||||
BLAS(saxpy)(&n2, ONE, Work + n2 * i, iONE, A_BL + *ldA * i, iONE);
|
||||
} else
|
||||
// A_BL = A_BL - 1/2 B_BL * A_TL
|
||||
BLAS(ssymm)("R", "L", &n2, &n1, MHALF, A_TL, ldA, B_BL, ldB, ONE, A_BL, ldA);
|
||||
// A_BR = A_BR - A_BL * B_BL' - B_BL * A_BL'
|
||||
BLAS(ssyr2k)("L", "N", &n2, &n1, MONE, A_BL, ldA, B_BL, ldB, ONE, A_BR, ldA);
|
||||
if (*lWork > n2 * n1)
|
||||
// A_BL = A_BL + T
|
||||
for (i = 0; i < n1; i++)
|
||||
BLAS(saxpy)(&n2, ONE, Work + n2 * i, iONE, A_BL + *ldA * i, iONE);
|
||||
else
|
||||
// A_BL = A_BL - 1/2 B_BL * A_TL
|
||||
BLAS(ssymm)("R", "L", &n2, &n1, MHALF, A_TL, ldA, B_BL, ldB, ONE, A_BL, ldA);
|
||||
// A_BL = B_BR \ A_BL
|
||||
BLAS(strsm)("L", "L", "N", "N", &n2, &n1, ONE, B_BR, ldB, A_BL, ldA);
|
||||
} else {
|
||||
// A_TR = B_TL' \ A_TR
|
||||
BLAS(strsm)("L", "U", "T", "N", &n1, &n2, ONE, B_TL, ldB, A_TR, ldA);
|
||||
if (*lWork > n2 * n1) {
|
||||
// T = -1/2 * A_TL * B_TR
|
||||
BLAS(ssymm)("L", "U", &n1, &n2, MHALF, A_TL, ldA, B_TR, ldB, ZERO, Work, &n1);
|
||||
// A_TR = A_BL + T
|
||||
for (i = 0; i < n2; i++)
|
||||
BLAS(saxpy)(&n1, ONE, Work + n1 * i, iONE, A_TR + *ldA * i, iONE);
|
||||
} else
|
||||
// A_TR = A_TR - 1/2 A_TL * B_TR
|
||||
BLAS(ssymm)("L", "U", &n1, &n2, MHALF, A_TL, ldA, B_TR, ldB, ONE, A_TR, ldA);
|
||||
// A_BR = A_BR - A_TR' * B_TR - B_TR' * A_TR
|
||||
BLAS(ssyr2k)("U", "T", &n2, &n1, MONE, A_TR, ldA, B_TR, ldB, ONE, A_BR, ldA);
|
||||
if (*lWork > n2 * n1)
|
||||
// A_TR = A_BL + T
|
||||
for (i = 0; i < n2; i++)
|
||||
BLAS(saxpy)(&n1, ONE, Work + n1 * i, iONE, A_TR + *ldA * i, iONE);
|
||||
else
|
||||
// A_TR = A_TR - 1/2 A_TL * B_TR
|
||||
BLAS(ssymm)("L", "U", &n1, &n2, MHALF, A_TL, ldA, B_TR, ldB, ONE, A_TR, ldA);
|
||||
// A_TR = A_TR / B_BR
|
||||
BLAS(strsm)("R", "U", "N", "N", &n1, &n2, ONE, B_BR, ldB, A_TR, ldA);
|
||||
}
|
||||
else
|
||||
if (*uplo == 'L') {
|
||||
// A_BL = A_BL * B_TL
|
||||
BLAS(strmm)("R", "L", "N", "N", &n2, &n1, ONE, B_TL, ldB, A_BL, ldA);
|
||||
if (*lWork > n2 * n1) {
|
||||
// T = 1/2 * A_BR * B_BL
|
||||
BLAS(ssymm)("L", "L", &n2, &n1, HALF, A_BR, ldA, B_BL, ldB, ZERO, Work, &n2);
|
||||
// A_BL = A_BL + T
|
||||
for (i = 0; i < n1; i++)
|
||||
BLAS(saxpy)(&n2, ONE, Work + n2 * i, iONE, A_BL + *ldA * i, iONE);
|
||||
} else
|
||||
// A_BL = A_BL + 1/2 A_BR * B_BL
|
||||
BLAS(ssymm)("L", "L", &n2, &n1, HALF, A_BR, ldA, B_BL, ldB, ONE, A_BL, ldA);
|
||||
// A_TL = A_TL + A_BL' * B_BL + B_BL' * A_BL
|
||||
BLAS(ssyr2k)("L", "T", &n1, &n2, ONE, A_BL, ldA, B_BL, ldB, ONE, A_TL, ldA);
|
||||
if (*lWork > n2 * n1)
|
||||
// A_BL = A_BL + T
|
||||
for (i = 0; i < n1; i++)
|
||||
BLAS(saxpy)(&n2, ONE, Work + n2 * i, iONE, A_BL + *ldA * i, iONE);
|
||||
else
|
||||
// A_BL = A_BL + 1/2 A_BR * B_BL
|
||||
BLAS(ssymm)("L", "L", &n2, &n1, HALF, A_BR, ldA, B_BL, ldB, ONE, A_BL, ldA);
|
||||
// A_BL = B_BR * A_BL
|
||||
BLAS(strmm)("L", "L", "T", "N", &n2, &n1, ONE, B_BR, ldB, A_BL, ldA);
|
||||
} else {
|
||||
// A_TR = B_TL * A_TR
|
||||
BLAS(strmm)("L", "U", "N", "N", &n1, &n2, ONE, B_TL, ldB, A_TR, ldA);
|
||||
if (*lWork > n2 * n1) {
|
||||
// T = 1/2 * B_TR * A_BR
|
||||
BLAS(ssymm)("R", "U", &n1, &n2, HALF, A_BR, ldA, B_TR, ldB, ZERO, Work, &n1);
|
||||
// A_TR = A_TR + T
|
||||
for (i = 0; i < n2; i++)
|
||||
BLAS(saxpy)(&n1, ONE, Work + n1 * i, iONE, A_TR + *ldA * i, iONE);
|
||||
} else
|
||||
// A_TR = A_TR + 1/2 B_TR A_BR
|
||||
BLAS(ssymm)("R", "U", &n1, &n2, HALF, A_BR, ldA, B_TR, ldB, ONE, A_TR, ldA);
|
||||
// A_TL = A_TL + A_TR * B_TR' + B_TR * A_TR'
|
||||
BLAS(ssyr2k)("U", "N", &n1, &n2, ONE, A_TR, ldA, B_TR, ldB, ONE, A_TL, ldA);
|
||||
if (*lWork > n2 * n1)
|
||||
// A_TR = A_TR + T
|
||||
for (i = 0; i < n2; i++)
|
||||
BLAS(saxpy)(&n1, ONE, Work + n1 * i, iONE, A_TR + *ldA * i, iONE);
|
||||
else
|
||||
// A_TR = A_TR + 1/2 B_TR * A_BR
|
||||
BLAS(ssymm)("R", "U", &n1, &n2, HALF, A_BR, ldA, B_TR, ldB, ONE, A_TR, ldA);
|
||||
// A_TR = A_TR * B_BR
|
||||
BLAS(strmm)("R", "U", "T", "N", &n1, &n2, ONE, B_BR, ldB, A_TR, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR, B_BR)
|
||||
RELAPACK_ssygst_rec(itype, uplo, &n2, A_BR, ldA, B_BR, ldB, Work, lWork, info);
|
||||
}
|
|
@ -0,0 +1,238 @@
|
|||
#include "relapack.h"
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
static void RELAPACK_ssytrf_rec(const char *, const int *, const int *, int *,
|
||||
float *, const int *, int *, float *, const int *, int *);
|
||||
|
||||
|
||||
/** SSYTRF computes the factorization of a complex symmetric matrix A using the Bunch-Kaufman diagonal pivoting method.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's ssytrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/da/de9/ssytrf_8f.html
|
||||
* */
|
||||
void RELAPACK_ssytrf(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
|
||||
// Required work size
|
||||
const int cleanlWork = *n * (*n / 2);
|
||||
int minlWork = cleanlWork;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
minlWork = 1;
|
||||
#endif
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
else if (*lWork < minlWork && *lWork != -1)
|
||||
*info = -7;
|
||||
else if (*lWork == -1) {
|
||||
// Work size query
|
||||
*Work = cleanlWork;
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure Work size
|
||||
float *cleanWork = Work;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (!*info && *lWork < cleanlWork) {
|
||||
cleanWork = malloc(cleanlWork * sizeof(float));
|
||||
if (!cleanWork)
|
||||
*info = -7;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("SSYTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Dummy arguments
|
||||
int nout;
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_ssytrf_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info);
|
||||
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (cleanWork != Work)
|
||||
free(cleanWork);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** ssytrf's recursive compute kernel */
|
||||
static void RELAPACK_ssytrf_rec(
|
||||
const char *uplo, const int *n_full, const int *n, int *n_out,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *ldWork, int *info
|
||||
) {
|
||||
|
||||
// top recursion level?
|
||||
const int top = *n_full == *n;
|
||||
|
||||
if (*n <= MAX(CROSSOVER_SSYTRF, 3)) {
|
||||
// Unblocked
|
||||
if (top) {
|
||||
LAPACK(ssytf2)(uplo, n, A, ldA, ipiv, info);
|
||||
*n_out = *n;
|
||||
} else
|
||||
RELAPACK_ssytrf_rec2(uplo, n_full, n, n_out, A, ldA, ipiv, Work, ldWork, info);
|
||||
return;
|
||||
}
|
||||
|
||||
int info1, info2;
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1. };
|
||||
const float MONE[] = { -1. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Loop iterator
|
||||
int i;
|
||||
|
||||
const int n_rest = *n_full - *n;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// Splitting (setup)
|
||||
int n1 = SREC_SPLIT(*n);
|
||||
int n2 = *n - n1;
|
||||
|
||||
// Work_L *
|
||||
float *const Work_L = Work;
|
||||
|
||||
// recursion(A_L)
|
||||
int n1_out;
|
||||
RELAPACK_ssytrf_rec(uplo, n_full, &n1, &n1_out, A, ldA, ipiv, Work_L, ldWork, &info1);
|
||||
n1 = n1_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n2 = *n - n1;
|
||||
const int n_full2 = *n_full - n1;
|
||||
|
||||
// * *
|
||||
// A_BL A_BR
|
||||
// A_BL_B A_BR_B
|
||||
float *const A_BL = A + n1;
|
||||
float *const A_BR = A + *ldA * n1 + n1;
|
||||
float *const A_BL_B = A + *n;
|
||||
float *const A_BR_B = A + *ldA * n1 + *n;
|
||||
|
||||
// * *
|
||||
// Work_BL Work_BR
|
||||
// * *
|
||||
// (top recursion level: use Work as Work_BR)
|
||||
float *const Work_BL = Work + n1;
|
||||
float *const Work_BR = top ? Work : Work + *ldWork * n1 + n1;
|
||||
const int ldWork_BR = top ? n2 : *ldWork;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// A_BR = A_BR - A_BL Work_BL'
|
||||
RELAPACK_sgemmt(uplo, "N", "T", &n2, &n1, MONE, A_BL, ldA, Work_BL, ldWork, ONE, A_BR, ldA);
|
||||
BLAS(sgemm)("N", "T", &n_rest, &n2, &n1, MONE, A_BL_B, ldA, Work_BL, ldWork, ONE, A_BR_B, ldA);
|
||||
|
||||
// recursion(A_BR)
|
||||
int n2_out;
|
||||
RELAPACK_ssytrf_rec(uplo, &n_full2, &n2, &n2_out, A_BR, ldA, ipiv_B, Work_BR, &ldWork_BR, &info2);
|
||||
|
||||
if (n2_out != n2) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// last column of A_BR
|
||||
float *const A_BR_r = A_BR + *ldA * n2_out + n2_out;
|
||||
|
||||
// last row of A_BL
|
||||
float *const A_BL_b = A_BL + n2_out;
|
||||
|
||||
// last row of Work_BL
|
||||
float *const Work_BL_b = Work_BL + n2_out;
|
||||
|
||||
// A_BR_r = A_BR_r + A_BL_b Work_BL_b'
|
||||
BLAS(sgemv)("N", &n_restp1, &n1, ONE, A_BL_b, ldA, Work_BL_b, ldWork, ONE, A_BR_r, iONE);
|
||||
}
|
||||
n2 = n2_out;
|
||||
|
||||
// shift pivots
|
||||
for (i = 0; i < n2; i++)
|
||||
if (ipiv_B[i] > 0)
|
||||
ipiv_B[i] += n1;
|
||||
else
|
||||
ipiv_B[i] -= n1;
|
||||
|
||||
*info = info1 || info2;
|
||||
*n_out = n1 + n2;
|
||||
} else {
|
||||
// Splitting (setup)
|
||||
int n2 = SREC_SPLIT(*n);
|
||||
int n1 = *n - n2;
|
||||
|
||||
// * Work_R
|
||||
// (top recursion level: use Work as Work_R)
|
||||
float *const Work_R = top ? Work : Work + *ldWork * n1;
|
||||
|
||||
// recursion(A_R)
|
||||
int n2_out;
|
||||
RELAPACK_ssytrf_rec(uplo, n_full, &n2, &n2_out, A, ldA, ipiv, Work_R, ldWork, &info2);
|
||||
const int n2_diff = n2 - n2_out;
|
||||
n2 = n2_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n1 = *n - n2;
|
||||
const int n_full1 = *n_full - n2;
|
||||
|
||||
// * A_TL_T A_TR_T
|
||||
// * A_TL A_TR
|
||||
// * * *
|
||||
float *const A_TL_T = A + *ldA * n_rest;
|
||||
float *const A_TR_T = A + *ldA * (n_rest + n1);
|
||||
float *const A_TL = A + *ldA * n_rest + n_rest;
|
||||
float *const A_TR = A + *ldA * (n_rest + n1) + n_rest;
|
||||
|
||||
// Work_L *
|
||||
// * Work_TR
|
||||
// * *
|
||||
// (top recursion level: Work_R was Work)
|
||||
float *const Work_L = Work;
|
||||
float *const Work_TR = Work + *ldWork * (top ? n2_diff : n1) + n_rest;
|
||||
const int ldWork_L = top ? n1 : *ldWork;
|
||||
|
||||
// A_TL = A_TL - A_TR Work_TR'
|
||||
RELAPACK_sgemmt(uplo, "N", "T", &n1, &n2, MONE, A_TR, ldA, Work_TR, ldWork, ONE, A_TL, ldA);
|
||||
BLAS(sgemm)("N", "T", &n_rest, &n1, &n2, MONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, ldA);
|
||||
|
||||
// recursion(A_TL)
|
||||
int n1_out;
|
||||
RELAPACK_ssytrf_rec(uplo, &n_full1, &n1, &n1_out, A, ldA, ipiv, Work_L, &ldWork_L, &info1);
|
||||
|
||||
if (n1_out != n1) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// A_TL_T_l = A_TL_T_l + A_TR_T Work_TR_t'
|
||||
BLAS(sgemv)("N", &n_restp1, &n2, ONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, iONE);
|
||||
}
|
||||
n1 = n1_out;
|
||||
|
||||
*info = info2 || info1;
|
||||
*n_out = n1 + n2;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,351 @@
|
|||
/* -- translated by f2c (version 20100827).
|
||||
You must link the resulting object file with libf2c:
|
||||
on Microsoft Windows system, link with libf2c.lib;
|
||||
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
|
||||
or, if you install libf2c.a in a standard place, with -lf2c -lm
|
||||
-- in that order, at the end of the command line, as in
|
||||
cc *.o -lf2c -lm
|
||||
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
|
||||
|
||||
http://www.netlib.org/f2c/libf2c.zip
|
||||
*/
|
||||
|
||||
#include "f2c.h"
|
||||
|
||||
/* Table of constant values */
|
||||
|
||||
static int c__1 = 1;
|
||||
static float c_b8 = -1.f;
|
||||
static float c_b9 = 1.f;
|
||||
|
||||
/** SSYTRF_REC2 computes a partial factorization of a real symmetric matrix using the Bunch-Kaufman diagon al pivoting method.
|
||||
*
|
||||
* This routine is a minor modification of LAPACK's slasyf.
|
||||
* It serves as an unblocked kernel in the recursive algorithms.
|
||||
* The blocked BLAS Level 3 updates were removed and moved to the
|
||||
* recursive algorithm.
|
||||
* */
|
||||
/* Subroutine */ void RELAPACK_ssytrf_rec2(char *uplo, int *n, int *
|
||||
nb, int *kb, float *a, int *lda, int *ipiv, float *w,
|
||||
int *ldw, int *info, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
int a_dim1, a_offset, w_dim1, w_offset, i__1, i__2;
|
||||
float r__1, r__2, r__3;
|
||||
|
||||
/* Builtin functions */
|
||||
double sqrt(double);
|
||||
|
||||
/* Local variables */
|
||||
static int j, k;
|
||||
static float t, r1, d11, d21, d22;
|
||||
static int jj, kk, jp, kp, kw, kkw, imax, jmax;
|
||||
static float alpha;
|
||||
extern logical lsame_(char *, char *, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ int sscal_(int *, float *, float *, int *),
|
||||
sgemv_(char *, int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *, ftnlen);
|
||||
static int kstep;
|
||||
extern /* Subroutine */ int scopy_(int *, float *, int *, float *,
|
||||
int *), sswap_(int *, float *, int *, float *, int *
|
||||
);
|
||||
static float absakk;
|
||||
extern int isamax_(int *, float *, int *);
|
||||
static float colmax, rowmax;
|
||||
|
||||
/* Parameter adjustments */
|
||||
a_dim1 = *lda;
|
||||
a_offset = 1 + a_dim1;
|
||||
a -= a_offset;
|
||||
--ipiv;
|
||||
w_dim1 = *ldw;
|
||||
w_offset = 1 + w_dim1;
|
||||
w -= w_offset;
|
||||
|
||||
/* Function Body */
|
||||
*info = 0;
|
||||
alpha = (sqrt(17.f) + 1.f) / 8.f;
|
||||
if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
|
||||
k = *n;
|
||||
L10:
|
||||
kw = *nb + k - *n;
|
||||
if ((k <= *n - *nb + 1 && *nb < *n) || k < 1) {
|
||||
goto L30;
|
||||
}
|
||||
scopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
sgemv_("No transpose", &k, &i__1, &c_b8, &a[(k + 1) * a_dim1 + 1],
|
||||
lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b9, &w[kw *
|
||||
w_dim1 + 1], &c__1, (ftnlen)12);
|
||||
}
|
||||
kstep = 1;
|
||||
absakk = (r__1 = w[k + kw * w_dim1], dabs(r__1));
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
imax = isamax_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
colmax = (r__1 = w[imax + kw * w_dim1], dabs(r__1));
|
||||
} else {
|
||||
colmax = 0.f;
|
||||
}
|
||||
if (dmax(absakk,colmax) == 0.f) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
} else {
|
||||
if (absakk >= alpha * colmax) {
|
||||
kp = k;
|
||||
} else {
|
||||
scopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) *
|
||||
w_dim1 + 1], &c__1);
|
||||
i__1 = k - imax;
|
||||
scopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax +
|
||||
1 + (kw - 1) * w_dim1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
sgemv_("No transpose", &k, &i__1, &c_b8, &a[(k + 1) *
|
||||
a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1],
|
||||
ldw, &c_b9, &w[(kw - 1) * w_dim1 + 1], &c__1, (
|
||||
ftnlen)12);
|
||||
}
|
||||
i__1 = k - imax;
|
||||
jmax = imax + isamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1],
|
||||
&c__1);
|
||||
rowmax = (r__1 = w[jmax + (kw - 1) * w_dim1], dabs(r__1));
|
||||
if (imax > 1) {
|
||||
i__1 = imax - 1;
|
||||
jmax = isamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
|
||||
/* Computing MAX */
|
||||
r__2 = rowmax, r__3 = (r__1 = w[jmax + (kw - 1) * w_dim1],
|
||||
dabs(r__1));
|
||||
rowmax = dmax(r__2,r__3);
|
||||
}
|
||||
if (absakk >= alpha * colmax * (colmax / rowmax)) {
|
||||
kp = k;
|
||||
} else if ((r__1 = w[imax + (kw - 1) * w_dim1], dabs(r__1)) >=
|
||||
alpha * rowmax) {
|
||||
kp = imax;
|
||||
scopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
|
||||
w_dim1 + 1], &c__1);
|
||||
} else {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
}
|
||||
}
|
||||
kk = k - kstep + 1;
|
||||
kkw = *nb + kk - *n;
|
||||
if (kp != kk) {
|
||||
a[kp + kp * a_dim1] = a[kk + kk * a_dim1];
|
||||
i__1 = kk - 1 - kp;
|
||||
scopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp +
|
||||
1) * a_dim1], lda);
|
||||
if (kp > 1) {
|
||||
i__1 = kp - 1;
|
||||
scopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1
|
||||
+ 1], &c__1);
|
||||
}
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
sswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k
|
||||
+ 1) * a_dim1], lda);
|
||||
}
|
||||
i__1 = *n - kk + 1;
|
||||
sswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw *
|
||||
w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
scopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &
|
||||
c__1);
|
||||
r1 = 1.f / a[k + k * a_dim1];
|
||||
i__1 = k - 1;
|
||||
sscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1);
|
||||
} else {
|
||||
if (k > 2) {
|
||||
d21 = w[k - 1 + kw * w_dim1];
|
||||
d11 = w[k + kw * w_dim1] / d21;
|
||||
d22 = w[k - 1 + (kw - 1) * w_dim1] / d21;
|
||||
t = 1.f / (d11 * d22 - 1.f);
|
||||
d21 = t / d21;
|
||||
i__1 = k - 2;
|
||||
for (j = 1; j <= i__1; ++j) {
|
||||
a[j + (k - 1) * a_dim1] = d21 * (d11 * w[j + (kw - 1)
|
||||
* w_dim1] - w[j + kw * w_dim1]);
|
||||
a[j + k * a_dim1] = d21 * (d22 * w[j + kw * w_dim1] -
|
||||
w[j + (kw - 1) * w_dim1]);
|
||||
/* L20: */
|
||||
}
|
||||
}
|
||||
a[k - 1 + (k - 1) * a_dim1] = w[k - 1 + (kw - 1) * w_dim1];
|
||||
a[k - 1 + k * a_dim1] = w[k - 1 + kw * w_dim1];
|
||||
a[k + k * a_dim1] = w[k + kw * w_dim1];
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -kp;
|
||||
ipiv[k - 1] = -kp;
|
||||
}
|
||||
k -= kstep;
|
||||
goto L10;
|
||||
L30:
|
||||
j = k + 1;
|
||||
L60:
|
||||
jj = j;
|
||||
jp = ipiv[j];
|
||||
if (jp < 0) {
|
||||
jp = -jp;
|
||||
++j;
|
||||
}
|
||||
++j;
|
||||
if (jp != jj && j <= *n) {
|
||||
i__1 = *n - j + 1;
|
||||
sswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda);
|
||||
}
|
||||
if (j < *n) {
|
||||
goto L60;
|
||||
}
|
||||
*kb = *n - k;
|
||||
} else {
|
||||
k = 1;
|
||||
L70:
|
||||
if ((k >= *nb && *nb < *n) || k > *n) {
|
||||
goto L90;
|
||||
}
|
||||
i__1 = *n - k + 1;
|
||||
scopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1);
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
sgemv_("No transpose", &i__1, &i__2, &c_b8, &a[k + a_dim1], lda, &w[k
|
||||
+ w_dim1], ldw, &c_b9, &w[k + k * w_dim1], &c__1, (ftnlen)12);
|
||||
kstep = 1;
|
||||
absakk = (r__1 = w[k + k * w_dim1], dabs(r__1));
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
imax = k + isamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
colmax = (r__1 = w[imax + k * w_dim1], dabs(r__1));
|
||||
} else {
|
||||
colmax = 0.f;
|
||||
}
|
||||
if (dmax(absakk,colmax) == 0.f) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
} else {
|
||||
if (absakk >= alpha * colmax) {
|
||||
kp = k;
|
||||
} else {
|
||||
i__1 = imax - k;
|
||||
scopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = *n - imax + 1;
|
||||
scopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k +
|
||||
1) * w_dim1], &c__1);
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
sgemv_("No transpose", &i__1, &i__2, &c_b8, &a[k + a_dim1],
|
||||
lda, &w[imax + w_dim1], ldw, &c_b9, &w[k + (k + 1) *
|
||||
w_dim1], &c__1, (ftnlen)12);
|
||||
i__1 = imax - k;
|
||||
jmax = k - 1 + isamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1)
|
||||
;
|
||||
rowmax = (r__1 = w[jmax + (k + 1) * w_dim1], dabs(r__1));
|
||||
if (imax < *n) {
|
||||
i__1 = *n - imax;
|
||||
jmax = imax + isamax_(&i__1, &w[imax + 1 + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
/* Computing MAX */
|
||||
r__2 = rowmax, r__3 = (r__1 = w[jmax + (k + 1) * w_dim1],
|
||||
dabs(r__1));
|
||||
rowmax = dmax(r__2,r__3);
|
||||
}
|
||||
if (absakk >= alpha * colmax * (colmax / rowmax)) {
|
||||
kp = k;
|
||||
} else if ((r__1 = w[imax + (k + 1) * w_dim1], dabs(r__1)) >=
|
||||
alpha * rowmax) {
|
||||
kp = imax;
|
||||
i__1 = *n - k + 1;
|
||||
scopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k *
|
||||
w_dim1], &c__1);
|
||||
} else {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
}
|
||||
}
|
||||
kk = k + kstep - 1;
|
||||
if (kp != kk) {
|
||||
a[kp + kp * a_dim1] = a[kk + kk * a_dim1];
|
||||
i__1 = kp - kk - 1;
|
||||
scopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk +
|
||||
1) * a_dim1], lda);
|
||||
if (kp < *n) {
|
||||
i__1 = *n - kp;
|
||||
scopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1
|
||||
+ kp * a_dim1], &c__1);
|
||||
}
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
sswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda);
|
||||
}
|
||||
sswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
i__1 = *n - k + 1;
|
||||
scopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
|
||||
c__1);
|
||||
if (k < *n) {
|
||||
r1 = 1.f / a[k + k * a_dim1];
|
||||
i__1 = *n - k;
|
||||
sscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1);
|
||||
}
|
||||
} else {
|
||||
if (k < *n - 1) {
|
||||
d21 = w[k + 1 + k * w_dim1];
|
||||
d11 = w[k + 1 + (k + 1) * w_dim1] / d21;
|
||||
d22 = w[k + k * w_dim1] / d21;
|
||||
t = 1.f / (d11 * d22 - 1.f);
|
||||
d21 = t / d21;
|
||||
i__1 = *n;
|
||||
for (j = k + 2; j <= i__1; ++j) {
|
||||
a[j + k * a_dim1] = d21 * (d11 * w[j + k * w_dim1] -
|
||||
w[j + (k + 1) * w_dim1]);
|
||||
a[j + (k + 1) * a_dim1] = d21 * (d22 * w[j + (k + 1) *
|
||||
w_dim1] - w[j + k * w_dim1]);
|
||||
/* L80: */
|
||||
}
|
||||
}
|
||||
a[k + k * a_dim1] = w[k + k * w_dim1];
|
||||
a[k + 1 + k * a_dim1] = w[k + 1 + k * w_dim1];
|
||||
a[k + 1 + (k + 1) * a_dim1] = w[k + 1 + (k + 1) * w_dim1];
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -kp;
|
||||
ipiv[k + 1] = -kp;
|
||||
}
|
||||
k += kstep;
|
||||
goto L70;
|
||||
L90:
|
||||
j = k - 1;
|
||||
L120:
|
||||
jj = j;
|
||||
jp = ipiv[j];
|
||||
if (jp < 0) {
|
||||
jp = -jp;
|
||||
--j;
|
||||
}
|
||||
--j;
|
||||
if (jp != jj && j >= 1) {
|
||||
sswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda);
|
||||
}
|
||||
if (j > 1) {
|
||||
goto L120;
|
||||
}
|
||||
*kb = k - 1;
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,236 @@
|
|||
#include "relapack.h"
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
static void RELAPACK_ssytrf_rook_rec(const char *, const int *, const int *, int *,
|
||||
float *, const int *, int *, float *, const int *, int *);
|
||||
|
||||
|
||||
/** SSYTRF_ROOK computes the factorization of a real symmetric matrix A using the bounded Bunch-Kaufman ("rook") diagonal pivoting method.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's ssytrf_rook.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/de/da4/ssytrf__rook_8f.html
|
||||
* */
|
||||
void RELAPACK_ssytrf_rook(
|
||||
const char *uplo, const int *n,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *lWork, int *info
|
||||
) {
|
||||
|
||||
// Required work size
|
||||
const int cleanlWork = *n * (*n / 2);
|
||||
int minlWork = cleanlWork;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
minlWork = 1;
|
||||
#endif
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
else if (*lWork < minlWork && *lWork != -1)
|
||||
*info = -7;
|
||||
else if (*lWork == -1) {
|
||||
// Work size query
|
||||
*Work = cleanlWork;
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure Work size
|
||||
float *cleanWork = Work;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (!*info && *lWork < cleanlWork) {
|
||||
cleanWork = malloc(cleanlWork * sizeof(float));
|
||||
if (!cleanWork)
|
||||
*info = -7;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("SSYTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Dummy argument
|
||||
int nout;
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_ssytrf_rook_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info);
|
||||
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (cleanWork != Work)
|
||||
free(cleanWork);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** ssytrf_rook's recursive compute kernel */
|
||||
static void RELAPACK_ssytrf_rook_rec(
|
||||
const char *uplo, const int *n_full, const int *n, int *n_out,
|
||||
float *A, const int *ldA, int *ipiv,
|
||||
float *Work, const int *ldWork, int *info
|
||||
) {
|
||||
|
||||
// top recursion level?
|
||||
const int top = *n_full == *n;
|
||||
|
||||
if (*n <= MAX(CROSSOVER_SSYTRF_ROOK, 3)) {
|
||||
// Unblocked
|
||||
if (top) {
|
||||
LAPACK(ssytf2)(uplo, n, A, ldA, ipiv, info);
|
||||
*n_out = *n;
|
||||
} else
|
||||
RELAPACK_ssytrf_rook_rec2(uplo, n_full, n, n_out, A, ldA, ipiv, Work, ldWork, info);
|
||||
return;
|
||||
}
|
||||
|
||||
int info1, info2;
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1. };
|
||||
const float MONE[] = { -1. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
const int n_rest = *n_full - *n;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// Splitting (setup)
|
||||
int n1 = SREC_SPLIT(*n);
|
||||
int n2 = *n - n1;
|
||||
|
||||
// Work_L *
|
||||
float *const Work_L = Work;
|
||||
|
||||
// recursion(A_L)
|
||||
int n1_out;
|
||||
RELAPACK_ssytrf_rook_rec(uplo, n_full, &n1, &n1_out, A, ldA, ipiv, Work_L, ldWork, &info1);
|
||||
n1 = n1_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n2 = *n - n1;
|
||||
const int n_full2 = *n_full - n1;
|
||||
|
||||
// * *
|
||||
// A_BL A_BR
|
||||
// A_BL_B A_BR_B
|
||||
float *const A_BL = A + n1;
|
||||
float *const A_BR = A + *ldA * n1 + n1;
|
||||
float *const A_BL_B = A + *n;
|
||||
float *const A_BR_B = A + *ldA * n1 + *n;
|
||||
|
||||
// * *
|
||||
// Work_BL Work_BR
|
||||
// * *
|
||||
// (top recursion level: use Work as Work_BR)
|
||||
float *const Work_BL = Work + n1;
|
||||
float *const Work_BR = top ? Work : Work + *ldWork * n1 + n1;
|
||||
const int ldWork_BR = top ? n2 : *ldWork;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// A_BR = A_BR - A_BL Work_BL'
|
||||
RELAPACK_sgemmt(uplo, "N", "T", &n2, &n1, MONE, A_BL, ldA, Work_BL, ldWork, ONE, A_BR, ldA);
|
||||
BLAS(sgemm)("N", "T", &n_rest, &n2, &n1, MONE, A_BL_B, ldA, Work_BL, ldWork, ONE, A_BR_B, ldA);
|
||||
|
||||
// recursion(A_BR)
|
||||
int n2_out;
|
||||
RELAPACK_ssytrf_rook_rec(uplo, &n_full2, &n2, &n2_out, A_BR, ldA, ipiv_B, Work_BR, &ldWork_BR, &info2);
|
||||
|
||||
if (n2_out != n2) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// last column of A_BR
|
||||
float *const A_BR_r = A_BR + *ldA * n2_out + n2_out;
|
||||
|
||||
// last row of A_BL
|
||||
float *const A_BL_b = A_BL + n2_out;
|
||||
|
||||
// last row of Work_BL
|
||||
float *const Work_BL_b = Work_BL + n2_out;
|
||||
|
||||
// A_BR_r = A_BR_r + A_BL_b Work_BL_b'
|
||||
BLAS(sgemv)("N", &n_restp1, &n1, ONE, A_BL_b, ldA, Work_BL_b, ldWork, ONE, A_BR_r, iONE);
|
||||
}
|
||||
n2 = n2_out;
|
||||
|
||||
// shift pivots
|
||||
int i;
|
||||
for (i = 0; i < n2; i++)
|
||||
if (ipiv_B[i] > 0)
|
||||
ipiv_B[i] += n1;
|
||||
else
|
||||
ipiv_B[i] -= n1;
|
||||
|
||||
*info = info1 || info2;
|
||||
*n_out = n1 + n2;
|
||||
} else {
|
||||
// Splitting (setup)
|
||||
int n2 = SREC_SPLIT(*n);
|
||||
int n1 = *n - n2;
|
||||
|
||||
// * Work_R
|
||||
// (top recursion level: use Work as Work_R)
|
||||
float *const Work_R = top ? Work : Work + *ldWork * n1;
|
||||
|
||||
// recursion(A_R)
|
||||
int n2_out;
|
||||
RELAPACK_ssytrf_rook_rec(uplo, n_full, &n2, &n2_out, A, ldA, ipiv, Work_R, ldWork, &info2);
|
||||
const int n2_diff = n2 - n2_out;
|
||||
n2 = n2_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n1 = *n - n2;
|
||||
const int n_full1 = *n_full - n2;
|
||||
|
||||
// * A_TL_T A_TR_T
|
||||
// * A_TL A_TR
|
||||
// * * *
|
||||
float *const A_TL_T = A + *ldA * n_rest;
|
||||
float *const A_TR_T = A + *ldA * (n_rest + n1);
|
||||
float *const A_TL = A + *ldA * n_rest + n_rest;
|
||||
float *const A_TR = A + *ldA * (n_rest + n1) + n_rest;
|
||||
|
||||
// Work_L *
|
||||
// * Work_TR
|
||||
// * *
|
||||
// (top recursion level: Work_R was Work)
|
||||
float *const Work_L = Work;
|
||||
float *const Work_TR = Work + *ldWork * (top ? n2_diff : n1) + n_rest;
|
||||
const int ldWork_L = top ? n1 : *ldWork;
|
||||
|
||||
// A_TL = A_TL - A_TR Work_TR'
|
||||
RELAPACK_sgemmt(uplo, "N", "T", &n1, &n2, MONE, A_TR, ldA, Work_TR, ldWork, ONE, A_TL, ldA);
|
||||
BLAS(sgemm)("N", "T", &n_rest, &n1, &n2, MONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, ldA);
|
||||
|
||||
// recursion(A_TL)
|
||||
int n1_out;
|
||||
RELAPACK_ssytrf_rook_rec(uplo, &n_full1, &n1, &n1_out, A, ldA, ipiv, Work_L, &ldWork_L, &info1);
|
||||
|
||||
if (n1_out != n1) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// A_TL_T_l = A_TL_T_l + A_TR_T Work_TR_t'
|
||||
BLAS(sgemv)("N", &n_restp1, &n2, ONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, iONE);
|
||||
}
|
||||
n1 = n1_out;
|
||||
|
||||
*info = info2 || info1;
|
||||
*n_out = n1 + n2;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,451 @@
|
|||
/* -- translated by f2c (version 20100827).
|
||||
You must link the resulting object file with libf2c:
|
||||
on Microsoft Windows system, link with libf2c.lib;
|
||||
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
|
||||
or, if you install libf2c.a in a standard place, with -lf2c -lm
|
||||
-- in that order, at the end of the command line, as in
|
||||
cc *.o -lf2c -lm
|
||||
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
|
||||
|
||||
http://www.netlib.org/f2c/libf2c.zip
|
||||
*/
|
||||
|
||||
#include "f2c.h"
|
||||
|
||||
/* Table of constant values */
|
||||
|
||||
static int c__1 = 1;
|
||||
static float c_b9 = -1.f;
|
||||
static float c_b10 = 1.f;
|
||||
|
||||
/** SSYTRF_ROOK_REC2 computes a partial factorization of a real symmetric matrix using the bounded Bunch-Kaufma n ("rook") diagonal pivoting method.
|
||||
*
|
||||
* This routine is a minor modification of LAPACK's slasyf_rook.
|
||||
* It serves as an unblocked kernel in the recursive algorithms.
|
||||
* The blocked BLAS Level 3 updates were removed and moved to the
|
||||
* recursive algorithm.
|
||||
* */
|
||||
/* Subroutine */ void RELAPACK_ssytrf_rook_rec2(char *uplo, int *n,
|
||||
int *nb, int *kb, float *a, int *lda, int *ipiv, float *
|
||||
w, int *ldw, int *info, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
int a_dim1, a_offset, w_dim1, w_offset, i__1, i__2;
|
||||
float r__1;
|
||||
|
||||
/* Builtin functions */
|
||||
double sqrt(double);
|
||||
|
||||
/* Local variables */
|
||||
static int j, k, p;
|
||||
static float t, r1, d11, d12, d21, d22;
|
||||
static int ii, jj, kk, kp, kw, jp1, jp2, kkw;
|
||||
static logical done;
|
||||
static int imax, jmax;
|
||||
static float alpha;
|
||||
extern logical lsame_(char *, char *, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ int sscal_(int *, float *, float *, int *);
|
||||
static float sfmin;
|
||||
static int itemp;
|
||||
extern /* Subroutine */ int sgemv_(char *, int *, int *, float *,
|
||||
float *, int *, float *, int *, float *, float *, int *,
|
||||
ftnlen);
|
||||
static int kstep;
|
||||
static float stemp;
|
||||
extern /* Subroutine */ int scopy_(int *, float *, int *, float *,
|
||||
int *), sswap_(int *, float *, int *, float *, int *
|
||||
);
|
||||
static float absakk;
|
||||
extern double slamch_(char *, ftnlen);
|
||||
extern int isamax_(int *, float *, int *);
|
||||
static float colmax, rowmax;
|
||||
|
||||
/* Parameter adjustments */
|
||||
a_dim1 = *lda;
|
||||
a_offset = 1 + a_dim1;
|
||||
a -= a_offset;
|
||||
--ipiv;
|
||||
w_dim1 = *ldw;
|
||||
w_offset = 1 + w_dim1;
|
||||
w -= w_offset;
|
||||
|
||||
/* Function Body */
|
||||
*info = 0;
|
||||
alpha = (sqrt(17.f) + 1.f) / 8.f;
|
||||
sfmin = slamch_("S", (ftnlen)1);
|
||||
if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
|
||||
k = *n;
|
||||
L10:
|
||||
kw = *nb + k - *n;
|
||||
if ((k <= *n - *nb + 1 && *nb < *n) || k < 1) {
|
||||
goto L30;
|
||||
}
|
||||
kstep = 1;
|
||||
p = k;
|
||||
scopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
sgemv_("No transpose", &k, &i__1, &c_b9, &a[(k + 1) * a_dim1 + 1],
|
||||
lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b10, &w[kw *
|
||||
w_dim1 + 1], &c__1, (ftnlen)12);
|
||||
}
|
||||
absakk = (r__1 = w[k + kw * w_dim1], dabs(r__1));
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
imax = isamax_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
colmax = (r__1 = w[imax + kw * w_dim1], dabs(r__1));
|
||||
} else {
|
||||
colmax = 0.f;
|
||||
}
|
||||
if (dmax(absakk,colmax) == 0.f) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
scopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1);
|
||||
} else {
|
||||
if (! (absakk < alpha * colmax)) {
|
||||
kp = k;
|
||||
} else {
|
||||
done = FALSE_;
|
||||
L12:
|
||||
scopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) *
|
||||
w_dim1 + 1], &c__1);
|
||||
i__1 = k - imax;
|
||||
scopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax +
|
||||
1 + (kw - 1) * w_dim1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
sgemv_("No transpose", &k, &i__1, &c_b9, &a[(k + 1) *
|
||||
a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1],
|
||||
ldw, &c_b10, &w[(kw - 1) * w_dim1 + 1], &c__1, (
|
||||
ftnlen)12);
|
||||
}
|
||||
if (imax != k) {
|
||||
i__1 = k - imax;
|
||||
jmax = imax + isamax_(&i__1, &w[imax + 1 + (kw - 1) *
|
||||
w_dim1], &c__1);
|
||||
rowmax = (r__1 = w[jmax + (kw - 1) * w_dim1], dabs(r__1));
|
||||
} else {
|
||||
rowmax = 0.f;
|
||||
}
|
||||
if (imax > 1) {
|
||||
i__1 = imax - 1;
|
||||
itemp = isamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
|
||||
stemp = (r__1 = w[itemp + (kw - 1) * w_dim1], dabs(r__1));
|
||||
if (stemp > rowmax) {
|
||||
rowmax = stemp;
|
||||
jmax = itemp;
|
||||
}
|
||||
}
|
||||
if (! ((r__1 = w[imax + (kw - 1) * w_dim1], dabs(r__1)) <
|
||||
alpha * rowmax)) {
|
||||
kp = imax;
|
||||
scopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
|
||||
w_dim1 + 1], &c__1);
|
||||
done = TRUE_;
|
||||
} else if (p == jmax || rowmax <= colmax) {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
done = TRUE_;
|
||||
} else {
|
||||
p = imax;
|
||||
colmax = rowmax;
|
||||
imax = jmax;
|
||||
scopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
|
||||
w_dim1 + 1], &c__1);
|
||||
}
|
||||
if (! done) {
|
||||
goto L12;
|
||||
}
|
||||
}
|
||||
kk = k - kstep + 1;
|
||||
kkw = *nb + kk - *n;
|
||||
if (kstep == 2 && p != k) {
|
||||
i__1 = k - p;
|
||||
scopy_(&i__1, &a[p + 1 + k * a_dim1], &c__1, &a[p + (p + 1) *
|
||||
a_dim1], lda);
|
||||
scopy_(&p, &a[k * a_dim1 + 1], &c__1, &a[p * a_dim1 + 1], &
|
||||
c__1);
|
||||
i__1 = *n - k + 1;
|
||||
sswap_(&i__1, &a[k + k * a_dim1], lda, &a[p + k * a_dim1],
|
||||
lda);
|
||||
i__1 = *n - kk + 1;
|
||||
sswap_(&i__1, &w[k + kkw * w_dim1], ldw, &w[p + kkw * w_dim1],
|
||||
ldw);
|
||||
}
|
||||
if (kp != kk) {
|
||||
a[kp + k * a_dim1] = a[kk + k * a_dim1];
|
||||
i__1 = k - 1 - kp;
|
||||
scopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp +
|
||||
1) * a_dim1], lda);
|
||||
scopy_(&kp, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &
|
||||
c__1);
|
||||
i__1 = *n - kk + 1;
|
||||
sswap_(&i__1, &a[kk + kk * a_dim1], lda, &a[kp + kk * a_dim1],
|
||||
lda);
|
||||
i__1 = *n - kk + 1;
|
||||
sswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw *
|
||||
w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
scopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &
|
||||
c__1);
|
||||
if (k > 1) {
|
||||
if ((r__1 = a[k + k * a_dim1], dabs(r__1)) >= sfmin) {
|
||||
r1 = 1.f / a[k + k * a_dim1];
|
||||
i__1 = k - 1;
|
||||
sscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1);
|
||||
} else if (a[k + k * a_dim1] != 0.f) {
|
||||
i__1 = k - 1;
|
||||
for (ii = 1; ii <= i__1; ++ii) {
|
||||
a[ii + k * a_dim1] /= a[k + k * a_dim1];
|
||||
/* L14: */
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (k > 2) {
|
||||
d12 = w[k - 1 + kw * w_dim1];
|
||||
d11 = w[k + kw * w_dim1] / d12;
|
||||
d22 = w[k - 1 + (kw - 1) * w_dim1] / d12;
|
||||
t = 1.f / (d11 * d22 - 1.f);
|
||||
i__1 = k - 2;
|
||||
for (j = 1; j <= i__1; ++j) {
|
||||
a[j + (k - 1) * a_dim1] = t * ((d11 * w[j + (kw - 1) *
|
||||
w_dim1] - w[j + kw * w_dim1]) / d12);
|
||||
a[j + k * a_dim1] = t * ((d22 * w[j + kw * w_dim1] -
|
||||
w[j + (kw - 1) * w_dim1]) / d12);
|
||||
/* L20: */
|
||||
}
|
||||
}
|
||||
a[k - 1 + (k - 1) * a_dim1] = w[k - 1 + (kw - 1) * w_dim1];
|
||||
a[k - 1 + k * a_dim1] = w[k - 1 + kw * w_dim1];
|
||||
a[k + k * a_dim1] = w[k + kw * w_dim1];
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -p;
|
||||
ipiv[k - 1] = -kp;
|
||||
}
|
||||
k -= kstep;
|
||||
goto L10;
|
||||
L30:
|
||||
j = k + 1;
|
||||
L60:
|
||||
kstep = 1;
|
||||
jp1 = 1;
|
||||
jj = j;
|
||||
jp2 = ipiv[j];
|
||||
if (jp2 < 0) {
|
||||
jp2 = -jp2;
|
||||
++j;
|
||||
jp1 = -ipiv[j];
|
||||
kstep = 2;
|
||||
}
|
||||
++j;
|
||||
if (jp2 != jj && j <= *n) {
|
||||
i__1 = *n - j + 1;
|
||||
sswap_(&i__1, &a[jp2 + j * a_dim1], lda, &a[jj + j * a_dim1], lda)
|
||||
;
|
||||
}
|
||||
jj = j - 1;
|
||||
if (jp1 != jj && kstep == 2) {
|
||||
i__1 = *n - j + 1;
|
||||
sswap_(&i__1, &a[jp1 + j * a_dim1], lda, &a[jj + j * a_dim1], lda)
|
||||
;
|
||||
}
|
||||
if (j <= *n) {
|
||||
goto L60;
|
||||
}
|
||||
*kb = *n - k;
|
||||
} else {
|
||||
k = 1;
|
||||
L70:
|
||||
if ((k >= *nb && *nb < *n) || k > *n) {
|
||||
goto L90;
|
||||
}
|
||||
kstep = 1;
|
||||
p = k;
|
||||
i__1 = *n - k + 1;
|
||||
scopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1);
|
||||
if (k > 1) {
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
sgemv_("No transpose", &i__1, &i__2, &c_b9, &a[k + a_dim1], lda, &
|
||||
w[k + w_dim1], ldw, &c_b10, &w[k + k * w_dim1], &c__1, (
|
||||
ftnlen)12);
|
||||
}
|
||||
absakk = (r__1 = w[k + k * w_dim1], dabs(r__1));
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
imax = k + isamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
colmax = (r__1 = w[imax + k * w_dim1], dabs(r__1));
|
||||
} else {
|
||||
colmax = 0.f;
|
||||
}
|
||||
if (dmax(absakk,colmax) == 0.f) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
i__1 = *n - k + 1;
|
||||
scopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
|
||||
c__1);
|
||||
} else {
|
||||
if (! (absakk < alpha * colmax)) {
|
||||
kp = k;
|
||||
} else {
|
||||
done = FALSE_;
|
||||
L72:
|
||||
i__1 = imax - k;
|
||||
scopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = *n - imax + 1;
|
||||
scopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k +
|
||||
1) * w_dim1], &c__1);
|
||||
if (k > 1) {
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
sgemv_("No transpose", &i__1, &i__2, &c_b9, &a[k + a_dim1]
|
||||
, lda, &w[imax + w_dim1], ldw, &c_b10, &w[k + (k
|
||||
+ 1) * w_dim1], &c__1, (ftnlen)12);
|
||||
}
|
||||
if (imax != k) {
|
||||
i__1 = imax - k;
|
||||
jmax = k - 1 + isamax_(&i__1, &w[k + (k + 1) * w_dim1], &
|
||||
c__1);
|
||||
rowmax = (r__1 = w[jmax + (k + 1) * w_dim1], dabs(r__1));
|
||||
} else {
|
||||
rowmax = 0.f;
|
||||
}
|
||||
if (imax < *n) {
|
||||
i__1 = *n - imax;
|
||||
itemp = imax + isamax_(&i__1, &w[imax + 1 + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
stemp = (r__1 = w[itemp + (k + 1) * w_dim1], dabs(r__1));
|
||||
if (stemp > rowmax) {
|
||||
rowmax = stemp;
|
||||
jmax = itemp;
|
||||
}
|
||||
}
|
||||
if (! ((r__1 = w[imax + (k + 1) * w_dim1], dabs(r__1)) <
|
||||
alpha * rowmax)) {
|
||||
kp = imax;
|
||||
i__1 = *n - k + 1;
|
||||
scopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k *
|
||||
w_dim1], &c__1);
|
||||
done = TRUE_;
|
||||
} else if (p == jmax || rowmax <= colmax) {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
done = TRUE_;
|
||||
} else {
|
||||
p = imax;
|
||||
colmax = rowmax;
|
||||
imax = jmax;
|
||||
i__1 = *n - k + 1;
|
||||
scopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k *
|
||||
w_dim1], &c__1);
|
||||
}
|
||||
if (! done) {
|
||||
goto L72;
|
||||
}
|
||||
}
|
||||
kk = k + kstep - 1;
|
||||
if (kstep == 2 && p != k) {
|
||||
i__1 = p - k;
|
||||
scopy_(&i__1, &a[k + k * a_dim1], &c__1, &a[p + k * a_dim1],
|
||||
lda);
|
||||
i__1 = *n - p + 1;
|
||||
scopy_(&i__1, &a[p + k * a_dim1], &c__1, &a[p + p * a_dim1], &
|
||||
c__1);
|
||||
sswap_(&k, &a[k + a_dim1], lda, &a[p + a_dim1], lda);
|
||||
sswap_(&kk, &w[k + w_dim1], ldw, &w[p + w_dim1], ldw);
|
||||
}
|
||||
if (kp != kk) {
|
||||
a[kp + k * a_dim1] = a[kk + k * a_dim1];
|
||||
i__1 = kp - k - 1;
|
||||
scopy_(&i__1, &a[k + 1 + kk * a_dim1], &c__1, &a[kp + (k + 1)
|
||||
* a_dim1], lda);
|
||||
i__1 = *n - kp + 1;
|
||||
scopy_(&i__1, &a[kp + kk * a_dim1], &c__1, &a[kp + kp *
|
||||
a_dim1], &c__1);
|
||||
sswap_(&kk, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda);
|
||||
sswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
i__1 = *n - k + 1;
|
||||
scopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
|
||||
c__1);
|
||||
if (k < *n) {
|
||||
if ((r__1 = a[k + k * a_dim1], dabs(r__1)) >= sfmin) {
|
||||
r1 = 1.f / a[k + k * a_dim1];
|
||||
i__1 = *n - k;
|
||||
sscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1);
|
||||
} else if (a[k + k * a_dim1] != 0.f) {
|
||||
i__1 = *n;
|
||||
for (ii = k + 1; ii <= i__1; ++ii) {
|
||||
a[ii + k * a_dim1] /= a[k + k * a_dim1];
|
||||
/* L74: */
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (k < *n - 1) {
|
||||
d21 = w[k + 1 + k * w_dim1];
|
||||
d11 = w[k + 1 + (k + 1) * w_dim1] / d21;
|
||||
d22 = w[k + k * w_dim1] / d21;
|
||||
t = 1.f / (d11 * d22 - 1.f);
|
||||
i__1 = *n;
|
||||
for (j = k + 2; j <= i__1; ++j) {
|
||||
a[j + k * a_dim1] = t * ((d11 * w[j + k * w_dim1] - w[
|
||||
j + (k + 1) * w_dim1]) / d21);
|
||||
a[j + (k + 1) * a_dim1] = t * ((d22 * w[j + (k + 1) *
|
||||
w_dim1] - w[j + k * w_dim1]) / d21);
|
||||
/* L80: */
|
||||
}
|
||||
}
|
||||
a[k + k * a_dim1] = w[k + k * w_dim1];
|
||||
a[k + 1 + k * a_dim1] = w[k + 1 + k * w_dim1];
|
||||
a[k + 1 + (k + 1) * a_dim1] = w[k + 1 + (k + 1) * w_dim1];
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -p;
|
||||
ipiv[k + 1] = -kp;
|
||||
}
|
||||
k += kstep;
|
||||
goto L70;
|
||||
L90:
|
||||
j = k - 1;
|
||||
L120:
|
||||
kstep = 1;
|
||||
jp1 = 1;
|
||||
jj = j;
|
||||
jp2 = ipiv[j];
|
||||
if (jp2 < 0) {
|
||||
jp2 = -jp2;
|
||||
--j;
|
||||
jp1 = -ipiv[j];
|
||||
kstep = 2;
|
||||
}
|
||||
--j;
|
||||
if (jp2 != jj && j >= 1) {
|
||||
sswap_(&j, &a[jp2 + a_dim1], lda, &a[jj + a_dim1], lda);
|
||||
}
|
||||
jj = j + 1;
|
||||
if (jp1 != jj && kstep == 2) {
|
||||
sswap_(&j, &a[jp1 + a_dim1], lda, &a[jj + a_dim1], lda);
|
||||
}
|
||||
if (j >= 1) {
|
||||
goto L120;
|
||||
}
|
||||
*kb = k - 1;
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,274 @@
|
|||
#include "relapack.h"
|
||||
#include <math.h>
|
||||
|
||||
static void RELAPACK_stgsyl_rec(const char *, const int *, const int *,
|
||||
const int *, const float *, const int *, const float *, const int *,
|
||||
float *, const int *, const float *, const int *, const float *,
|
||||
const int *, float *, const int *, float *, float *, float *, int *, int *,
|
||||
int *);
|
||||
|
||||
|
||||
/** STGSYL solves the generalized Sylvester equation.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's stgsyl.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/dc/d67/stgsyl_8f.html
|
||||
* */
|
||||
void RELAPACK_stgsyl(
|
||||
const char *trans, const int *ijob, const int *m, const int *n,
|
||||
const float *A, const int *ldA, const float *B, const int *ldB,
|
||||
float *C, const int *ldC,
|
||||
const float *D, const int *ldD, const float *E, const int *ldE,
|
||||
float *F, const int *ldF,
|
||||
float *scale, float *dif,
|
||||
float *Work, const int *lWork, int *iWork, int *info
|
||||
) {
|
||||
|
||||
// Parse arguments
|
||||
const int notran = LAPACK(lsame)(trans, "N");
|
||||
const int tran = LAPACK(lsame)(trans, "T");
|
||||
|
||||
// Compute work buffer size
|
||||
int lwmin = 1;
|
||||
if (notran && (*ijob == 1 || *ijob == 2))
|
||||
lwmin = MAX(1, 2 * *m * *n);
|
||||
*info = 0;
|
||||
|
||||
// Check arguments
|
||||
if (!tran && !notran)
|
||||
*info = -1;
|
||||
else if (notran && (*ijob < 0 || *ijob > 4))
|
||||
*info = -2;
|
||||
else if (*m <= 0)
|
||||
*info = -3;
|
||||
else if (*n <= 0)
|
||||
*info = -4;
|
||||
else if (*ldA < MAX(1, *m))
|
||||
*info = -6;
|
||||
else if (*ldB < MAX(1, *n))
|
||||
*info = -8;
|
||||
else if (*ldC < MAX(1, *m))
|
||||
*info = -10;
|
||||
else if (*ldD < MAX(1, *m))
|
||||
*info = -12;
|
||||
else if (*ldE < MAX(1, *n))
|
||||
*info = -14;
|
||||
else if (*ldF < MAX(1, *m))
|
||||
*info = -16;
|
||||
else if (*lWork < lwmin && *lWork != -1)
|
||||
*info = -20;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("STGSYL", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
if (*lWork == -1) {
|
||||
// Work size query
|
||||
*Work = lwmin;
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleantrans = notran ? 'N' : 'T';
|
||||
|
||||
// Constant
|
||||
const float ZERO[] = { 0. };
|
||||
|
||||
int isolve = 1;
|
||||
int ifunc = 0;
|
||||
if (notran) {
|
||||
if (*ijob >= 3) {
|
||||
ifunc = *ijob - 2;
|
||||
LAPACK(slaset)("F", m, n, ZERO, ZERO, C, ldC);
|
||||
LAPACK(slaset)("F", m, n, ZERO, ZERO, F, ldF);
|
||||
} else if (*ijob >= 1)
|
||||
isolve = 2;
|
||||
}
|
||||
|
||||
float scale2;
|
||||
int iround;
|
||||
for (iround = 1; iround <= isolve; iround++) {
|
||||
*scale = 1;
|
||||
float dscale = 0;
|
||||
float dsum = 1;
|
||||
int pq;
|
||||
RELAPACK_stgsyl_rec(&cleantrans, &ifunc, m, n, A, ldA, B, ldB, C, ldC, D, ldD, E, ldE, F, ldF, scale, &dsum, &dscale, iWork, &pq, info);
|
||||
if (dscale != 0) {
|
||||
if (*ijob == 1 || *ijob == 3)
|
||||
*dif = sqrt(2 * *m * *n) / (dscale * sqrt(dsum));
|
||||
else
|
||||
*dif = sqrt(pq) / (dscale * sqrt(dsum));
|
||||
}
|
||||
if (isolve == 2) {
|
||||
if (iround == 1) {
|
||||
if (notran)
|
||||
ifunc = *ijob;
|
||||
scale2 = *scale;
|
||||
LAPACK(slacpy)("F", m, n, C, ldC, Work, m);
|
||||
LAPACK(slacpy)("F", m, n, F, ldF, Work + *m * *n, m);
|
||||
LAPACK(slaset)("F", m, n, ZERO, ZERO, C, ldC);
|
||||
LAPACK(slaset)("F", m, n, ZERO, ZERO, F, ldF);
|
||||
} else {
|
||||
LAPACK(slacpy)("F", m, n, Work, m, C, ldC);
|
||||
LAPACK(slacpy)("F", m, n, Work + *m * *n, m, F, ldF);
|
||||
*scale = scale2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** stgsyl's recursive vompute kernel */
|
||||
static void RELAPACK_stgsyl_rec(
|
||||
const char *trans, const int *ifunc, const int *m, const int *n,
|
||||
const float *A, const int *ldA, const float *B, const int *ldB,
|
||||
float *C, const int *ldC,
|
||||
const float *D, const int *ldD, const float *E, const int *ldE,
|
||||
float *F, const int *ldF,
|
||||
float *scale, float *dsum, float *dscale,
|
||||
int *iWork, int *pq, int *info
|
||||
) {
|
||||
|
||||
if (*m <= MAX(CROSSOVER_STGSYL, 1) && *n <= MAX(CROSSOVER_STGSYL, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(stgsy2)(trans, ifunc, m, n, A, ldA, B, ldB, C, ldC, D, ldD, E, ldE, F, ldF, scale, dsum, dscale, iWork, pq, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1. };
|
||||
const float MONE[] = { -1. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Outputs
|
||||
float scale1[] = { 1. };
|
||||
float scale2[] = { 1. };
|
||||
int info1[] = { 0 };
|
||||
int info2[] = { 0 };
|
||||
|
||||
if (*m > *n) {
|
||||
// Splitting
|
||||
int m1 = SREC_SPLIT(*m);
|
||||
if (A[m1 + *ldA * (m1 - 1)])
|
||||
m1++;
|
||||
const int m2 = *m - m1;
|
||||
|
||||
// A_TL A_TR
|
||||
// 0 A_BR
|
||||
const float *const A_TL = A;
|
||||
const float *const A_TR = A + *ldA * m1;
|
||||
const float *const A_BR = A + *ldA * m1 + m1;
|
||||
|
||||
// C_T
|
||||
// C_B
|
||||
float *const C_T = C;
|
||||
float *const C_B = C + m1;
|
||||
|
||||
// D_TL D_TR
|
||||
// 0 D_BR
|
||||
const float *const D_TL = D;
|
||||
const float *const D_TR = D + *ldD * m1;
|
||||
const float *const D_BR = D + *ldD * m1 + m1;
|
||||
|
||||
// F_T
|
||||
// F_B
|
||||
float *const F_T = F;
|
||||
float *const F_B = F + m1;
|
||||
|
||||
if (*trans == 'N') {
|
||||
// recursion(A_BR, B, C_B, D_BR, E, F_B)
|
||||
RELAPACK_stgsyl_rec(trans, ifunc, &m2, n, A_BR, ldA, B, ldB, C_B, ldC, D_BR, ldD, E, ldE, F_B, ldF, scale1, dsum, dscale, iWork, pq, info1);
|
||||
// C_T = C_T - A_TR * C_B
|
||||
BLAS(sgemm)("N", "N", &m1, n, &m2, MONE, A_TR, ldA, C_B, ldC, scale1, C_T, ldC);
|
||||
// F_T = F_T - D_TR * C_B
|
||||
BLAS(sgemm)("N", "N", &m1, n, &m2, MONE, D_TR, ldD, C_B, ldC, scale1, F_T, ldF);
|
||||
// recursion(A_TL, B, C_T, D_TL, E, F_T)
|
||||
RELAPACK_stgsyl_rec(trans, ifunc, &m1, n, A_TL, ldA, B, ldB, C_T, ldC, D_TL, ldD, E, ldE, F_T, ldF, scale2, dsum, dscale, iWork, pq, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1) {
|
||||
LAPACK(slascl)("G", iONE, iONE, ONE, scale2, &m2, n, C_B, ldC, info);
|
||||
LAPACK(slascl)("G", iONE, iONE, ONE, scale2, &m2, n, F_B, ldF, info);
|
||||
}
|
||||
} else {
|
||||
// recursion(A_TL, B, C_T, D_TL, E, F_T)
|
||||
RELAPACK_stgsyl_rec(trans, ifunc, &m1, n, A_TL, ldA, B, ldB, C_T, ldC, D_TL, ldD, E, ldE, F_T, ldF, scale1, dsum, dscale, iWork, pq, info1);
|
||||
// apply scale
|
||||
if (scale1[0] != 1)
|
||||
LAPACK(slascl)("G", iONE, iONE, ONE, scale1, &m2, n, F_B, ldF, info);
|
||||
// C_B = C_B - A_TR^H * C_T
|
||||
BLAS(sgemm)("T", "N", &m2, n, &m1, MONE, A_TR, ldA, C_T, ldC, scale1, C_B, ldC);
|
||||
// C_B = C_B - D_TR^H * F_T
|
||||
BLAS(sgemm)("T", "N", &m2, n, &m1, MONE, D_TR, ldD, F_T, ldC, ONE, C_B, ldC);
|
||||
// recursion(A_BR, B, C_B, D_BR, E, F_B)
|
||||
RELAPACK_stgsyl_rec(trans, ifunc, &m2, n, A_BR, ldA, B, ldB, C_B, ldC, D_BR, ldD, E, ldE, F_B, ldF, scale2, dsum, dscale, iWork, pq, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1) {
|
||||
LAPACK(slascl)("G", iONE, iONE, ONE, scale2, &m1, n, C_T, ldC, info);
|
||||
LAPACK(slascl)("G", iONE, iONE, ONE, scale2, &m1, n, F_T, ldF, info);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Splitting
|
||||
int n1 = SREC_SPLIT(*n);
|
||||
if (B[n1 + *ldB * (n1 - 1)])
|
||||
n1++;
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// B_TL B_TR
|
||||
// 0 B_BR
|
||||
const float *const B_TL = B;
|
||||
const float *const B_TR = B + *ldB * n1;
|
||||
const float *const B_BR = B + *ldB * n1 + n1;
|
||||
|
||||
// C_L C_R
|
||||
float *const C_L = C;
|
||||
float *const C_R = C + *ldC * n1;
|
||||
|
||||
// E_TL E_TR
|
||||
// 0 E_BR
|
||||
const float *const E_TL = E;
|
||||
const float *const E_TR = E + *ldE * n1;
|
||||
const float *const E_BR = E + *ldE * n1 + n1;
|
||||
|
||||
// F_L F_R
|
||||
float *const F_L = F;
|
||||
float *const F_R = F + *ldF * n1;
|
||||
|
||||
if (*trans == 'N') {
|
||||
// recursion(A, B_TL, C_L, D, E_TL, F_L)
|
||||
RELAPACK_stgsyl_rec(trans, ifunc, m, &n1, A, ldA, B_TL, ldB, C_L, ldC, D, ldD, E_TL, ldE, F_L, ldF, scale1, dsum, dscale, iWork, pq, info1);
|
||||
// C_R = C_R + F_L * B_TR
|
||||
BLAS(sgemm)("N", "N", m, &n2, &n1, ONE, F_L, ldF, B_TR, ldB, scale1, C_R, ldC);
|
||||
// F_R = F_R + F_L * E_TR
|
||||
BLAS(sgemm)("N", "N", m, &n2, &n1, ONE, F_L, ldF, E_TR, ldE, scale1, F_R, ldF);
|
||||
// recursion(A, B_BR, C_R, D, E_BR, F_R)
|
||||
RELAPACK_stgsyl_rec(trans, ifunc, m, &n2, A, ldA, B_BR, ldB, C_R, ldC, D, ldD, E_BR, ldE, F_R, ldF, scale2, dsum, dscale, iWork, pq, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1) {
|
||||
LAPACK(slascl)("G", iONE, iONE, ONE, scale2, m, &n1, C_L, ldC, info);
|
||||
LAPACK(slascl)("G", iONE, iONE, ONE, scale2, m, &n1, F_L, ldF, info);
|
||||
}
|
||||
} else {
|
||||
// recursion(A, B_BR, C_R, D, E_BR, F_R)
|
||||
RELAPACK_stgsyl_rec(trans, ifunc, m, &n2, A, ldA, B_BR, ldB, C_R, ldC, D, ldD, E_BR, ldE, F_R, ldF, scale1, dsum, dscale, iWork, pq, info1);
|
||||
// apply scale
|
||||
if (scale1[0] != 1)
|
||||
LAPACK(slascl)("G", iONE, iONE, ONE, scale1, m, &n1, C_L, ldC, info);
|
||||
// F_L = F_L + C_R * B_TR
|
||||
BLAS(sgemm)("N", "T", m, &n1, &n2, ONE, C_R, ldC, B_TR, ldB, scale1, F_L, ldF);
|
||||
// F_L = F_L + F_R * E_TR
|
||||
BLAS(sgemm)("N", "T", m, &n1, &n2, ONE, F_R, ldF, E_TR, ldB, ONE, F_L, ldF);
|
||||
// recursion(A, B_TL, C_L, D, E_TL, F_L)
|
||||
RELAPACK_stgsyl_rec(trans, ifunc, m, &n1, A, ldA, B_TL, ldB, C_L, ldC, D, ldD, E_TL, ldE, F_L, ldF, scale2, dsum, dscale, iWork, pq, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1) {
|
||||
LAPACK(slascl)("G", iONE, iONE, ONE, scale2, m, &n2, C_R, ldC, info);
|
||||
LAPACK(slascl)("G", iONE, iONE, ONE, scale2, m, &n2, F_R, ldF, info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*scale = scale1[0] * scale2[0];
|
||||
*info = info1[0] || info2[0];
|
||||
}
|
|
@ -0,0 +1,169 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_strsyl_rec(const char *, const char *, const int *,
|
||||
const int *, const int *, const float *, const int *, const float *,
|
||||
const int *, float *, const int *, float *, int *);
|
||||
|
||||
|
||||
/** STRSYL solves the real Sylvester matrix equation.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's strsyl.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d4/d7d/strsyl_8f.html
|
||||
* */
|
||||
void RELAPACK_strsyl(
|
||||
const char *tranA, const char *tranB, const int *isgn,
|
||||
const int *m, const int *n,
|
||||
const float *A, const int *ldA, const float *B, const int *ldB,
|
||||
float *C, const int *ldC, float *scale,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int notransA = LAPACK(lsame)(tranA, "N");
|
||||
const int transA = LAPACK(lsame)(tranA, "T");
|
||||
const int ctransA = LAPACK(lsame)(tranA, "C");
|
||||
const int notransB = LAPACK(lsame)(tranB, "N");
|
||||
const int transB = LAPACK(lsame)(tranB, "T");
|
||||
const int ctransB = LAPACK(lsame)(tranB, "C");
|
||||
*info = 0;
|
||||
if (!transA && !ctransA && !notransA)
|
||||
*info = -1;
|
||||
else if (!transB && !ctransB && !notransB)
|
||||
*info = -2;
|
||||
else if (*isgn != 1 && *isgn != -1)
|
||||
*info = -3;
|
||||
else if (*m < 0)
|
||||
*info = -4;
|
||||
else if (*n < 0)
|
||||
*info = -5;
|
||||
else if (*ldA < MAX(1, *m))
|
||||
*info = -7;
|
||||
else if (*ldB < MAX(1, *n))
|
||||
*info = -9;
|
||||
else if (*ldC < MAX(1, *m))
|
||||
*info = -11;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("STRSYL", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleantranA = notransA ? 'N' : (transA ? 'T' : 'C');
|
||||
const char cleantranB = notransB ? 'N' : (transB ? 'T' : 'C');
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_strsyl_rec(&cleantranA, &cleantranB, isgn, m, n, A, ldA, B, ldB, C, ldC, scale, info);
|
||||
}
|
||||
|
||||
|
||||
/** strsyl's recursive compute kernel */
|
||||
static void RELAPACK_strsyl_rec(
|
||||
const char *tranA, const char *tranB, const int *isgn,
|
||||
const int *m, const int *n,
|
||||
const float *A, const int *ldA, const float *B, const int *ldB,
|
||||
float *C, const int *ldC, float *scale,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*m <= MAX(CROSSOVER_STRSYL, 1) && *n <= MAX(CROSSOVER_STRSYL, 1)) {
|
||||
// Unblocked
|
||||
RELAPACK_strsyl_rec2(tranA, tranB, isgn, m, n, A, ldA, B, ldB, C, ldC, scale, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1. };
|
||||
const float MONE[] = { -1. };
|
||||
const float MSGN[] = { -*isgn };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Outputs
|
||||
float scale1[] = { 1. };
|
||||
float scale2[] = { 1. };
|
||||
int info1[] = { 0 };
|
||||
int info2[] = { 0 };
|
||||
|
||||
if (*m > *n) {
|
||||
// Splitting
|
||||
int m1 = SREC_SPLIT(*m);
|
||||
if (A[m1 + *ldA * (m1 - 1)])
|
||||
m1++;
|
||||
const int m2 = *m - m1;
|
||||
|
||||
// A_TL A_TR
|
||||
// 0 A_BR
|
||||
const float *const A_TL = A;
|
||||
const float *const A_TR = A + *ldA * m1;
|
||||
const float *const A_BR = A + *ldA * m1 + m1;
|
||||
|
||||
// C_T
|
||||
// C_B
|
||||
float *const C_T = C;
|
||||
float *const C_B = C + m1;
|
||||
|
||||
if (*tranA == 'N') {
|
||||
// recusion(A_BR, B, C_B)
|
||||
RELAPACK_strsyl_rec(tranA, tranB, isgn, &m2, n, A_BR, ldA, B, ldB, C_B, ldC, scale1, info1);
|
||||
// C_T = C_T - A_TR * C_B
|
||||
BLAS(sgemm)("N", "N", &m1, n, &m2, MONE, A_TR, ldA, C_B, ldC, scale1, C_T, ldC);
|
||||
// recusion(A_TL, B, C_T)
|
||||
RELAPACK_strsyl_rec(tranA, tranB, isgn, &m1, n, A_TL, ldA, B, ldB, C_T, ldC, scale2, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1)
|
||||
LAPACK(slascl)("G", iONE, iONE, ONE, scale2, &m2, n, C_B, ldC, info);
|
||||
} else {
|
||||
// recusion(A_TL, B, C_T)
|
||||
RELAPACK_strsyl_rec(tranA, tranB, isgn, &m1, n, A_TL, ldA, B, ldB, C_T, ldC, scale1, info1);
|
||||
// C_B = C_B - A_TR' * C_T
|
||||
BLAS(sgemm)("C", "N", &m2, n, &m1, MONE, A_TR, ldA, C_T, ldC, scale1, C_B, ldC);
|
||||
// recusion(A_BR, B, C_B)
|
||||
RELAPACK_strsyl_rec(tranA, tranB, isgn, &m2, n, A_BR, ldA, B, ldB, C_B, ldC, scale2, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1)
|
||||
LAPACK(slascl)("G", iONE, iONE, ONE, scale2, &m1, n, C_B, ldC, info);
|
||||
}
|
||||
} else {
|
||||
// Splitting
|
||||
int n1 = SREC_SPLIT(*n);
|
||||
if (B[n1 + *ldB * (n1 - 1)])
|
||||
n1++;
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// B_TL B_TR
|
||||
// 0 B_BR
|
||||
const float *const B_TL = B;
|
||||
const float *const B_TR = B + *ldB * n1;
|
||||
const float *const B_BR = B + *ldB * n1 + n1;
|
||||
|
||||
// C_L C_R
|
||||
float *const C_L = C;
|
||||
float *const C_R = C + *ldC * n1;
|
||||
|
||||
if (*tranB == 'N') {
|
||||
// recusion(A, B_TL, C_L)
|
||||
RELAPACK_strsyl_rec(tranA, tranB, isgn, m, &n1, A, ldA, B_TL, ldB, C_L, ldC, scale1, info1);
|
||||
// C_R = C_R -/+ C_L * B_TR
|
||||
BLAS(sgemm)("N", "N", m, &n2, &n1, MSGN, C_L, ldC, B_TR, ldB, scale1, C_R, ldC);
|
||||
// recusion(A, B_BR, C_R)
|
||||
RELAPACK_strsyl_rec(tranA, tranB, isgn, m, &n2, A, ldA, B_BR, ldB, C_R, ldC, scale2, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1)
|
||||
LAPACK(slascl)("G", iONE, iONE, ONE, scale2, m, &n1, C_L, ldC, info);
|
||||
} else {
|
||||
// recusion(A, B_BR, C_R)
|
||||
RELAPACK_strsyl_rec(tranA, tranB, isgn, m, &n2, A, ldA, B_BR, ldB, C_R, ldC, scale1, info1);
|
||||
// C_L = C_L -/+ C_R * B_TR'
|
||||
BLAS(sgemm)("N", "C", m, &n1, &n2, MSGN, C_R, ldC, B_TR, ldB, scale1, C_L, ldC);
|
||||
// recusion(A, B_TL, C_L)
|
||||
RELAPACK_strsyl_rec(tranA, tranB, isgn, m, &n1, A, ldA, B_TL, ldB, C_L, ldC, scale2, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1)
|
||||
LAPACK(slascl)("G", iONE, iONE, ONE, scale2, m, &n2, C_R, ldC, info);
|
||||
}
|
||||
}
|
||||
|
||||
*scale = scale1[0] * scale2[0];
|
||||
*info = info1[0] || info2[0];
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,107 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_strtri_rec(const char *, const char *, const int *,
|
||||
float *, const int *, int *);
|
||||
|
||||
|
||||
/** CTRTRI computes the inverse of a real upper or lower triangular matrix A.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's strtri.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/de/d76/strtri_8f.html
|
||||
* */
|
||||
void RELAPACK_strtri(
|
||||
const char *uplo, const char *diag, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
const int nounit = LAPACK(lsame)(diag, "N");
|
||||
const int unit = LAPACK(lsame)(diag, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (!nounit && !unit)
|
||||
*info = -2;
|
||||
else if (*n < 0)
|
||||
*info = -3;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -5;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("STRTRI", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
const char cleandiag = nounit ? 'N' : 'U';
|
||||
|
||||
// check for singularity
|
||||
if (nounit) {
|
||||
int i;
|
||||
for (i = 0; i < *n; i++)
|
||||
if (A[i + *ldA * i] == 0) {
|
||||
*info = i;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_strtri_rec(&cleanuplo, &cleandiag, n, A, ldA, info);
|
||||
}
|
||||
|
||||
|
||||
/** strtri's recursive compute kernel */
|
||||
static void RELAPACK_strtri_rec(
|
||||
const char *uplo, const char *diag, const int *n,
|
||||
float *A, const int *ldA,
|
||||
int *info
|
||||
){
|
||||
|
||||
if (*n <= MAX(CROSSOVER_STRTRI, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(strti2)(uplo, diag, n, A, ldA, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const float ONE[] = { 1. };
|
||||
const float MONE[] = { -1. };
|
||||
|
||||
// Splitting
|
||||
const int n1 = SREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
float *const A_TL = A;
|
||||
float *const A_TR = A + *ldA * n1;
|
||||
float *const A_BL = A + n1;
|
||||
float *const A_BR = A + *ldA * n1 + n1;
|
||||
|
||||
// recursion(A_TL)
|
||||
RELAPACK_strtri_rec(uplo, diag, &n1, A_TL, ldA, info);
|
||||
if (*info)
|
||||
return;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// A_BL = - A_BL * A_TL
|
||||
BLAS(strmm)("R", "L", "N", diag, &n2, &n1, MONE, A_TL, ldA, A_BL, ldA);
|
||||
// A_BL = A_BR \ A_BL
|
||||
BLAS(strsm)("L", "L", "N", diag, &n2, &n1, ONE, A_BR, ldA, A_BL, ldA);
|
||||
} else {
|
||||
// A_TR = - A_TL * A_TR
|
||||
BLAS(strmm)("L", "U", "N", diag, &n1, &n2, MONE, A_TL, ldA, A_TR, ldA);
|
||||
// A_TR = A_TR / A_BR
|
||||
BLAS(strsm)("R", "U", "N", diag, &n1, &n2, ONE, A_BR, ldA, A_TR, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR)
|
||||
RELAPACK_strtri_rec(uplo, diag, &n2, A_BR, ldA, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
}
|
|
@ -0,0 +1,230 @@
|
|||
#include "relapack.h"
|
||||
#include "stdlib.h"
|
||||
|
||||
static void RELAPACK_zgbtrf_rec(const int *, const int *, const int *,
|
||||
const int *, double *, const int *, int *, double *, const int *, double *,
|
||||
const int *, int *);
|
||||
|
||||
|
||||
/** ZGBTRF computes an LU factorization of a complex m-by-n band matrix A using partial pivoting with row interchanges.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's zgbtrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/dc/dcb/zgbtrf_8f.html
|
||||
* */
|
||||
void RELAPACK_zgbtrf(
|
||||
const int *m, const int *n, const int *kl, const int *ku,
|
||||
double *Ab, const int *ldAb, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
*info = 0;
|
||||
if (*m < 0)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*kl < 0)
|
||||
*info = -3;
|
||||
else if (*ku < 0)
|
||||
*info = -4;
|
||||
else if (*ldAb < 2 * *kl + *ku + 1)
|
||||
*info = -6;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("ZGBTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constant
|
||||
const double ZERO[] = { 0., 0. };
|
||||
|
||||
// Result upper band width
|
||||
const int kv = *ku + *kl;
|
||||
|
||||
// Unskew A
|
||||
const int ldA[] = { *ldAb - 1 };
|
||||
double *const A = Ab + 2 * kv;
|
||||
|
||||
// Zero upper diagonal fill-in elements
|
||||
int i, j;
|
||||
for (j = 0; j < *n; j++) {
|
||||
double *const A_j = A + 2 * *ldA * j;
|
||||
for (i = MAX(0, j - kv); i < j - *ku; i++)
|
||||
A_j[2 * i] = A_j[2 * i + 1] = 0.;
|
||||
}
|
||||
|
||||
// Allocate work space
|
||||
const int n1 = ZREC_SPLIT(*n);
|
||||
const int mWorkl = (kv > n1) ? MAX(1, *m - *kl) : kv;
|
||||
const int nWorkl = (kv > n1) ? n1 : kv;
|
||||
const int mWorku = (*kl > n1) ? n1 : *kl;
|
||||
const int nWorku = (*kl > n1) ? MAX(0, *n - *kl) : *kl;
|
||||
double *Workl = malloc(mWorkl * nWorkl * 2 * sizeof(double));
|
||||
double *Worku = malloc(mWorku * nWorku * 2 * sizeof(double));
|
||||
LAPACK(zlaset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl);
|
||||
LAPACK(zlaset)("U", &mWorku, &nWorku, ZERO, ZERO, Worku, &mWorku);
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_zgbtrf_rec(m, n, kl, ku, Ab, ldAb, ipiv, Workl, &mWorkl, Worku, &mWorku, info);
|
||||
|
||||
// Free work space
|
||||
free(Workl);
|
||||
free(Worku);
|
||||
}
|
||||
|
||||
|
||||
/** zgbtrf's recursive compute kernel */
|
||||
static void RELAPACK_zgbtrf_rec(
|
||||
const int *m, const int *n, const int *kl, const int *ku,
|
||||
double *Ab, const int *ldAb, int *ipiv,
|
||||
double *Workl, const int *ldWorkl, double *Worku, const int *ldWorku,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_ZGBTRF, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(zgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1., 0. };
|
||||
const double MONE[] = { -1., 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Loop iterators
|
||||
int i, j;
|
||||
|
||||
// Output upper band width
|
||||
const int kv = *ku + *kl;
|
||||
|
||||
// Unskew A
|
||||
const int ldA[] = { *ldAb - 1 };
|
||||
double *const A = Ab + 2 * kv;
|
||||
|
||||
// Splitting
|
||||
const int n1 = MIN(ZREC_SPLIT(*n), *kl);
|
||||
const int n2 = *n - n1;
|
||||
const int m1 = MIN(n1, *m);
|
||||
const int m2 = *m - m1;
|
||||
const int mn1 = MIN(m1, n1);
|
||||
const int mn2 = MIN(m2, n2);
|
||||
|
||||
// Ab_L *
|
||||
// Ab_BR
|
||||
double *const Ab_L = Ab;
|
||||
double *const Ab_BR = Ab + 2 * *ldAb * n1;
|
||||
|
||||
// A_L A_R
|
||||
double *const A_L = A;
|
||||
double *const A_R = A + 2 * *ldA * n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
double *const A_TL = A;
|
||||
double *const A_TR = A + 2 * *ldA * n1;
|
||||
double *const A_BL = A + 2 * m1;
|
||||
double *const A_BR = A + 2 * *ldA * n1 + 2 * m1;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_T = ipiv;
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// Banded splitting
|
||||
const int n21 = MIN(n2, kv - n1);
|
||||
const int n22 = MIN(n2 - n21, n1);
|
||||
const int m21 = MIN(m2, *kl - m1);
|
||||
const int m22 = MIN(m2 - m21, m1);
|
||||
|
||||
// n1 n21 n22
|
||||
// m * A_Rl ARr
|
||||
double *const A_Rl = A_R;
|
||||
double *const A_Rr = A_R + 2 * *ldA * n21;
|
||||
|
||||
// n1 n21 n22
|
||||
// m1 * A_TRl A_TRr
|
||||
// m21 A_BLt A_BRtl A_BRtr
|
||||
// m22 A_BLb A_BRbl A_BRbr
|
||||
double *const A_TRl = A_TR;
|
||||
double *const A_TRr = A_TR + 2 * *ldA * n21;
|
||||
double *const A_BLt = A_BL;
|
||||
double *const A_BLb = A_BL + 2 * m21;
|
||||
double *const A_BRtl = A_BR;
|
||||
double *const A_BRtr = A_BR + 2 * *ldA * n21;
|
||||
double *const A_BRbl = A_BR + 2 * m21;
|
||||
double *const A_BRbr = A_BR + 2 * *ldA * n21 + 2 * m21;
|
||||
|
||||
// recursion(Ab_L, ipiv_T)
|
||||
RELAPACK_zgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info);
|
||||
|
||||
// Workl = A_BLb
|
||||
LAPACK(zlacpy)("U", &m22, &n1, A_BLb, ldA, Workl, ldWorkl);
|
||||
|
||||
// partially redo swaps in A_L
|
||||
for (i = 0; i < mn1; i++) {
|
||||
const int ip = ipiv_T[i] - 1;
|
||||
if (ip != i) {
|
||||
if (ip < *kl)
|
||||
BLAS(zswap)(&i, A_L + 2 * i, ldA, A_L + 2 * ip, ldA);
|
||||
else
|
||||
BLAS(zswap)(&i, A_L + 2 * i, ldA, Workl + 2 * (ip - *kl), ldWorkl);
|
||||
}
|
||||
}
|
||||
|
||||
// apply pivots to A_Rl
|
||||
LAPACK(zlaswp)(&n21, A_Rl, ldA, iONE, &mn1, ipiv_T, iONE);
|
||||
|
||||
// apply pivots to A_Rr columnwise
|
||||
for (j = 0; j < n22; j++) {
|
||||
double *const A_Rrj = A_Rr + 2 * *ldA * j;
|
||||
for (i = j; i < mn1; i++) {
|
||||
const int ip = ipiv_T[i] - 1;
|
||||
if (ip != i) {
|
||||
const double tmpr = A_Rrj[2 * i];
|
||||
const double tmpc = A_Rrj[2 * i + 1];
|
||||
A_Rrj[2 * i] = A_Rrj[2 * ip];
|
||||
A_Rrj[2 * i + 1] = A_Rrj[2 * ip + 1];
|
||||
A_Rrj[2 * ip] = tmpr;
|
||||
A_Rrj[2 * ip + 1] = tmpc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A_TRl = A_TL \ A_TRl
|
||||
BLAS(ztrsm)("L", "L", "N", "U", &m1, &n21, ONE, A_TL, ldA, A_TRl, ldA);
|
||||
// Worku = A_TRr
|
||||
LAPACK(zlacpy)("L", &m1, &n22, A_TRr, ldA, Worku, ldWorku);
|
||||
// Worku = A_TL \ Worku
|
||||
BLAS(ztrsm)("L", "L", "N", "U", &m1, &n22, ONE, A_TL, ldA, Worku, ldWorku);
|
||||
// A_TRr = Worku
|
||||
LAPACK(zlacpy)("L", &m1, &n22, Worku, ldWorku, A_TRr, ldA);
|
||||
// A_BRtl = A_BRtl - A_BLt * A_TRl
|
||||
BLAS(zgemm)("N", "N", &m21, &n21, &n1, MONE, A_BLt, ldA, A_TRl, ldA, ONE, A_BRtl, ldA);
|
||||
// A_BRbl = A_BRbl - Workl * A_TRl
|
||||
BLAS(zgemm)("N", "N", &m22, &n21, &n1, MONE, Workl, ldWorkl, A_TRl, ldA, ONE, A_BRbl, ldA);
|
||||
// A_BRtr = A_BRtr - A_BLt * Worku
|
||||
BLAS(zgemm)("N", "N", &m21, &n22, &n1, MONE, A_BLt, ldA, Worku, ldWorku, ONE, A_BRtr, ldA);
|
||||
// A_BRbr = A_BRbr - Workl * Worku
|
||||
BLAS(zgemm)("N", "N", &m22, &n22, &n1, MONE, Workl, ldWorkl, Worku, ldWorku, ONE, A_BRbr, ldA);
|
||||
|
||||
// partially undo swaps in A_L
|
||||
for (i = mn1 - 1; i >= 0; i--) {
|
||||
const int ip = ipiv_T[i] - 1;
|
||||
if (ip != i) {
|
||||
if (ip < *kl)
|
||||
BLAS(zswap)(&i, A_L + 2 * i, ldA, A_L + 2 * ip, ldA);
|
||||
else
|
||||
BLAS(zswap)(&i, A_L + 2 * i, ldA, Workl + 2 * (ip - *kl), ldWorkl);
|
||||
}
|
||||
}
|
||||
|
||||
// recursion(Ab_BR, ipiv_B)
|
||||
RELAPACK_zgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
// shift pivots
|
||||
for (i = 0; i < mn2; i++)
|
||||
ipiv_B[i] += n1;
|
||||
}
|
|
@ -0,0 +1,167 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_zgemmt_rec(const char *, const char *, const char *,
|
||||
const int *, const int *, const double *, const double *, const int *,
|
||||
const double *, const int *, const double *, double *, const int *);
|
||||
|
||||
static void RELAPACK_zgemmt_rec2(const char *, const char *, const char *,
|
||||
const int *, const int *, const double *, const double *, const int *,
|
||||
const double *, const int *, const double *, double *, const int *);
|
||||
|
||||
|
||||
/** ZGEMMT computes a matrix-matrix product with general matrices but updates
|
||||
* only the upper or lower triangular part of the result matrix.
|
||||
*
|
||||
* This routine performs the same operation as the BLAS routine
|
||||
* zgemm(transA, transB, n, n, k, alpha, A, ldA, B, ldB, beta, C, ldC)
|
||||
* but only updates the triangular part of C specified by uplo:
|
||||
* If (*uplo == 'L'), only the lower triangular part of C is updated,
|
||||
* otherwise the upper triangular part is updated.
|
||||
* */
|
||||
void RELAPACK_zgemmt(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const double *alpha, const double *A, const int *ldA,
|
||||
const double *B, const int *ldB,
|
||||
const double *beta, double *C, const int *ldC
|
||||
) {
|
||||
|
||||
#if HAVE_XGEMMT
|
||||
BLAS(zgemmt)(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, ldC);
|
||||
return;
|
||||
#else
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
const int notransA = LAPACK(lsame)(transA, "N");
|
||||
const int tranA = LAPACK(lsame)(transA, "T");
|
||||
const int ctransA = LAPACK(lsame)(transA, "C");
|
||||
const int notransB = LAPACK(lsame)(transB, "N");
|
||||
const int tranB = LAPACK(lsame)(transB, "T");
|
||||
const int ctransB = LAPACK(lsame)(transB, "C");
|
||||
int info = 0;
|
||||
if (!lower && !upper)
|
||||
info = 1;
|
||||
else if (!tranA && !ctransA && !notransA)
|
||||
info = 2;
|
||||
else if (!tranB && !ctransB && !notransB)
|
||||
info = 3;
|
||||
else if (*n < 0)
|
||||
info = 4;
|
||||
else if (*k < 0)
|
||||
info = 5;
|
||||
else if (*ldA < MAX(1, notransA ? *n : *k))
|
||||
info = 8;
|
||||
else if (*ldB < MAX(1, notransB ? *k : *n))
|
||||
info = 10;
|
||||
else if (*ldC < MAX(1, *n))
|
||||
info = 13;
|
||||
if (info) {
|
||||
LAPACK(xerbla)("ZGEMMT", &info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
const char cleantransA = notransA ? 'N' : (tranA ? 'T' : 'C');
|
||||
const char cleantransB = notransB ? 'N' : (tranB ? 'T' : 'C');
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_zgemmt_rec(&cleanuplo, &cleantransA, &cleantransB, n, k, alpha, A, ldA, B, ldB, beta, C, ldC);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** zgemmt's recursive compute kernel */
|
||||
static void RELAPACK_zgemmt_rec(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const double *alpha, const double *A, const int *ldA,
|
||||
const double *B, const int *ldB,
|
||||
const double *beta, double *C, const int *ldC
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_ZGEMMT, 1)) {
|
||||
// Unblocked
|
||||
RELAPACK_zgemmt_rec2(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, ldC);
|
||||
return;
|
||||
}
|
||||
|
||||
// Splitting
|
||||
const int n1 = ZREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_T
|
||||
// A_B
|
||||
const double *const A_T = A;
|
||||
const double *const A_B = A + 2 * ((*transA == 'N') ? n1 : *ldA * n1);
|
||||
|
||||
// B_L B_R
|
||||
const double *const B_L = B;
|
||||
const double *const B_R = B + 2 * ((*transB == 'N') ? *ldB * n1 : n1);
|
||||
|
||||
// C_TL C_TR
|
||||
// C_BL C_BR
|
||||
double *const C_TL = C;
|
||||
double *const C_TR = C + 2 * *ldC * n1;
|
||||
double *const C_BL = C + 2 * n1;
|
||||
double *const C_BR = C + 2 * *ldC * n1 + 2 * n1;
|
||||
|
||||
// recursion(C_TL)
|
||||
RELAPACK_zgemmt_rec(uplo, transA, transB, &n1, k, alpha, A_T, ldA, B_L, ldB, beta, C_TL, ldC);
|
||||
|
||||
if (*uplo == 'L')
|
||||
// C_BL = alpha A_B B_L + beta C_BL
|
||||
BLAS(zgemm)(transA, transB, &n2, &n1, k, alpha, A_B, ldA, B_L, ldB, beta, C_BL, ldC);
|
||||
else
|
||||
// C_TR = alpha A_T B_R + beta C_TR
|
||||
BLAS(zgemm)(transA, transB, &n1, &n2, k, alpha, A_T, ldA, B_R, ldB, beta, C_TR, ldC);
|
||||
|
||||
// recursion(C_BR)
|
||||
RELAPACK_zgemmt_rec(uplo, transA, transB, &n2, k, alpha, A_B, ldA, B_R, ldB, beta, C_BR, ldC);
|
||||
}
|
||||
|
||||
|
||||
/** zgemmt's unblocked compute kernel */
|
||||
static void RELAPACK_zgemmt_rec2(
|
||||
const char *uplo, const char *transA, const char *transB,
|
||||
const int *n, const int *k,
|
||||
const double *alpha, const double *A, const int *ldA,
|
||||
const double *B, const int *ldB,
|
||||
const double *beta, double *C, const int *ldC
|
||||
) {
|
||||
|
||||
const int incB = (*transB == 'N') ? 1 : *ldB;
|
||||
const int incC = 1;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < *n; i++) {
|
||||
// A_0
|
||||
// A_i
|
||||
const double *const A_0 = A;
|
||||
const double *const A_i = A + 2 * ((*transA == 'N') ? i : *ldA * i);
|
||||
|
||||
// * B_i *
|
||||
const double *const B_i = B + 2 * ((*transB == 'N') ? *ldB * i : i);
|
||||
|
||||
// * C_0i *
|
||||
// * C_ii *
|
||||
double *const C_0i = C + 2 * *ldC * i;
|
||||
double *const C_ii = C + 2 * *ldC * i + 2 * i;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
const int nmi = *n - i;
|
||||
if (*transA == 'N')
|
||||
BLAS(zgemv)(transA, &nmi, k, alpha, A_i, ldA, B_i, &incB, beta, C_ii, &incC);
|
||||
else
|
||||
BLAS(zgemv)(transA, k, &nmi, alpha, A_i, ldA, B_i, &incB, beta, C_ii, &incC);
|
||||
} else {
|
||||
const int ip1 = i + 1;
|
||||
if (*transA == 'N')
|
||||
BLAS(zgemv)(transA, &ip1, k, alpha, A_0, ldA, B_i, &incB, beta, C_0i, &incC);
|
||||
else
|
||||
BLAS(zgemv)(transA, k, &ip1, alpha, A_0, ldA, B_i, &incB, beta, C_0i, &incC);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_zgetrf_rec(const int *, const int *, double *,
|
||||
const int *, int *, int *);
|
||||
|
||||
|
||||
/** ZGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's zgetrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/dd/dd1/zgetrf_8f.html
|
||||
* */
|
||||
void RELAPACK_zgetrf(
|
||||
const int *m, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
*info = 0;
|
||||
if (*m < 0)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("ZGETRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
const int sn = MIN(*m, *n);
|
||||
|
||||
RELAPACK_zgetrf_rec(m, &sn, A, ldA, ipiv, info);
|
||||
|
||||
// Right remainder
|
||||
if (*m < *n) {
|
||||
// Constants
|
||||
const double ONE[] = { 1., 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Splitting
|
||||
const int rn = *n - *m;
|
||||
|
||||
// A_L A_R
|
||||
const double *const A_L = A;
|
||||
double *const A_R = A + 2 * *ldA * *m;
|
||||
|
||||
// A_R = apply(ipiv, A_R)
|
||||
LAPACK(zlaswp)(&rn, A_R, ldA, iONE, m, ipiv, iONE);
|
||||
// A_R = A_L \ A_R
|
||||
BLAS(ztrsm)("L", "L", "N", "U", m, &rn, ONE, A_L, ldA, A_R, ldA);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** zgetrf's recursive compute kernel */
|
||||
static void RELAPACK_zgetrf_rec(
|
||||
const int *m, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_ZGETRF, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(zgetf2)(m, n, A, ldA, ipiv, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1., 0. };
|
||||
const double MONE[] = { -1., 0. };
|
||||
const int iONE[] = { 1. };
|
||||
|
||||
// Splitting
|
||||
const int n1 = ZREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
const int m2 = *m - n1;
|
||||
|
||||
// A_L A_R
|
||||
double *const A_L = A;
|
||||
double *const A_R = A + 2 * *ldA * n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
double *const A_TL = A;
|
||||
double *const A_TR = A + 2 * *ldA * n1;
|
||||
double *const A_BL = A + 2 * n1;
|
||||
double *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_T = ipiv;
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// recursion(A_L, ipiv_T)
|
||||
RELAPACK_zgetrf_rec(m, &n1, A_L, ldA, ipiv_T, info);
|
||||
// apply pivots to A_R
|
||||
LAPACK(zlaswp)(&n2, A_R, ldA, iONE, &n1, ipiv_T, iONE);
|
||||
|
||||
// A_TR = A_TL \ A_TR
|
||||
BLAS(ztrsm)("L", "L", "N", "U", &n1, &n2, ONE, A_TL, ldA, A_TR, ldA);
|
||||
// A_BR = A_BR - A_BL * A_TR
|
||||
BLAS(zgemm)("N", "N", &m2, &n2, &n1, MONE, A_BL, ldA, A_TR, ldA, ONE, A_BR, ldA);
|
||||
|
||||
// recursion(A_BR, ipiv_B)
|
||||
RELAPACK_zgetrf_rec(&m2, &n2, A_BR, ldA, ipiv_B, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
// apply pivots to A_BL
|
||||
LAPACK(zlaswp)(&n1, A_BL, ldA, iONE, &n2, ipiv_B, iONE);
|
||||
// shift pivots
|
||||
int i;
|
||||
for (i = 0; i < n2; i++)
|
||||
ipiv_B[i] += n1;
|
||||
}
|
|
@ -0,0 +1,212 @@
|
|||
#include "relapack.h"
|
||||
#if XSYGST_ALLOW_MALLOC
|
||||
#include "stdlib.h"
|
||||
#endif
|
||||
|
||||
static void RELAPACK_zhegst_rec(const int *, const char *, const int *,
|
||||
double *, const int *, const double *, const int *,
|
||||
double *, const int *, int *);
|
||||
|
||||
|
||||
/** ZHEGST reduces a complex Hermitian-definite generalized eigenproblem to standard form.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's zhegst.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/dc/d68/zhegst_8f.html
|
||||
* */
|
||||
void RELAPACK_zhegst(
|
||||
const int *itype, const char *uplo, const int *n,
|
||||
double *A, const int *ldA, const double *B, const int *ldB,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (*itype < 1 || *itype > 3)
|
||||
*info = -1;
|
||||
else if (!lower && !upper)
|
||||
*info = -2;
|
||||
else if (*n < 0)
|
||||
*info = -3;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -5;
|
||||
else if (*ldB < MAX(1, *n))
|
||||
*info = -7;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("ZHEGST", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Allocate work space
|
||||
double *Work = NULL;
|
||||
int lWork = 0;
|
||||
#if XSYGST_ALLOW_MALLOC
|
||||
const int n1 = ZREC_SPLIT(*n);
|
||||
lWork = n1 * (*n - n1);
|
||||
Work = malloc(lWork * 2 * sizeof(double));
|
||||
if (!Work)
|
||||
lWork = 0;
|
||||
#endif
|
||||
|
||||
// recursive kernel
|
||||
RELAPACK_zhegst_rec(itype, &cleanuplo, n, A, ldA, B, ldB, Work, &lWork, info);
|
||||
|
||||
// Free work space
|
||||
#if XSYGST_ALLOW_MALLOC
|
||||
if (Work)
|
||||
free(Work);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** zhegst's recursive compute kernel */
|
||||
static void RELAPACK_zhegst_rec(
|
||||
const int *itype, const char *uplo, const int *n,
|
||||
double *A, const int *ldA, const double *B, const int *ldB,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_ZHEGST, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(zhegs2)(itype, uplo, n, A, ldA, B, ldB, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const double ZERO[] = { 0., 0. };
|
||||
const double ONE[] = { 1., 0. };
|
||||
const double MONE[] = { -1., 0. };
|
||||
const double HALF[] = { .5, 0. };
|
||||
const double MHALF[] = { -.5, 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Loop iterator
|
||||
int i;
|
||||
|
||||
// Splitting
|
||||
const int n1 = ZREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
double *const A_TL = A;
|
||||
double *const A_TR = A + 2 * *ldA * n1;
|
||||
double *const A_BL = A + 2 * n1;
|
||||
double *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
|
||||
// B_TL B_TR
|
||||
// B_BL B_BR
|
||||
const double *const B_TL = B;
|
||||
const double *const B_TR = B + 2 * *ldB * n1;
|
||||
const double *const B_BL = B + 2 * n1;
|
||||
const double *const B_BR = B + 2 * *ldB * n1 + 2 * n1;
|
||||
|
||||
// recursion(A_TL, B_TL)
|
||||
RELAPACK_zhegst_rec(itype, uplo, &n1, A_TL, ldA, B_TL, ldB, Work, lWork, info);
|
||||
|
||||
if (*itype == 1)
|
||||
if (*uplo == 'L') {
|
||||
// A_BL = A_BL / B_TL'
|
||||
BLAS(ztrsm)("R", "L", "C", "N", &n2, &n1, ONE, B_TL, ldB, A_BL, ldA);
|
||||
if (*lWork >= n2 * n1) {
|
||||
// T = -1/2 * B_BL * A_TL
|
||||
BLAS(zhemm)("R", "L", &n2, &n1, MHALF, A_TL, ldA, B_BL, ldB, ZERO, Work, &n2);
|
||||
// A_BL = A_BL + T
|
||||
for (i = 0; i < n1; i++)
|
||||
BLAS(zaxpy)(&n2, ONE, Work + 2 * n2 * i, iONE, A_BL + 2 * *ldA * i, iONE);
|
||||
} else
|
||||
// A_BL = A_BL - 1/2 B_BL * A_TL
|
||||
BLAS(zhemm)("R", "L", &n2, &n1, MHALF, A_TL, ldA, B_BL, ldB, ONE, A_BL, ldA);
|
||||
// A_BR = A_BR - A_BL * B_BL' - B_BL * A_BL'
|
||||
BLAS(zher2k)("L", "N", &n2, &n1, MONE, A_BL, ldA, B_BL, ldB, ONE, A_BR, ldA);
|
||||
if (*lWork >= n2 * n1)
|
||||
// A_BL = A_BL + T
|
||||
for (i = 0; i < n1; i++)
|
||||
BLAS(zaxpy)(&n2, ONE, Work + 2 * n2 * i, iONE, A_BL + 2 * *ldA * i, iONE);
|
||||
else
|
||||
// A_BL = A_BL - 1/2 B_BL * A_TL
|
||||
BLAS(zhemm)("R", "L", &n2, &n1, MHALF, A_TL, ldA, B_BL, ldB, ONE, A_BL, ldA);
|
||||
// A_BL = B_BR \ A_BL
|
||||
BLAS(ztrsm)("L", "L", "N", "N", &n2, &n1, ONE, B_BR, ldB, A_BL, ldA);
|
||||
} else {
|
||||
// A_TR = B_TL' \ A_TR
|
||||
BLAS(ztrsm)("L", "U", "C", "N", &n1, &n2, ONE, B_TL, ldB, A_TR, ldA);
|
||||
if (*lWork >= n2 * n1) {
|
||||
// T = -1/2 * A_TL * B_TR
|
||||
BLAS(zhemm)("L", "U", &n1, &n2, MHALF, A_TL, ldA, B_TR, ldB, ZERO, Work, &n1);
|
||||
// A_TR = A_BL + T
|
||||
for (i = 0; i < n2; i++)
|
||||
BLAS(zaxpy)(&n1, ONE, Work + 2 * n1 * i, iONE, A_TR + 2 * *ldA * i, iONE);
|
||||
} else
|
||||
// A_TR = A_TR - 1/2 A_TL * B_TR
|
||||
BLAS(zhemm)("L", "U", &n1, &n2, MHALF, A_TL, ldA, B_TR, ldB, ONE, A_TR, ldA);
|
||||
// A_BR = A_BR - A_TR' * B_TR - B_TR' * A_TR
|
||||
BLAS(zher2k)("U", "C", &n2, &n1, MONE, A_TR, ldA, B_TR, ldB, ONE, A_BR, ldA);
|
||||
if (*lWork >= n2 * n1)
|
||||
// A_TR = A_BL + T
|
||||
for (i = 0; i < n2; i++)
|
||||
BLAS(zaxpy)(&n1, ONE, Work + 2 * n1 * i, iONE, A_TR + 2 * *ldA * i, iONE);
|
||||
else
|
||||
// A_TR = A_TR - 1/2 A_TL * B_TR
|
||||
BLAS(zhemm)("L", "U", &n1, &n2, MHALF, A_TL, ldA, B_TR, ldB, ONE, A_TR, ldA);
|
||||
// A_TR = A_TR / B_BR
|
||||
BLAS(ztrsm)("R", "U", "N", "N", &n1, &n2, ONE, B_BR, ldB, A_TR, ldA);
|
||||
}
|
||||
else
|
||||
if (*uplo == 'L') {
|
||||
// A_BL = A_BL * B_TL
|
||||
BLAS(ztrmm)("R", "L", "N", "N", &n2, &n1, ONE, B_TL, ldB, A_BL, ldA);
|
||||
if (*lWork >= n2 * n1) {
|
||||
// T = 1/2 * A_BR * B_BL
|
||||
BLAS(zhemm)("L", "L", &n2, &n1, HALF, A_BR, ldA, B_BL, ldB, ZERO, Work, &n2);
|
||||
// A_BL = A_BL + T
|
||||
for (i = 0; i < n1; i++)
|
||||
BLAS(zaxpy)(&n2, ONE, Work + 2 * n2 * i, iONE, A_BL + 2 * *ldA * i, iONE);
|
||||
} else
|
||||
// A_BL = A_BL + 1/2 A_BR * B_BL
|
||||
BLAS(zhemm)("L", "L", &n2, &n1, HALF, A_BR, ldA, B_BL, ldB, ONE, A_BL, ldA);
|
||||
// A_TL = A_TL + A_BL' * B_BL + B_BL' * A_BL
|
||||
BLAS(zher2k)("L", "C", &n1, &n2, ONE, A_BL, ldA, B_BL, ldB, ONE, A_TL, ldA);
|
||||
if (*lWork >= n2 * n1)
|
||||
// A_BL = A_BL + T
|
||||
for (i = 0; i < n1; i++)
|
||||
BLAS(zaxpy)(&n2, ONE, Work + 2 * n2 * i, iONE, A_BL + 2 * *ldA * i, iONE);
|
||||
else
|
||||
// A_BL = A_BL + 1/2 A_BR * B_BL
|
||||
BLAS(zhemm)("L", "L", &n2, &n1, HALF, A_BR, ldA, B_BL, ldB, ONE, A_BL, ldA);
|
||||
// A_BL = B_BR * A_BL
|
||||
BLAS(ztrmm)("L", "L", "C", "N", &n2, &n1, ONE, B_BR, ldB, A_BL, ldA);
|
||||
} else {
|
||||
// A_TR = B_TL * A_TR
|
||||
BLAS(ztrmm)("L", "U", "N", "N", &n1, &n2, ONE, B_TL, ldB, A_TR, ldA);
|
||||
if (*lWork >= n2 * n1) {
|
||||
// T = 1/2 * B_TR * A_BR
|
||||
BLAS(zhemm)("R", "U", &n1, &n2, HALF, A_BR, ldA, B_TR, ldB, ZERO, Work, &n1);
|
||||
// A_TR = A_TR + T
|
||||
for (i = 0; i < n2; i++)
|
||||
BLAS(zaxpy)(&n1, ONE, Work + 2 * n1 * i, iONE, A_TR + 2 * *ldA * i, iONE);
|
||||
} else
|
||||
// A_TR = A_TR + 1/2 B_TR A_BR
|
||||
BLAS(zhemm)("R", "U", &n1, &n2, HALF, A_BR, ldA, B_TR, ldB, ONE, A_TR, ldA);
|
||||
// A_TL = A_TL + A_TR * B_TR' + B_TR * A_TR'
|
||||
BLAS(zher2k)("U", "N", &n1, &n2, ONE, A_TR, ldA, B_TR, ldB, ONE, A_TL, ldA);
|
||||
if (*lWork >= n2 * n1)
|
||||
// A_TR = A_TR + T
|
||||
for (i = 0; i < n2; i++)
|
||||
BLAS(zaxpy)(&n1, ONE, Work + 2 * n1 * i, iONE, A_TR + 2 * *ldA * i, iONE);
|
||||
else
|
||||
// A_TR = A_TR + 1/2 B_TR * A_BR
|
||||
BLAS(zhemm)("R", "U", &n1, &n2, HALF, A_BR, ldA, B_TR, ldB, ONE, A_TR, ldA);
|
||||
// A_TR = A_TR * B_BR
|
||||
BLAS(ztrmm)("R", "U", "C", "N", &n1, &n2, ONE, B_BR, ldB, A_TR, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR, B_BR)
|
||||
RELAPACK_zhegst_rec(itype, uplo, &n2, A_BR, ldA, B_BR, ldB, Work, lWork, info);
|
||||
}
|
|
@ -0,0 +1,236 @@
|
|||
#include "relapack.h"
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
static void RELAPACK_zhetrf_rec(const char *, const int *, const int *, int *,
|
||||
double *, const int *, int *, double *, const int *, int *);
|
||||
|
||||
|
||||
/** ZHETRF computes the factorization of a complex Hermitian matrix A using the Bunch-Kaufman diagonal pivoting method.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's zhetrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d6/dd3/zhetrf_8f.html
|
||||
* */
|
||||
void RELAPACK_zhetrf(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
|
||||
// Required work size
|
||||
const int cleanlWork = *n * (*n / 2);
|
||||
int minlWork = cleanlWork;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
minlWork = 1;
|
||||
#endif
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
else if (*lWork < minlWork && *lWork != -1)
|
||||
*info = -7;
|
||||
else if (*lWork == -1) {
|
||||
// Work size query
|
||||
*Work = cleanlWork;
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure Work size
|
||||
double *cleanWork = Work;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (!*info && *lWork < cleanlWork) {
|
||||
cleanWork = malloc(cleanlWork * 2 * sizeof(double));
|
||||
if (!cleanWork)
|
||||
*info = -7;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("ZHETRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Dummy argument
|
||||
int nout;
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_zhetrf_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info);
|
||||
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (cleanWork != Work)
|
||||
free(cleanWork);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** zhetrf's recursive compute kernel */
|
||||
static void RELAPACK_zhetrf_rec(
|
||||
const char *uplo, const int *n_full, const int *n, int *n_out,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *ldWork, int *info
|
||||
) {
|
||||
|
||||
// top recursion level?
|
||||
const int top = *n_full == *n;
|
||||
|
||||
if (*n <= MAX(CROSSOVER_ZHETRF, 3)) {
|
||||
// Unblocked
|
||||
if (top) {
|
||||
LAPACK(zhetf2)(uplo, n, A, ldA, ipiv, info);
|
||||
*n_out = *n;
|
||||
} else
|
||||
RELAPACK_zhetrf_rec2(uplo, n_full, n, n_out, A, ldA, ipiv, Work, ldWork, info);
|
||||
return;
|
||||
}
|
||||
|
||||
int info1, info2;
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1., 0. };
|
||||
const double MONE[] = { -1., 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
const int n_rest = *n_full - *n;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// Splitting (setup)
|
||||
int n1 = ZREC_SPLIT(*n);
|
||||
int n2 = *n - n1;
|
||||
|
||||
// Work_L *
|
||||
double *const Work_L = Work;
|
||||
|
||||
// recursion(A_L)
|
||||
int n1_out;
|
||||
RELAPACK_zhetrf_rec(uplo, n_full, &n1, &n1_out, A, ldA, ipiv, Work_L, ldWork, &info1);
|
||||
n1 = n1_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n2 = *n - n1;
|
||||
const int n_full2 = *n_full - n1;
|
||||
|
||||
// * *
|
||||
// A_BL A_BR
|
||||
// A_BL_B A_BR_B
|
||||
double *const A_BL = A + 2 * n1;
|
||||
double *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
double *const A_BL_B = A + 2 * *n;
|
||||
double *const A_BR_B = A + 2 * *ldA * n1 + 2 * *n;
|
||||
|
||||
// * *
|
||||
// Work_BL Work_BR
|
||||
// * *
|
||||
// (top recursion level: use Work as Work_BR)
|
||||
double *const Work_BL = Work + 2 * n1;
|
||||
double *const Work_BR = top ? Work : Work + 2 * *ldWork * n1 + 2 * n1;
|
||||
const int ldWork_BR = top ? n2 : *ldWork;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// A_BR = A_BR - A_BL Work_BL'
|
||||
RELAPACK_zgemmt(uplo, "N", "T", &n2, &n1, MONE, A_BL, ldA, Work_BL, ldWork, ONE, A_BR, ldA);
|
||||
BLAS(zgemm)("N", "T", &n_rest, &n2, &n1, MONE, A_BL_B, ldA, Work_BL, ldWork, ONE, A_BR_B, ldA);
|
||||
|
||||
// recursion(A_BR)
|
||||
int n2_out;
|
||||
RELAPACK_zhetrf_rec(uplo, &n_full2, &n2, &n2_out, A_BR, ldA, ipiv_B, Work_BR, &ldWork_BR, &info2);
|
||||
|
||||
if (n2_out != n2) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// last column of A_BR
|
||||
double *const A_BR_r = A_BR + 2 * *ldA * n2_out + 2 * n2_out;
|
||||
|
||||
// last row of A_BL
|
||||
double *const A_BL_b = A_BL + 2 * n2_out;
|
||||
|
||||
// last row of Work_BL
|
||||
double *const Work_BL_b = Work_BL + 2 * n2_out;
|
||||
|
||||
// A_BR_r = A_BR_r + A_BL_b Work_BL_b'
|
||||
BLAS(zgemv)("N", &n_restp1, &n1, ONE, A_BL_b, ldA, Work_BL_b, ldWork, ONE, A_BR_r, iONE);
|
||||
}
|
||||
n2 = n2_out;
|
||||
|
||||
// shift pivots
|
||||
int i;
|
||||
for (i = 0; i < n2; i++)
|
||||
if (ipiv_B[i] > 0)
|
||||
ipiv_B[i] += n1;
|
||||
else
|
||||
ipiv_B[i] -= n1;
|
||||
|
||||
*info = info1 || info2;
|
||||
*n_out = n1 + n2;
|
||||
} else {
|
||||
// Splitting (setup)
|
||||
int n2 = ZREC_SPLIT(*n);
|
||||
int n1 = *n - n2;
|
||||
|
||||
// * Work_R
|
||||
// (top recursion level: use Work as Work_R)
|
||||
double *const Work_R = top ? Work : Work + 2 * *ldWork * n1;
|
||||
|
||||
// recursion(A_R)
|
||||
int n2_out;
|
||||
RELAPACK_zhetrf_rec(uplo, n_full, &n2, &n2_out, A, ldA, ipiv, Work_R, ldWork, &info2);
|
||||
const int n2_diff = n2 - n2_out;
|
||||
n2 = n2_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n1 = *n - n2;
|
||||
const int n_full1 = *n_full - n2;
|
||||
|
||||
// * A_TL_T A_TR_T
|
||||
// * A_TL A_TR
|
||||
// * * *
|
||||
double *const A_TL_T = A + 2 * *ldA * n_rest;
|
||||
double *const A_TR_T = A + 2 * *ldA * (n_rest + n1);
|
||||
double *const A_TL = A + 2 * *ldA * n_rest + 2 * n_rest;
|
||||
double *const A_TR = A + 2 * *ldA * (n_rest + n1) + 2 * n_rest;
|
||||
|
||||
// Work_L *
|
||||
// * Work_TR
|
||||
// * *
|
||||
// (top recursion level: Work_R was Work)
|
||||
double *const Work_L = Work;
|
||||
double *const Work_TR = Work + 2 * *ldWork * (top ? n2_diff : n1) + 2 * n_rest;
|
||||
const int ldWork_L = top ? n1 : *ldWork;
|
||||
|
||||
// A_TL = A_TL - A_TR Work_TR'
|
||||
RELAPACK_zgemmt(uplo, "N", "T", &n1, &n2, MONE, A_TR, ldA, Work_TR, ldWork, ONE, A_TL, ldA);
|
||||
BLAS(zgemm)("N", "T", &n_rest, &n1, &n2, MONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, ldA);
|
||||
|
||||
// recursion(A_TL)
|
||||
int n1_out;
|
||||
RELAPACK_zhetrf_rec(uplo, &n_full1, &n1, &n1_out, A, ldA, ipiv, Work_L, &ldWork_L, &info1);
|
||||
|
||||
if (n1_out != n1) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// A_TL_T_l = A_TL_T_l + A_TR_T Work_TR_t'
|
||||
BLAS(zgemv)("N", &n_restp1, &n2, ONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, iONE);
|
||||
}
|
||||
n1 = n1_out;
|
||||
|
||||
*info = info2 || info1;
|
||||
*n_out = n1 + n2;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,524 @@
|
|||
/* -- translated by f2c (version 20100827).
|
||||
You must link the resulting object file with libf2c:
|
||||
on Microsoft Windows system, link with libf2c.lib;
|
||||
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
|
||||
or, if you install libf2c.a in a standard place, with -lf2c -lm
|
||||
-- in that order, at the end of the command line, as in
|
||||
cc *.o -lf2c -lm
|
||||
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
|
||||
|
||||
http://www.netlib.org/f2c/libf2c.zip
|
||||
*/
|
||||
|
||||
#include "f2c.h"
|
||||
|
||||
/* Table of constant values */
|
||||
|
||||
static doublecomplex c_b1 = {1.,0.};
|
||||
static int c__1 = 1;
|
||||
|
||||
/** ZHETRF_REC2 computes a partial factorization of a complex Hermitian indefinite matrix using the Bunch-Kau fman diagonal pivoting method
|
||||
*
|
||||
* This routine is a minor modification of LAPACK's zlahef.
|
||||
* It serves as an unblocked kernel in the recursive algorithms.
|
||||
* The blocked BLAS Level 3 updates were removed and moved to the
|
||||
* recursive algorithm.
|
||||
* */
|
||||
/* Subroutine */ void RELAPACK_zhetrf_rec2(char *uplo, int *n, int *
|
||||
nb, int *kb, doublecomplex *a, int *lda, int *ipiv,
|
||||
doublecomplex *w, int *ldw, int *info, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
int a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4;
|
||||
double d__1, d__2, d__3, d__4;
|
||||
doublecomplex z__1, z__2, z__3, z__4;
|
||||
|
||||
/* Builtin functions */
|
||||
double sqrt(double), d_imag(doublecomplex *);
|
||||
void d_cnjg(doublecomplex *, doublecomplex *), z_div(doublecomplex *,
|
||||
doublecomplex *, doublecomplex *);
|
||||
|
||||
/* Local variables */
|
||||
static int j, k;
|
||||
static double t, r1;
|
||||
static doublecomplex d11, d21, d22;
|
||||
static int jj, kk, jp, kp, kw, kkw, imax, jmax;
|
||||
static double alpha;
|
||||
extern logical lsame_(char *, char *, ftnlen, ftnlen);
|
||||
static int kstep;
|
||||
extern /* Subroutine */ int zgemv_(char *, int *, int *,
|
||||
doublecomplex *, doublecomplex *, int *, doublecomplex *,
|
||||
int *, doublecomplex *, doublecomplex *, int *, ftnlen),
|
||||
zcopy_(int *, doublecomplex *, int *, doublecomplex *,
|
||||
int *), zswap_(int *, doublecomplex *, int *,
|
||||
doublecomplex *, int *);
|
||||
static double absakk;
|
||||
extern /* Subroutine */ int zdscal_(int *, double *,
|
||||
doublecomplex *, int *);
|
||||
static double colmax;
|
||||
extern /* Subroutine */ int zlacgv_(int *, doublecomplex *, int *)
|
||||
;
|
||||
extern int izamax_(int *, doublecomplex *, int *);
|
||||
static double rowmax;
|
||||
|
||||
/* Parameter adjustments */
|
||||
a_dim1 = *lda;
|
||||
a_offset = 1 + a_dim1;
|
||||
a -= a_offset;
|
||||
--ipiv;
|
||||
w_dim1 = *ldw;
|
||||
w_offset = 1 + w_dim1;
|
||||
w -= w_offset;
|
||||
|
||||
/* Function Body */
|
||||
*info = 0;
|
||||
alpha = (sqrt(17.) + 1.) / 8.;
|
||||
if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
|
||||
k = *n;
|
||||
L10:
|
||||
kw = *nb + k - *n;
|
||||
if ((k <= *n - *nb + 1 && *nb < *n) || k < 1) {
|
||||
goto L30;
|
||||
}
|
||||
kstep = 1;
|
||||
i__1 = k - 1;
|
||||
zcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
i__1 = k + kw * w_dim1;
|
||||
i__2 = k + k * a_dim1;
|
||||
d__1 = a[i__2].r;
|
||||
w[i__1].r = d__1, w[i__1].i = 0.;
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
z__1.r = -1., z__1.i = -0.;
|
||||
zgemv_("No transpose", &k, &i__1, &z__1, &a[(k + 1) * a_dim1 + 1],
|
||||
lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b1, &w[kw *
|
||||
w_dim1 + 1], &c__1, (ftnlen)12);
|
||||
i__1 = k + kw * w_dim1;
|
||||
i__2 = k + kw * w_dim1;
|
||||
d__1 = w[i__2].r;
|
||||
w[i__1].r = d__1, w[i__1].i = 0.;
|
||||
}
|
||||
i__1 = k + kw * w_dim1;
|
||||
absakk = (d__1 = w[i__1].r, abs(d__1));
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
imax = izamax_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
i__1 = imax + kw * w_dim1;
|
||||
colmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[imax +
|
||||
kw * w_dim1]), abs(d__2));
|
||||
} else {
|
||||
colmax = 0.;
|
||||
}
|
||||
if (max(absakk,colmax) == 0.) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + k * a_dim1;
|
||||
d__1 = a[i__2].r;
|
||||
a[i__1].r = d__1, a[i__1].i = 0.;
|
||||
} else {
|
||||
if (absakk >= alpha * colmax) {
|
||||
kp = k;
|
||||
} else {
|
||||
i__1 = imax - 1;
|
||||
zcopy_(&i__1, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) *
|
||||
w_dim1 + 1], &c__1);
|
||||
i__1 = imax + (kw - 1) * w_dim1;
|
||||
i__2 = imax + imax * a_dim1;
|
||||
d__1 = a[i__2].r;
|
||||
w[i__1].r = d__1, w[i__1].i = 0.;
|
||||
i__1 = k - imax;
|
||||
zcopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax +
|
||||
1 + (kw - 1) * w_dim1], &c__1);
|
||||
i__1 = k - imax;
|
||||
zlacgv_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
z__1.r = -1., z__1.i = -0.;
|
||||
zgemv_("No transpose", &k, &i__1, &z__1, &a[(k + 1) *
|
||||
a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1],
|
||||
ldw, &c_b1, &w[(kw - 1) * w_dim1 + 1], &c__1, (
|
||||
ftnlen)12);
|
||||
i__1 = imax + (kw - 1) * w_dim1;
|
||||
i__2 = imax + (kw - 1) * w_dim1;
|
||||
d__1 = w[i__2].r;
|
||||
w[i__1].r = d__1, w[i__1].i = 0.;
|
||||
}
|
||||
i__1 = k - imax;
|
||||
jmax = imax + izamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1],
|
||||
&c__1);
|
||||
i__1 = jmax + (kw - 1) * w_dim1;
|
||||
rowmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[
|
||||
jmax + (kw - 1) * w_dim1]), abs(d__2));
|
||||
if (imax > 1) {
|
||||
i__1 = imax - 1;
|
||||
jmax = izamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
|
||||
/* Computing MAX */
|
||||
i__1 = jmax + (kw - 1) * w_dim1;
|
||||
d__3 = rowmax, d__4 = (d__1 = w[i__1].r, abs(d__1)) + (
|
||||
d__2 = d_imag(&w[jmax + (kw - 1) * w_dim1]), abs(
|
||||
d__2));
|
||||
rowmax = max(d__3,d__4);
|
||||
}
|
||||
if (absakk >= alpha * colmax * (colmax / rowmax)) {
|
||||
kp = k;
|
||||
} else /* if(complicated condition) */ {
|
||||
i__1 = imax + (kw - 1) * w_dim1;
|
||||
if ((d__1 = w[i__1].r, abs(d__1)) >= alpha * rowmax) {
|
||||
kp = imax;
|
||||
zcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
|
||||
w_dim1 + 1], &c__1);
|
||||
} else {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
kk = k - kstep + 1;
|
||||
kkw = *nb + kk - *n;
|
||||
if (kp != kk) {
|
||||
i__1 = kp + kp * a_dim1;
|
||||
i__2 = kk + kk * a_dim1;
|
||||
d__1 = a[i__2].r;
|
||||
a[i__1].r = d__1, a[i__1].i = 0.;
|
||||
i__1 = kk - 1 - kp;
|
||||
zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp +
|
||||
1) * a_dim1], lda);
|
||||
i__1 = kk - 1 - kp;
|
||||
zlacgv_(&i__1, &a[kp + (kp + 1) * a_dim1], lda);
|
||||
if (kp > 1) {
|
||||
i__1 = kp - 1;
|
||||
zcopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1
|
||||
+ 1], &c__1);
|
||||
}
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
zswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k
|
||||
+ 1) * a_dim1], lda);
|
||||
}
|
||||
i__1 = *n - kk + 1;
|
||||
zswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw *
|
||||
w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
zcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &
|
||||
c__1);
|
||||
if (k > 1) {
|
||||
i__1 = k + k * a_dim1;
|
||||
r1 = 1. / a[i__1].r;
|
||||
i__1 = k - 1;
|
||||
zdscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1);
|
||||
i__1 = k - 1;
|
||||
zlacgv_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
}
|
||||
} else {
|
||||
if (k > 2) {
|
||||
i__1 = k - 1 + kw * w_dim1;
|
||||
d21.r = w[i__1].r, d21.i = w[i__1].i;
|
||||
d_cnjg(&z__2, &d21);
|
||||
z_div(&z__1, &w[k + kw * w_dim1], &z__2);
|
||||
d11.r = z__1.r, d11.i = z__1.i;
|
||||
z_div(&z__1, &w[k - 1 + (kw - 1) * w_dim1], &d21);
|
||||
d22.r = z__1.r, d22.i = z__1.i;
|
||||
z__1.r = d11.r * d22.r - d11.i * d22.i, z__1.i = d11.r *
|
||||
d22.i + d11.i * d22.r;
|
||||
t = 1. / (z__1.r - 1.);
|
||||
z__2.r = t, z__2.i = 0.;
|
||||
z_div(&z__1, &z__2, &d21);
|
||||
d21.r = z__1.r, d21.i = z__1.i;
|
||||
i__1 = k - 2;
|
||||
for (j = 1; j <= i__1; ++j) {
|
||||
i__2 = j + (k - 1) * a_dim1;
|
||||
i__3 = j + (kw - 1) * w_dim1;
|
||||
z__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
|
||||
z__3.i = d11.r * w[i__3].i + d11.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + kw * w_dim1;
|
||||
z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4]
|
||||
.i;
|
||||
z__1.r = d21.r * z__2.r - d21.i * z__2.i, z__1.i =
|
||||
d21.r * z__2.i + d21.i * z__2.r;
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
i__2 = j + k * a_dim1;
|
||||
d_cnjg(&z__2, &d21);
|
||||
i__3 = j + kw * w_dim1;
|
||||
z__4.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
|
||||
z__4.i = d22.r * w[i__3].i + d22.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + (kw - 1) * w_dim1;
|
||||
z__3.r = z__4.r - w[i__4].r, z__3.i = z__4.i - w[i__4]
|
||||
.i;
|
||||
z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i =
|
||||
z__2.r * z__3.i + z__2.i * z__3.r;
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
/* L20: */
|
||||
}
|
||||
}
|
||||
i__1 = k - 1 + (k - 1) * a_dim1;
|
||||
i__2 = k - 1 + (kw - 1) * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k - 1 + k * a_dim1;
|
||||
i__2 = k - 1 + kw * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + kw * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k - 1;
|
||||
zlacgv_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
i__1 = k - 2;
|
||||
zlacgv_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -kp;
|
||||
ipiv[k - 1] = -kp;
|
||||
}
|
||||
k -= kstep;
|
||||
goto L10;
|
||||
L30:
|
||||
j = k + 1;
|
||||
L60:
|
||||
jj = j;
|
||||
jp = ipiv[j];
|
||||
if (jp < 0) {
|
||||
jp = -jp;
|
||||
++j;
|
||||
}
|
||||
++j;
|
||||
if (jp != jj && j <= *n) {
|
||||
i__1 = *n - j + 1;
|
||||
zswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda);
|
||||
}
|
||||
if (j < *n) {
|
||||
goto L60;
|
||||
}
|
||||
*kb = *n - k;
|
||||
} else {
|
||||
k = 1;
|
||||
L70:
|
||||
if ((k >= *nb && *nb < *n) || k > *n) {
|
||||
goto L90;
|
||||
}
|
||||
kstep = 1;
|
||||
i__1 = k + k * w_dim1;
|
||||
i__2 = k + k * a_dim1;
|
||||
d__1 = a[i__2].r;
|
||||
w[i__1].r = d__1, w[i__1].i = 0.;
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
zcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &w[k + 1 + k *
|
||||
w_dim1], &c__1);
|
||||
}
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
z__1.r = -1., z__1.i = -0.;
|
||||
zgemv_("No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1], lda, &w[k
|
||||
+ w_dim1], ldw, &c_b1, &w[k + k * w_dim1], &c__1, (ftnlen)12);
|
||||
i__1 = k + k * w_dim1;
|
||||
i__2 = k + k * w_dim1;
|
||||
d__1 = w[i__2].r;
|
||||
w[i__1].r = d__1, w[i__1].i = 0.;
|
||||
i__1 = k + k * w_dim1;
|
||||
absakk = (d__1 = w[i__1].r, abs(d__1));
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
imax = k + izamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
i__1 = imax + k * w_dim1;
|
||||
colmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[imax +
|
||||
k * w_dim1]), abs(d__2));
|
||||
} else {
|
||||
colmax = 0.;
|
||||
}
|
||||
if (max(absakk,colmax) == 0.) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + k * a_dim1;
|
||||
d__1 = a[i__2].r;
|
||||
a[i__1].r = d__1, a[i__1].i = 0.;
|
||||
} else {
|
||||
if (absakk >= alpha * colmax) {
|
||||
kp = k;
|
||||
} else {
|
||||
i__1 = imax - k;
|
||||
zcopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = imax - k;
|
||||
zlacgv_(&i__1, &w[k + (k + 1) * w_dim1], &c__1);
|
||||
i__1 = imax + (k + 1) * w_dim1;
|
||||
i__2 = imax + imax * a_dim1;
|
||||
d__1 = a[i__2].r;
|
||||
w[i__1].r = d__1, w[i__1].i = 0.;
|
||||
if (imax < *n) {
|
||||
i__1 = *n - imax;
|
||||
zcopy_(&i__1, &a[imax + 1 + imax * a_dim1], &c__1, &w[
|
||||
imax + 1 + (k + 1) * w_dim1], &c__1);
|
||||
}
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
z__1.r = -1., z__1.i = -0.;
|
||||
zgemv_("No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1],
|
||||
lda, &w[imax + w_dim1], ldw, &c_b1, &w[k + (k + 1) *
|
||||
w_dim1], &c__1, (ftnlen)12);
|
||||
i__1 = imax + (k + 1) * w_dim1;
|
||||
i__2 = imax + (k + 1) * w_dim1;
|
||||
d__1 = w[i__2].r;
|
||||
w[i__1].r = d__1, w[i__1].i = 0.;
|
||||
i__1 = imax - k;
|
||||
jmax = k - 1 + izamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1)
|
||||
;
|
||||
i__1 = jmax + (k + 1) * w_dim1;
|
||||
rowmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[
|
||||
jmax + (k + 1) * w_dim1]), abs(d__2));
|
||||
if (imax < *n) {
|
||||
i__1 = *n - imax;
|
||||
jmax = imax + izamax_(&i__1, &w[imax + 1 + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
/* Computing MAX */
|
||||
i__1 = jmax + (k + 1) * w_dim1;
|
||||
d__3 = rowmax, d__4 = (d__1 = w[i__1].r, abs(d__1)) + (
|
||||
d__2 = d_imag(&w[jmax + (k + 1) * w_dim1]), abs(
|
||||
d__2));
|
||||
rowmax = max(d__3,d__4);
|
||||
}
|
||||
if (absakk >= alpha * colmax * (colmax / rowmax)) {
|
||||
kp = k;
|
||||
} else /* if(complicated condition) */ {
|
||||
i__1 = imax + (k + 1) * w_dim1;
|
||||
if ((d__1 = w[i__1].r, abs(d__1)) >= alpha * rowmax) {
|
||||
kp = imax;
|
||||
i__1 = *n - k + 1;
|
||||
zcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k +
|
||||
k * w_dim1], &c__1);
|
||||
} else {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
kk = k + kstep - 1;
|
||||
if (kp != kk) {
|
||||
i__1 = kp + kp * a_dim1;
|
||||
i__2 = kk + kk * a_dim1;
|
||||
d__1 = a[i__2].r;
|
||||
a[i__1].r = d__1, a[i__1].i = 0.;
|
||||
i__1 = kp - kk - 1;
|
||||
zcopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk +
|
||||
1) * a_dim1], lda);
|
||||
i__1 = kp - kk - 1;
|
||||
zlacgv_(&i__1, &a[kp + (kk + 1) * a_dim1], lda);
|
||||
if (kp < *n) {
|
||||
i__1 = *n - kp;
|
||||
zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1
|
||||
+ kp * a_dim1], &c__1);
|
||||
}
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
zswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda);
|
||||
}
|
||||
zswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
i__1 = *n - k + 1;
|
||||
zcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
|
||||
c__1);
|
||||
if (k < *n) {
|
||||
i__1 = k + k * a_dim1;
|
||||
r1 = 1. / a[i__1].r;
|
||||
i__1 = *n - k;
|
||||
zdscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1);
|
||||
i__1 = *n - k;
|
||||
zlacgv_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
}
|
||||
} else {
|
||||
if (k < *n - 1) {
|
||||
i__1 = k + 1 + k * w_dim1;
|
||||
d21.r = w[i__1].r, d21.i = w[i__1].i;
|
||||
z_div(&z__1, &w[k + 1 + (k + 1) * w_dim1], &d21);
|
||||
d11.r = z__1.r, d11.i = z__1.i;
|
||||
d_cnjg(&z__2, &d21);
|
||||
z_div(&z__1, &w[k + k * w_dim1], &z__2);
|
||||
d22.r = z__1.r, d22.i = z__1.i;
|
||||
z__1.r = d11.r * d22.r - d11.i * d22.i, z__1.i = d11.r *
|
||||
d22.i + d11.i * d22.r;
|
||||
t = 1. / (z__1.r - 1.);
|
||||
z__2.r = t, z__2.i = 0.;
|
||||
z_div(&z__1, &z__2, &d21);
|
||||
d21.r = z__1.r, d21.i = z__1.i;
|
||||
i__1 = *n;
|
||||
for (j = k + 2; j <= i__1; ++j) {
|
||||
i__2 = j + k * a_dim1;
|
||||
d_cnjg(&z__2, &d21);
|
||||
i__3 = j + k * w_dim1;
|
||||
z__4.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
|
||||
z__4.i = d11.r * w[i__3].i + d11.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + (k + 1) * w_dim1;
|
||||
z__3.r = z__4.r - w[i__4].r, z__3.i = z__4.i - w[i__4]
|
||||
.i;
|
||||
z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i =
|
||||
z__2.r * z__3.i + z__2.i * z__3.r;
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
i__2 = j + (k + 1) * a_dim1;
|
||||
i__3 = j + (k + 1) * w_dim1;
|
||||
z__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
|
||||
z__3.i = d22.r * w[i__3].i + d22.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + k * w_dim1;
|
||||
z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4]
|
||||
.i;
|
||||
z__1.r = d21.r * z__2.r - d21.i * z__2.i, z__1.i =
|
||||
d21.r * z__2.i + d21.i * z__2.r;
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
/* L80: */
|
||||
}
|
||||
}
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + k * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + 1 + k * a_dim1;
|
||||
i__2 = k + 1 + k * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + 1 + (k + 1) * a_dim1;
|
||||
i__2 = k + 1 + (k + 1) * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = *n - k;
|
||||
zlacgv_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
i__1 = *n - k - 1;
|
||||
zlacgv_(&i__1, &w[k + 2 + (k + 1) * w_dim1], &c__1);
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -kp;
|
||||
ipiv[k + 1] = -kp;
|
||||
}
|
||||
k += kstep;
|
||||
goto L70;
|
||||
L90:
|
||||
j = k - 1;
|
||||
L120:
|
||||
jj = j;
|
||||
jp = ipiv[j];
|
||||
if (jp < 0) {
|
||||
jp = -jp;
|
||||
--j;
|
||||
}
|
||||
--j;
|
||||
if (jp != jj && j >= 1) {
|
||||
zswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda);
|
||||
}
|
||||
if (j > 1) {
|
||||
goto L120;
|
||||
}
|
||||
*kb = k - 1;
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,236 @@
|
|||
#include "relapack.h"
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
static void RELAPACK_zhetrf_rook_rec(const char *, const int *, const int *, int *,
|
||||
double *, const int *, int *, double *, const int *, int *);
|
||||
|
||||
|
||||
/** ZHETRF_ROOK computes the factorization of a complex Hermitian indefinite matrix using the bounded Bunch-Kaufman ("rook") diagonal pivoting method.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's zhetrf_rook.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d6/d6f/zhetrf__rook_8f.html
|
||||
* */
|
||||
void RELAPACK_zhetrf_rook(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
|
||||
// Required work size
|
||||
const int cleanlWork = *n * (*n / 2);
|
||||
int minlWork = cleanlWork;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
minlWork = 1;
|
||||
#endif
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
else if (*lWork < minlWork && *lWork != -1)
|
||||
*info = -7;
|
||||
else if (*lWork == -1) {
|
||||
// Work size query
|
||||
*Work = cleanlWork;
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure Work size
|
||||
double *cleanWork = Work;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (!*info && *lWork < cleanlWork) {
|
||||
cleanWork = malloc(cleanlWork * 2 * sizeof(double));
|
||||
if (!cleanWork)
|
||||
*info = -7;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("ZHETRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Dummy argument
|
||||
int nout;
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_zhetrf_rook_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info);
|
||||
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (cleanWork != Work)
|
||||
free(cleanWork);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** zhetrf_rook's recursive compute kernel */
|
||||
static void RELAPACK_zhetrf_rook_rec(
|
||||
const char *uplo, const int *n_full, const int *n, int *n_out,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *ldWork, int *info
|
||||
) {
|
||||
|
||||
// top recursion level?
|
||||
const int top = *n_full == *n;
|
||||
|
||||
if (*n <= MAX(CROSSOVER_ZHETRF_ROOK, 3)) {
|
||||
// Unblocked
|
||||
if (top) {
|
||||
LAPACK(zhetf2)(uplo, n, A, ldA, ipiv, info);
|
||||
*n_out = *n;
|
||||
} else
|
||||
RELAPACK_zhetrf_rook_rec2(uplo, n_full, n, n_out, A, ldA, ipiv, Work, ldWork, info);
|
||||
return;
|
||||
}
|
||||
|
||||
int info1, info2;
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1., 0. };
|
||||
const double MONE[] = { -1., 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
const int n_rest = *n_full - *n;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// Splitting (setup)
|
||||
int n1 = ZREC_SPLIT(*n);
|
||||
int n2 = *n - n1;
|
||||
|
||||
// Work_L *
|
||||
double *const Work_L = Work;
|
||||
|
||||
// recursion(A_L)
|
||||
int n1_out;
|
||||
RELAPACK_zhetrf_rook_rec(uplo, n_full, &n1, &n1_out, A, ldA, ipiv, Work_L, ldWork, &info1);
|
||||
n1 = n1_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n2 = *n - n1;
|
||||
const int n_full2 = *n_full - n1;
|
||||
|
||||
// * *
|
||||
// A_BL A_BR
|
||||
// A_BL_B A_BR_B
|
||||
double *const A_BL = A + 2 * n1;
|
||||
double *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
double *const A_BL_B = A + 2 * *n;
|
||||
double *const A_BR_B = A + 2 * *ldA * n1 + 2 * *n;
|
||||
|
||||
// * *
|
||||
// Work_BL Work_BR
|
||||
// * *
|
||||
// (top recursion level: use Work as Work_BR)
|
||||
double *const Work_BL = Work + 2 * n1;
|
||||
double *const Work_BR = top ? Work : Work + 2 * *ldWork * n1 + 2 * n1;
|
||||
const int ldWork_BR = top ? n2 : *ldWork;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// A_BR = A_BR - A_BL Work_BL'
|
||||
RELAPACK_zgemmt(uplo, "N", "T", &n2, &n1, MONE, A_BL, ldA, Work_BL, ldWork, ONE, A_BR, ldA);
|
||||
BLAS(zgemm)("N", "T", &n_rest, &n2, &n1, MONE, A_BL_B, ldA, Work_BL, ldWork, ONE, A_BR_B, ldA);
|
||||
|
||||
// recursion(A_BR)
|
||||
int n2_out;
|
||||
RELAPACK_zhetrf_rook_rec(uplo, &n_full2, &n2, &n2_out, A_BR, ldA, ipiv_B, Work_BR, &ldWork_BR, &info2);
|
||||
|
||||
if (n2_out != n2) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// last column of A_BR
|
||||
double *const A_BR_r = A_BR + 2 * *ldA * n2_out + 2 * n2_out;
|
||||
|
||||
// last row of A_BL
|
||||
double *const A_BL_b = A_BL + 2 * n2_out;
|
||||
|
||||
// last row of Work_BL
|
||||
double *const Work_BL_b = Work_BL + 2 * n2_out;
|
||||
|
||||
// A_BR_r = A_BR_r + A_BL_b Work_BL_b'
|
||||
BLAS(zgemv)("N", &n_restp1, &n1, ONE, A_BL_b, ldA, Work_BL_b, ldWork, ONE, A_BR_r, iONE);
|
||||
}
|
||||
n2 = n2_out;
|
||||
|
||||
// shift pivots
|
||||
int i;
|
||||
for (i = 0; i < n2; i++)
|
||||
if (ipiv_B[i] > 0)
|
||||
ipiv_B[i] += n1;
|
||||
else
|
||||
ipiv_B[i] -= n1;
|
||||
|
||||
*info = info1 || info2;
|
||||
*n_out = n1 + n2;
|
||||
} else {
|
||||
// Splitting (setup)
|
||||
int n2 = ZREC_SPLIT(*n);
|
||||
int n1 = *n - n2;
|
||||
|
||||
// * Work_R
|
||||
// (top recursion level: use Work as Work_R)
|
||||
double *const Work_R = top ? Work : Work + 2 * *ldWork * n1;
|
||||
|
||||
// recursion(A_R)
|
||||
int n2_out;
|
||||
RELAPACK_zhetrf_rook_rec(uplo, n_full, &n2, &n2_out, A, ldA, ipiv, Work_R, ldWork, &info2);
|
||||
const int n2_diff = n2 - n2_out;
|
||||
n2 = n2_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n1 = *n - n2;
|
||||
const int n_full1 = *n_full - n2;
|
||||
|
||||
// * A_TL_T A_TR_T
|
||||
// * A_TL A_TR
|
||||
// * * *
|
||||
double *const A_TL_T = A + 2 * *ldA * n_rest;
|
||||
double *const A_TR_T = A + 2 * *ldA * (n_rest + n1);
|
||||
double *const A_TL = A + 2 * *ldA * n_rest + 2 * n_rest;
|
||||
double *const A_TR = A + 2 * *ldA * (n_rest + n1) + 2 * n_rest;
|
||||
|
||||
// Work_L *
|
||||
// * Work_TR
|
||||
// * *
|
||||
// (top recursion level: Work_R was Work)
|
||||
double *const Work_L = Work;
|
||||
double *const Work_TR = Work + 2 * *ldWork * (top ? n2_diff : n1) + 2 * n_rest;
|
||||
const int ldWork_L = top ? n1 : *ldWork;
|
||||
|
||||
// A_TL = A_TL - A_TR Work_TR'
|
||||
RELAPACK_zgemmt(uplo, "N", "T", &n1, &n2, MONE, A_TR, ldA, Work_TR, ldWork, ONE, A_TL, ldA);
|
||||
BLAS(zgemm)("N", "T", &n_rest, &n1, &n2, MONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, ldA);
|
||||
|
||||
// recursion(A_TL)
|
||||
int n1_out;
|
||||
RELAPACK_zhetrf_rook_rec(uplo, &n_full1, &n1, &n1_out, A, ldA, ipiv, Work_L, &ldWork_L, &info1);
|
||||
|
||||
if (n1_out != n1) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// A_TL_T_l = A_TL_T_l + A_TR_T Work_TR_t'
|
||||
BLAS(zgemv)("N", &n_restp1, &n2, ONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, iONE);
|
||||
}
|
||||
n1 = n1_out;
|
||||
|
||||
*info = info2 || info1;
|
||||
*n_out = n1 + n2;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,662 @@
|
|||
/* -- translated by f2c (version 20100827).
|
||||
You must link the resulting object file with libf2c:
|
||||
on Microsoft Windows system, link with libf2c.lib;
|
||||
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
|
||||
or, if you install libf2c.a in a standard place, with -lf2c -lm
|
||||
-- in that order, at the end of the command line, as in
|
||||
cc *.o -lf2c -lm
|
||||
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
|
||||
|
||||
http://www.netlib.org/f2c/libf2c.zip
|
||||
*/
|
||||
|
||||
#include "f2c.h"
|
||||
|
||||
/* Table of constant values */
|
||||
|
||||
static doublecomplex c_b1 = {1.,0.};
|
||||
static int c__1 = 1;
|
||||
|
||||
/** ZHETRF_ROOK_REC2 computes a partial factorization of a complex Hermitian indefinite matrix using the boun ded Bunch-Kaufman ("rook") diagonal pivoting method
|
||||
*
|
||||
* This routine is a minor modification of LAPACK's zlahef_rook.
|
||||
* It serves as an unblocked kernel in the recursive algorithms.
|
||||
* The blocked BLAS Level 3 updates were removed and moved to the
|
||||
* recursive algorithm.
|
||||
* */
|
||||
/* Subroutine */ void RELAPACK_zhetrf_rook_rec2(char *uplo, int *n,
|
||||
int *nb, int *kb, doublecomplex *a, int *lda, int *
|
||||
ipiv, doublecomplex *w, int *ldw, int *info, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
int a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4;
|
||||
double d__1, d__2;
|
||||
doublecomplex z__1, z__2, z__3, z__4, z__5;
|
||||
|
||||
/* Builtin functions */
|
||||
double sqrt(double), d_imag(doublecomplex *);
|
||||
void d_cnjg(doublecomplex *, doublecomplex *), z_div(doublecomplex *,
|
||||
doublecomplex *, doublecomplex *);
|
||||
|
||||
/* Local variables */
|
||||
static int j, k, p;
|
||||
static double t, r1;
|
||||
static doublecomplex d11, d21, d22;
|
||||
static int ii, jj, kk, kp, kw, jp1, jp2, kkw;
|
||||
static logical done;
|
||||
static int imax, jmax;
|
||||
static double alpha;
|
||||
extern logical lsame_(char *, char *, ftnlen, ftnlen);
|
||||
static double dtemp, sfmin;
|
||||
static int itemp, kstep;
|
||||
extern /* Subroutine */ int zgemv_(char *, int *, int *,
|
||||
doublecomplex *, doublecomplex *, int *, doublecomplex *,
|
||||
int *, doublecomplex *, doublecomplex *, int *, ftnlen),
|
||||
zcopy_(int *, doublecomplex *, int *, doublecomplex *,
|
||||
int *), zswap_(int *, doublecomplex *, int *,
|
||||
doublecomplex *, int *);
|
||||
extern double dlamch_(char *, ftnlen);
|
||||
static double absakk;
|
||||
extern /* Subroutine */ int zdscal_(int *, double *,
|
||||
doublecomplex *, int *);
|
||||
static double colmax;
|
||||
extern /* Subroutine */ int zlacgv_(int *, doublecomplex *, int *)
|
||||
;
|
||||
extern int izamax_(int *, doublecomplex *, int *);
|
||||
static double rowmax;
|
||||
|
||||
/* Parameter adjustments */
|
||||
a_dim1 = *lda;
|
||||
a_offset = 1 + a_dim1;
|
||||
a -= a_offset;
|
||||
--ipiv;
|
||||
w_dim1 = *ldw;
|
||||
w_offset = 1 + w_dim1;
|
||||
w -= w_offset;
|
||||
|
||||
/* Function Body */
|
||||
*info = 0;
|
||||
alpha = (sqrt(17.) + 1.) / 8.;
|
||||
sfmin = dlamch_("S", (ftnlen)1);
|
||||
if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
|
||||
k = *n;
|
||||
L10:
|
||||
kw = *nb + k - *n;
|
||||
if ((k <= *n - *nb + 1 && *nb < *n) || k < 1) {
|
||||
goto L30;
|
||||
}
|
||||
kstep = 1;
|
||||
p = k;
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
zcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &
|
||||
c__1);
|
||||
}
|
||||
i__1 = k + kw * w_dim1;
|
||||
i__2 = k + k * a_dim1;
|
||||
d__1 = a[i__2].r;
|
||||
w[i__1].r = d__1, w[i__1].i = 0.;
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
z__1.r = -1., z__1.i = -0.;
|
||||
zgemv_("No transpose", &k, &i__1, &z__1, &a[(k + 1) * a_dim1 + 1],
|
||||
lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b1, &w[kw *
|
||||
w_dim1 + 1], &c__1, (ftnlen)12);
|
||||
i__1 = k + kw * w_dim1;
|
||||
i__2 = k + kw * w_dim1;
|
||||
d__1 = w[i__2].r;
|
||||
w[i__1].r = d__1, w[i__1].i = 0.;
|
||||
}
|
||||
i__1 = k + kw * w_dim1;
|
||||
absakk = (d__1 = w[i__1].r, abs(d__1));
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
imax = izamax_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
i__1 = imax + kw * w_dim1;
|
||||
colmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[imax +
|
||||
kw * w_dim1]), abs(d__2));
|
||||
} else {
|
||||
colmax = 0.;
|
||||
}
|
||||
if (max(absakk,colmax) == 0.) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + kw * w_dim1;
|
||||
d__1 = w[i__2].r;
|
||||
a[i__1].r = d__1, a[i__1].i = 0.;
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
zcopy_(&i__1, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1],
|
||||
&c__1);
|
||||
}
|
||||
} else {
|
||||
if (! (absakk < alpha * colmax)) {
|
||||
kp = k;
|
||||
} else {
|
||||
done = FALSE_;
|
||||
L12:
|
||||
if (imax > 1) {
|
||||
i__1 = imax - 1;
|
||||
zcopy_(&i__1, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) *
|
||||
w_dim1 + 1], &c__1);
|
||||
}
|
||||
i__1 = imax + (kw - 1) * w_dim1;
|
||||
i__2 = imax + imax * a_dim1;
|
||||
d__1 = a[i__2].r;
|
||||
w[i__1].r = d__1, w[i__1].i = 0.;
|
||||
i__1 = k - imax;
|
||||
zcopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax +
|
||||
1 + (kw - 1) * w_dim1], &c__1);
|
||||
i__1 = k - imax;
|
||||
zlacgv_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
z__1.r = -1., z__1.i = -0.;
|
||||
zgemv_("No transpose", &k, &i__1, &z__1, &a[(k + 1) *
|
||||
a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1],
|
||||
ldw, &c_b1, &w[(kw - 1) * w_dim1 + 1], &c__1, (
|
||||
ftnlen)12);
|
||||
i__1 = imax + (kw - 1) * w_dim1;
|
||||
i__2 = imax + (kw - 1) * w_dim1;
|
||||
d__1 = w[i__2].r;
|
||||
w[i__1].r = d__1, w[i__1].i = 0.;
|
||||
}
|
||||
if (imax != k) {
|
||||
i__1 = k - imax;
|
||||
jmax = imax + izamax_(&i__1, &w[imax + 1 + (kw - 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = jmax + (kw - 1) * w_dim1;
|
||||
rowmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&
|
||||
w[jmax + (kw - 1) * w_dim1]), abs(d__2));
|
||||
} else {
|
||||
rowmax = 0.;
|
||||
}
|
||||
if (imax > 1) {
|
||||
i__1 = imax - 1;
|
||||
itemp = izamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
|
||||
i__1 = itemp + (kw - 1) * w_dim1;
|
||||
dtemp = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[
|
||||
itemp + (kw - 1) * w_dim1]), abs(d__2));
|
||||
if (dtemp > rowmax) {
|
||||
rowmax = dtemp;
|
||||
jmax = itemp;
|
||||
}
|
||||
}
|
||||
i__1 = imax + (kw - 1) * w_dim1;
|
||||
if (! ((d__1 = w[i__1].r, abs(d__1)) < alpha * rowmax)) {
|
||||
kp = imax;
|
||||
zcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
|
||||
w_dim1 + 1], &c__1);
|
||||
done = TRUE_;
|
||||
} else if (p == jmax || rowmax <= colmax) {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
done = TRUE_;
|
||||
} else {
|
||||
p = imax;
|
||||
colmax = rowmax;
|
||||
imax = jmax;
|
||||
zcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
|
||||
w_dim1 + 1], &c__1);
|
||||
}
|
||||
if (! done) {
|
||||
goto L12;
|
||||
}
|
||||
}
|
||||
kk = k - kstep + 1;
|
||||
kkw = *nb + kk - *n;
|
||||
if (kstep == 2 && p != k) {
|
||||
i__1 = p + p * a_dim1;
|
||||
i__2 = k + k * a_dim1;
|
||||
d__1 = a[i__2].r;
|
||||
a[i__1].r = d__1, a[i__1].i = 0.;
|
||||
i__1 = k - 1 - p;
|
||||
zcopy_(&i__1, &a[p + 1 + k * a_dim1], &c__1, &a[p + (p + 1) *
|
||||
a_dim1], lda);
|
||||
i__1 = k - 1 - p;
|
||||
zlacgv_(&i__1, &a[p + (p + 1) * a_dim1], lda);
|
||||
if (p > 1) {
|
||||
i__1 = p - 1;
|
||||
zcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &a[p * a_dim1 +
|
||||
1], &c__1);
|
||||
}
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
zswap_(&i__1, &a[k + (k + 1) * a_dim1], lda, &a[p + (k +
|
||||
1) * a_dim1], lda);
|
||||
}
|
||||
i__1 = *n - kk + 1;
|
||||
zswap_(&i__1, &w[k + kkw * w_dim1], ldw, &w[p + kkw * w_dim1],
|
||||
ldw);
|
||||
}
|
||||
if (kp != kk) {
|
||||
i__1 = kp + kp * a_dim1;
|
||||
i__2 = kk + kk * a_dim1;
|
||||
d__1 = a[i__2].r;
|
||||
a[i__1].r = d__1, a[i__1].i = 0.;
|
||||
i__1 = kk - 1 - kp;
|
||||
zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp +
|
||||
1) * a_dim1], lda);
|
||||
i__1 = kk - 1 - kp;
|
||||
zlacgv_(&i__1, &a[kp + (kp + 1) * a_dim1], lda);
|
||||
if (kp > 1) {
|
||||
i__1 = kp - 1;
|
||||
zcopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1
|
||||
+ 1], &c__1);
|
||||
}
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
zswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k
|
||||
+ 1) * a_dim1], lda);
|
||||
}
|
||||
i__1 = *n - kk + 1;
|
||||
zswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw *
|
||||
w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
zcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &
|
||||
c__1);
|
||||
if (k > 1) {
|
||||
i__1 = k + k * a_dim1;
|
||||
t = a[i__1].r;
|
||||
if (abs(t) >= sfmin) {
|
||||
r1 = 1. / t;
|
||||
i__1 = k - 1;
|
||||
zdscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1);
|
||||
} else {
|
||||
i__1 = k - 1;
|
||||
for (ii = 1; ii <= i__1; ++ii) {
|
||||
i__2 = ii + k * a_dim1;
|
||||
i__3 = ii + k * a_dim1;
|
||||
z__1.r = a[i__3].r / t, z__1.i = a[i__3].i / t;
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
/* L14: */
|
||||
}
|
||||
}
|
||||
i__1 = k - 1;
|
||||
zlacgv_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
}
|
||||
} else {
|
||||
if (k > 2) {
|
||||
i__1 = k - 1 + kw * w_dim1;
|
||||
d21.r = w[i__1].r, d21.i = w[i__1].i;
|
||||
d_cnjg(&z__2, &d21);
|
||||
z_div(&z__1, &w[k + kw * w_dim1], &z__2);
|
||||
d11.r = z__1.r, d11.i = z__1.i;
|
||||
z_div(&z__1, &w[k - 1 + (kw - 1) * w_dim1], &d21);
|
||||
d22.r = z__1.r, d22.i = z__1.i;
|
||||
z__1.r = d11.r * d22.r - d11.i * d22.i, z__1.i = d11.r *
|
||||
d22.i + d11.i * d22.r;
|
||||
t = 1. / (z__1.r - 1.);
|
||||
i__1 = k - 2;
|
||||
for (j = 1; j <= i__1; ++j) {
|
||||
i__2 = j + (k - 1) * a_dim1;
|
||||
i__3 = j + (kw - 1) * w_dim1;
|
||||
z__4.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
|
||||
z__4.i = d11.r * w[i__3].i + d11.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + kw * w_dim1;
|
||||
z__3.r = z__4.r - w[i__4].r, z__3.i = z__4.i - w[i__4]
|
||||
.i;
|
||||
z_div(&z__2, &z__3, &d21);
|
||||
z__1.r = t * z__2.r, z__1.i = t * z__2.i;
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
i__2 = j + k * a_dim1;
|
||||
i__3 = j + kw * w_dim1;
|
||||
z__4.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
|
||||
z__4.i = d22.r * w[i__3].i + d22.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + (kw - 1) * w_dim1;
|
||||
z__3.r = z__4.r - w[i__4].r, z__3.i = z__4.i - w[i__4]
|
||||
.i;
|
||||
d_cnjg(&z__5, &d21);
|
||||
z_div(&z__2, &z__3, &z__5);
|
||||
z__1.r = t * z__2.r, z__1.i = t * z__2.i;
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
/* L20: */
|
||||
}
|
||||
}
|
||||
i__1 = k - 1 + (k - 1) * a_dim1;
|
||||
i__2 = k - 1 + (kw - 1) * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k - 1 + k * a_dim1;
|
||||
i__2 = k - 1 + kw * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + kw * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k - 1;
|
||||
zlacgv_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
i__1 = k - 2;
|
||||
zlacgv_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -p;
|
||||
ipiv[k - 1] = -kp;
|
||||
}
|
||||
k -= kstep;
|
||||
goto L10;
|
||||
L30:
|
||||
j = k + 1;
|
||||
L60:
|
||||
kstep = 1;
|
||||
jp1 = 1;
|
||||
jj = j;
|
||||
jp2 = ipiv[j];
|
||||
if (jp2 < 0) {
|
||||
jp2 = -jp2;
|
||||
++j;
|
||||
jp1 = -ipiv[j];
|
||||
kstep = 2;
|
||||
}
|
||||
++j;
|
||||
if (jp2 != jj && j <= *n) {
|
||||
i__1 = *n - j + 1;
|
||||
zswap_(&i__1, &a[jp2 + j * a_dim1], lda, &a[jj + j * a_dim1], lda)
|
||||
;
|
||||
}
|
||||
++jj;
|
||||
if (kstep == 2 && jp1 != jj && j <= *n) {
|
||||
i__1 = *n - j + 1;
|
||||
zswap_(&i__1, &a[jp1 + j * a_dim1], lda, &a[jj + j * a_dim1], lda)
|
||||
;
|
||||
}
|
||||
if (j < *n) {
|
||||
goto L60;
|
||||
}
|
||||
*kb = *n - k;
|
||||
} else {
|
||||
k = 1;
|
||||
L70:
|
||||
if ((k >= *nb && *nb < *n) || k > *n) {
|
||||
goto L90;
|
||||
}
|
||||
kstep = 1;
|
||||
p = k;
|
||||
i__1 = k + k * w_dim1;
|
||||
i__2 = k + k * a_dim1;
|
||||
d__1 = a[i__2].r;
|
||||
w[i__1].r = d__1, w[i__1].i = 0.;
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
zcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &w[k + 1 + k *
|
||||
w_dim1], &c__1);
|
||||
}
|
||||
if (k > 1) {
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
z__1.r = -1., z__1.i = -0.;
|
||||
zgemv_("No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1], lda, &
|
||||
w[k + w_dim1], ldw, &c_b1, &w[k + k * w_dim1], &c__1, (
|
||||
ftnlen)12);
|
||||
i__1 = k + k * w_dim1;
|
||||
i__2 = k + k * w_dim1;
|
||||
d__1 = w[i__2].r;
|
||||
w[i__1].r = d__1, w[i__1].i = 0.;
|
||||
}
|
||||
i__1 = k + k * w_dim1;
|
||||
absakk = (d__1 = w[i__1].r, abs(d__1));
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
imax = k + izamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
i__1 = imax + k * w_dim1;
|
||||
colmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[imax +
|
||||
k * w_dim1]), abs(d__2));
|
||||
} else {
|
||||
colmax = 0.;
|
||||
}
|
||||
if (max(absakk,colmax) == 0.) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + k * w_dim1;
|
||||
d__1 = w[i__2].r;
|
||||
a[i__1].r = d__1, a[i__1].i = 0.;
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
zcopy_(&i__1, &w[k + 1 + k * w_dim1], &c__1, &a[k + 1 + k *
|
||||
a_dim1], &c__1);
|
||||
}
|
||||
} else {
|
||||
if (! (absakk < alpha * colmax)) {
|
||||
kp = k;
|
||||
} else {
|
||||
done = FALSE_;
|
||||
L72:
|
||||
i__1 = imax - k;
|
||||
zcopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = imax - k;
|
||||
zlacgv_(&i__1, &w[k + (k + 1) * w_dim1], &c__1);
|
||||
i__1 = imax + (k + 1) * w_dim1;
|
||||
i__2 = imax + imax * a_dim1;
|
||||
d__1 = a[i__2].r;
|
||||
w[i__1].r = d__1, w[i__1].i = 0.;
|
||||
if (imax < *n) {
|
||||
i__1 = *n - imax;
|
||||
zcopy_(&i__1, &a[imax + 1 + imax * a_dim1], &c__1, &w[
|
||||
imax + 1 + (k + 1) * w_dim1], &c__1);
|
||||
}
|
||||
if (k > 1) {
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
z__1.r = -1., z__1.i = -0.;
|
||||
zgemv_("No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1]
|
||||
, lda, &w[imax + w_dim1], ldw, &c_b1, &w[k + (k +
|
||||
1) * w_dim1], &c__1, (ftnlen)12);
|
||||
i__1 = imax + (k + 1) * w_dim1;
|
||||
i__2 = imax + (k + 1) * w_dim1;
|
||||
d__1 = w[i__2].r;
|
||||
w[i__1].r = d__1, w[i__1].i = 0.;
|
||||
}
|
||||
if (imax != k) {
|
||||
i__1 = imax - k;
|
||||
jmax = k - 1 + izamax_(&i__1, &w[k + (k + 1) * w_dim1], &
|
||||
c__1);
|
||||
i__1 = jmax + (k + 1) * w_dim1;
|
||||
rowmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&
|
||||
w[jmax + (k + 1) * w_dim1]), abs(d__2));
|
||||
} else {
|
||||
rowmax = 0.;
|
||||
}
|
||||
if (imax < *n) {
|
||||
i__1 = *n - imax;
|
||||
itemp = imax + izamax_(&i__1, &w[imax + 1 + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = itemp + (k + 1) * w_dim1;
|
||||
dtemp = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[
|
||||
itemp + (k + 1) * w_dim1]), abs(d__2));
|
||||
if (dtemp > rowmax) {
|
||||
rowmax = dtemp;
|
||||
jmax = itemp;
|
||||
}
|
||||
}
|
||||
i__1 = imax + (k + 1) * w_dim1;
|
||||
if (! ((d__1 = w[i__1].r, abs(d__1)) < alpha * rowmax)) {
|
||||
kp = imax;
|
||||
i__1 = *n - k + 1;
|
||||
zcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k *
|
||||
w_dim1], &c__1);
|
||||
done = TRUE_;
|
||||
} else if (p == jmax || rowmax <= colmax) {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
done = TRUE_;
|
||||
} else {
|
||||
p = imax;
|
||||
colmax = rowmax;
|
||||
imax = jmax;
|
||||
i__1 = *n - k + 1;
|
||||
zcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k *
|
||||
w_dim1], &c__1);
|
||||
}
|
||||
if (! done) {
|
||||
goto L72;
|
||||
}
|
||||
}
|
||||
kk = k + kstep - 1;
|
||||
if (kstep == 2 && p != k) {
|
||||
i__1 = p + p * a_dim1;
|
||||
i__2 = k + k * a_dim1;
|
||||
d__1 = a[i__2].r;
|
||||
a[i__1].r = d__1, a[i__1].i = 0.;
|
||||
i__1 = p - k - 1;
|
||||
zcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &a[p + (k + 1) *
|
||||
a_dim1], lda);
|
||||
i__1 = p - k - 1;
|
||||
zlacgv_(&i__1, &a[p + (k + 1) * a_dim1], lda);
|
||||
if (p < *n) {
|
||||
i__1 = *n - p;
|
||||
zcopy_(&i__1, &a[p + 1 + k * a_dim1], &c__1, &a[p + 1 + p
|
||||
* a_dim1], &c__1);
|
||||
}
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
zswap_(&i__1, &a[k + a_dim1], lda, &a[p + a_dim1], lda);
|
||||
}
|
||||
zswap_(&kk, &w[k + w_dim1], ldw, &w[p + w_dim1], ldw);
|
||||
}
|
||||
if (kp != kk) {
|
||||
i__1 = kp + kp * a_dim1;
|
||||
i__2 = kk + kk * a_dim1;
|
||||
d__1 = a[i__2].r;
|
||||
a[i__1].r = d__1, a[i__1].i = 0.;
|
||||
i__1 = kp - kk - 1;
|
||||
zcopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk +
|
||||
1) * a_dim1], lda);
|
||||
i__1 = kp - kk - 1;
|
||||
zlacgv_(&i__1, &a[kp + (kk + 1) * a_dim1], lda);
|
||||
if (kp < *n) {
|
||||
i__1 = *n - kp;
|
||||
zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1
|
||||
+ kp * a_dim1], &c__1);
|
||||
}
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
zswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda);
|
||||
}
|
||||
zswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
i__1 = *n - k + 1;
|
||||
zcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
|
||||
c__1);
|
||||
if (k < *n) {
|
||||
i__1 = k + k * a_dim1;
|
||||
t = a[i__1].r;
|
||||
if (abs(t) >= sfmin) {
|
||||
r1 = 1. / t;
|
||||
i__1 = *n - k;
|
||||
zdscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1);
|
||||
} else {
|
||||
i__1 = *n;
|
||||
for (ii = k + 1; ii <= i__1; ++ii) {
|
||||
i__2 = ii + k * a_dim1;
|
||||
i__3 = ii + k * a_dim1;
|
||||
z__1.r = a[i__3].r / t, z__1.i = a[i__3].i / t;
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
/* L74: */
|
||||
}
|
||||
}
|
||||
i__1 = *n - k;
|
||||
zlacgv_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
}
|
||||
} else {
|
||||
if (k < *n - 1) {
|
||||
i__1 = k + 1 + k * w_dim1;
|
||||
d21.r = w[i__1].r, d21.i = w[i__1].i;
|
||||
z_div(&z__1, &w[k + 1 + (k + 1) * w_dim1], &d21);
|
||||
d11.r = z__1.r, d11.i = z__1.i;
|
||||
d_cnjg(&z__2, &d21);
|
||||
z_div(&z__1, &w[k + k * w_dim1], &z__2);
|
||||
d22.r = z__1.r, d22.i = z__1.i;
|
||||
z__1.r = d11.r * d22.r - d11.i * d22.i, z__1.i = d11.r *
|
||||
d22.i + d11.i * d22.r;
|
||||
t = 1. / (z__1.r - 1.);
|
||||
i__1 = *n;
|
||||
for (j = k + 2; j <= i__1; ++j) {
|
||||
i__2 = j + k * a_dim1;
|
||||
i__3 = j + k * w_dim1;
|
||||
z__4.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
|
||||
z__4.i = d11.r * w[i__3].i + d11.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + (k + 1) * w_dim1;
|
||||
z__3.r = z__4.r - w[i__4].r, z__3.i = z__4.i - w[i__4]
|
||||
.i;
|
||||
d_cnjg(&z__5, &d21);
|
||||
z_div(&z__2, &z__3, &z__5);
|
||||
z__1.r = t * z__2.r, z__1.i = t * z__2.i;
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
i__2 = j + (k + 1) * a_dim1;
|
||||
i__3 = j + (k + 1) * w_dim1;
|
||||
z__4.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
|
||||
z__4.i = d22.r * w[i__3].i + d22.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + k * w_dim1;
|
||||
z__3.r = z__4.r - w[i__4].r, z__3.i = z__4.i - w[i__4]
|
||||
.i;
|
||||
z_div(&z__2, &z__3, &d21);
|
||||
z__1.r = t * z__2.r, z__1.i = t * z__2.i;
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
/* L80: */
|
||||
}
|
||||
}
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + k * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + 1 + k * a_dim1;
|
||||
i__2 = k + 1 + k * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + 1 + (k + 1) * a_dim1;
|
||||
i__2 = k + 1 + (k + 1) * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = *n - k;
|
||||
zlacgv_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
i__1 = *n - k - 1;
|
||||
zlacgv_(&i__1, &w[k + 2 + (k + 1) * w_dim1], &c__1);
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -p;
|
||||
ipiv[k + 1] = -kp;
|
||||
}
|
||||
k += kstep;
|
||||
goto L70;
|
||||
L90:
|
||||
j = k - 1;
|
||||
L120:
|
||||
kstep = 1;
|
||||
jp1 = 1;
|
||||
jj = j;
|
||||
jp2 = ipiv[j];
|
||||
if (jp2 < 0) {
|
||||
jp2 = -jp2;
|
||||
--j;
|
||||
jp1 = -ipiv[j];
|
||||
kstep = 2;
|
||||
}
|
||||
--j;
|
||||
if (jp2 != jj && j >= 1) {
|
||||
zswap_(&j, &a[jp2 + a_dim1], lda, &a[jj + a_dim1], lda);
|
||||
}
|
||||
--jj;
|
||||
if (kstep == 2 && jp1 != jj && j >= 1) {
|
||||
zswap_(&j, &a[jp1 + a_dim1], lda, &a[jj + a_dim1], lda);
|
||||
}
|
||||
if (j > 1) {
|
||||
goto L120;
|
||||
}
|
||||
*kb = k - 1;
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_zlauum_rec(const char *, const int *, double *,
|
||||
const int *, int *);
|
||||
|
||||
|
||||
/** ZLAUUM computes the product U * U**H or L**H * L, where the triangular factor U or L is stored in the upper or lower triangular part of the array A.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's zlauum.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d8/d45/zlauum_8f.html
|
||||
* */
|
||||
void RELAPACK_zlauum(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("ZLAUUM", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_zlauum_rec(&cleanuplo, n, A, ldA, info);
|
||||
}
|
||||
|
||||
|
||||
/** zlauum's recursive compute kernel */
|
||||
static void RELAPACK_zlauum_rec(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_ZLAUUM, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(zlauu2)(uplo, n, A, ldA, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1., 0. };
|
||||
|
||||
// Splitting
|
||||
const int n1 = ZREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
double *const A_TL = A;
|
||||
double *const A_TR = A + 2 * *ldA * n1;
|
||||
double *const A_BL = A + 2 * n1;
|
||||
double *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
|
||||
// recursion(A_TL)
|
||||
RELAPACK_zlauum_rec(uplo, &n1, A_TL, ldA, info);
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// A_TL = A_TL + A_BL' * A_BL
|
||||
BLAS(zherk)("L", "C", &n1, &n2, ONE, A_BL, ldA, ONE, A_TL, ldA);
|
||||
// A_BL = A_BR' * A_BL
|
||||
BLAS(ztrmm)("L", "L", "C", "N", &n2, &n1, ONE, A_BR, ldA, A_BL, ldA);
|
||||
} else {
|
||||
// A_TL = A_TL + A_TR * A_TR'
|
||||
BLAS(zherk)("U", "N", &n1, &n2, ONE, A_TR, ldA, ONE, A_TL, ldA);
|
||||
// A_TR = A_TR * A_BR'
|
||||
BLAS(ztrmm)("R", "U", "C", "N", &n1, &n2, ONE, A_BR, ldA, A_TR, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR)
|
||||
RELAPACK_zlauum_rec(uplo, &n2, A_BR, ldA, info);
|
||||
}
|
|
@ -0,0 +1,157 @@
|
|||
#include "relapack.h"
|
||||
#include "stdlib.h"
|
||||
|
||||
static void RELAPACK_zpbtrf_rec(const char *, const int *, const int *,
|
||||
double *, const int *, double *, const int *, int *);
|
||||
|
||||
|
||||
/** ZPBTRF computes the Cholesky factorization of a complex Hermitian positive definite band matrix A.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's zpbtrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/db/da9/zpbtrf_8f.html
|
||||
* */
|
||||
void RELAPACK_zpbtrf(
|
||||
const char *uplo, const int *n, const int *kd,
|
||||
double *Ab, const int *ldAb,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*kd < 0)
|
||||
*info = -3;
|
||||
else if (*ldAb < *kd + 1)
|
||||
*info = -5;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("ZPBTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Constant
|
||||
const double ZERO[] = { 0., 0. };
|
||||
|
||||
// Allocate work space
|
||||
const int n1 = ZREC_SPLIT(*n);
|
||||
const int mWork = (*kd > n1) ? (lower ? *n - *kd : n1) : *kd;
|
||||
const int nWork = (*kd > n1) ? (lower ? n1 : *n - *kd) : *kd;
|
||||
double *Work = malloc(mWork * nWork * 2 * sizeof(double));
|
||||
LAPACK(zlaset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork);
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_zpbtrf_rec(&cleanuplo, n, kd, Ab, ldAb, Work, &mWork, info);
|
||||
|
||||
// Free work space
|
||||
free(Work);
|
||||
}
|
||||
|
||||
|
||||
/** zpbtrf's recursive compute kernel */
|
||||
static void RELAPACK_zpbtrf_rec(
|
||||
const char *uplo, const int *n, const int *kd,
|
||||
double *Ab, const int *ldAb,
|
||||
double *Work, const int *ldWork,
|
||||
int *info
|
||||
){
|
||||
|
||||
if (*n <= MAX(CROSSOVER_ZPBTRF, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(zpbtf2)(uplo, n, kd, Ab, ldAb, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1., 0. };
|
||||
const double MONE[] = { -1., 0. };
|
||||
|
||||
// Unskew A
|
||||
const int ldA[] = { *ldAb - 1 };
|
||||
double *const A = Ab + 2 * ((*uplo == 'L') ? 0 : *kd);
|
||||
|
||||
// Splitting
|
||||
const int n1 = MIN(ZREC_SPLIT(*n), *kd);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// * *
|
||||
// * Ab_BR
|
||||
double *const Ab_BR = Ab + 2 * *ldAb * n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
double *const A_TL = A;
|
||||
double *const A_TR = A + 2 * *ldA * n1;
|
||||
double *const A_BL = A + 2 * n1;
|
||||
double *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
|
||||
// recursion(A_TL)
|
||||
RELAPACK_zpotrf(uplo, &n1, A_TL, ldA, info);
|
||||
if (*info)
|
||||
return;
|
||||
|
||||
// Banded splitting
|
||||
const int n21 = MIN(n2, *kd - n1);
|
||||
const int n22 = MIN(n2 - n21, *kd);
|
||||
|
||||
// n1 n21 n22
|
||||
// n1 * A_TRl A_TRr
|
||||
// n21 A_BLt A_BRtl A_BRtr
|
||||
// n22 A_BLb A_BRbl A_BRbr
|
||||
double *const A_TRl = A_TR;
|
||||
double *const A_TRr = A_TR + 2 * *ldA * n21;
|
||||
double *const A_BLt = A_BL;
|
||||
double *const A_BLb = A_BL + 2 * n21;
|
||||
double *const A_BRtl = A_BR;
|
||||
double *const A_BRtr = A_BR + 2 * *ldA * n21;
|
||||
double *const A_BRbl = A_BR + 2 * n21;
|
||||
double *const A_BRbr = A_BR + 2 * *ldA * n21 + 2 * n21;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// A_BLt = ABLt / A_TL'
|
||||
BLAS(ztrsm)("R", "L", "C", "N", &n21, &n1, ONE, A_TL, ldA, A_BLt, ldA);
|
||||
// A_BRtl = A_BRtl - A_BLt * A_BLt'
|
||||
BLAS(zherk)("L", "N", &n21, &n1, MONE, A_BLt, ldA, ONE, A_BRtl, ldA);
|
||||
// Work = A_BLb
|
||||
LAPACK(zlacpy)("U", &n22, &n1, A_BLb, ldA, Work, ldWork);
|
||||
// Work = Work / A_TL'
|
||||
BLAS(ztrsm)("R", "L", "C", "N", &n22, &n1, ONE, A_TL, ldA, Work, ldWork);
|
||||
// A_BRbl = A_BRbl - Work * A_BLt'
|
||||
BLAS(zgemm)("N", "C", &n22, &n21, &n1, MONE, Work, ldWork, A_BLt, ldA, ONE, A_BRbl, ldA);
|
||||
// A_BRbr = A_BRbr - Work * Work'
|
||||
BLAS(zherk)("L", "N", &n22, &n1, MONE, Work, ldWork, ONE, A_BRbr, ldA);
|
||||
// A_BLb = Work
|
||||
LAPACK(zlacpy)("U", &n22, &n1, Work, ldWork, A_BLb, ldA);
|
||||
} else {
|
||||
// A_TRl = A_TL' \ A_TRl
|
||||
BLAS(ztrsm)("L", "U", "C", "N", &n1, &n21, ONE, A_TL, ldA, A_TRl, ldA);
|
||||
// A_BRtl = A_BRtl - A_TRl' * A_TRl
|
||||
BLAS(zherk)("U", "C", &n21, &n1, MONE, A_TRl, ldA, ONE, A_BRtl, ldA);
|
||||
// Work = A_TRr
|
||||
LAPACK(zlacpy)("L", &n1, &n22, A_TRr, ldA, Work, ldWork);
|
||||
// Work = A_TL' \ Work
|
||||
BLAS(ztrsm)("L", "U", "C", "N", &n1, &n22, ONE, A_TL, ldA, Work, ldWork);
|
||||
// A_BRtr = A_BRtr - A_TRl' * Work
|
||||
BLAS(zgemm)("C", "N", &n21, &n22, &n1, MONE, A_TRl, ldA, Work, ldWork, ONE, A_BRtr, ldA);
|
||||
// A_BRbr = A_BRbr - Work' * Work
|
||||
BLAS(zherk)("U", "C", &n22, &n1, MONE, Work, ldWork, ONE, A_BRbr, ldA);
|
||||
// A_TRr = Work
|
||||
LAPACK(zlacpy)("L", &n1, &n22, Work, ldWork, A_TRr, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR)
|
||||
if (*kd > n1)
|
||||
RELAPACK_zpotrf(uplo, &n2, A_BR, ldA, info);
|
||||
else
|
||||
RELAPACK_zpbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
}
|
|
@ -0,0 +1,92 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_zpotrf_rec(const char *, const int *, double *,
|
||||
const int *, int *);
|
||||
|
||||
|
||||
/** ZPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix A.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's zpotrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d1/db9/zpotrf_8f.html
|
||||
* */
|
||||
void RELAPACK_zpotrf(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("ZPOTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_zpotrf_rec(&cleanuplo, n, A, ldA, info);
|
||||
}
|
||||
|
||||
|
||||
/** zpotrf's recursive compute kernel */
|
||||
static void RELAPACK_zpotrf_rec(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*n <= MAX(CROSSOVER_ZPOTRF, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(zpotf2)(uplo, n, A, ldA, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1., 0. };
|
||||
const double MONE[] = { -1., 0. };
|
||||
|
||||
// Splitting
|
||||
const int n1 = ZREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
double *const A_TL = A;
|
||||
double *const A_TR = A + 2 * *ldA * n1;
|
||||
double *const A_BL = A + 2 * n1;
|
||||
double *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
|
||||
// recursion(A_TL)
|
||||
RELAPACK_zpotrf_rec(uplo, &n1, A_TL, ldA, info);
|
||||
if (*info)
|
||||
return;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// A_BL = A_BL / A_TL'
|
||||
BLAS(ztrsm)("R", "L", "C", "N", &n2, &n1, ONE, A_TL, ldA, A_BL, ldA);
|
||||
// A_BR = A_BR - A_BL * A_BL'
|
||||
BLAS(zherk)("L", "N", &n2, &n1, MONE, A_BL, ldA, ONE, A_BR, ldA);
|
||||
} else {
|
||||
// A_TR = A_TL' \ A_TR
|
||||
BLAS(ztrsm)("L", "U", "C", "N", &n1, &n2, ONE, A_TL, ldA, A_TR, ldA);
|
||||
// A_BR = A_BR - A_TR' * A_TR
|
||||
BLAS(zherk)("U", "C", &n2, &n1, MONE, A_TR, ldA, ONE, A_BR, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR)
|
||||
RELAPACK_zpotrf_rec(uplo, &n2, A_BR, ldA, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
}
|
|
@ -0,0 +1,238 @@
|
|||
#include "relapack.h"
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
static void RELAPACK_zsytrf_rec(const char *, const int *, const int *, int *,
|
||||
double *, const int *, int *, double *, const int *, int *);
|
||||
|
||||
|
||||
/** ZSYTRF computes the factorization of a complex symmetric matrix A using the Bunch-Kaufman diagonal pivoting method.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's zsytrf.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/da/d94/zsytrf_8f.html
|
||||
* */
|
||||
void RELAPACK_zsytrf(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
|
||||
// Required work size
|
||||
const int cleanlWork = *n * (*n / 2);
|
||||
int minlWork = cleanlWork;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
minlWork = 1;
|
||||
#endif
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
else if (*lWork < minlWork && *lWork != -1)
|
||||
*info = -7;
|
||||
else if (*lWork == -1) {
|
||||
// Work size query
|
||||
*Work = cleanlWork;
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure Work size
|
||||
double *cleanWork = Work;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (!*info && *lWork < cleanlWork) {
|
||||
cleanWork = malloc(cleanlWork * 2 * sizeof(double));
|
||||
if (!cleanWork)
|
||||
*info = -7;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("ZSYTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Dummy arguments
|
||||
int nout;
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_zsytrf_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info);
|
||||
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (cleanWork != Work)
|
||||
free(cleanWork);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** zsytrf's recursive compute kernel */
|
||||
static void RELAPACK_zsytrf_rec(
|
||||
const char *uplo, const int *n_full, const int *n, int *n_out,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *ldWork, int *info
|
||||
) {
|
||||
|
||||
// top recursion level?
|
||||
const int top = *n_full == *n;
|
||||
|
||||
if (*n <= MAX(CROSSOVER_ZSYTRF, 3)) {
|
||||
// Unblocked
|
||||
if (top) {
|
||||
LAPACK(zsytf2)(uplo, n, A, ldA, ipiv, info);
|
||||
*n_out = *n;
|
||||
} else
|
||||
RELAPACK_zsytrf_rec2(uplo, n_full, n, n_out, A, ldA, ipiv, Work, ldWork, info);
|
||||
return;
|
||||
}
|
||||
|
||||
int info1, info2;
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1., 0. };
|
||||
const double MONE[] = { -1., 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Loop iterator
|
||||
int i;
|
||||
|
||||
const int n_rest = *n_full - *n;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// Splitting (setup)
|
||||
int n1 = ZREC_SPLIT(*n);
|
||||
int n2 = *n - n1;
|
||||
|
||||
// Work_L *
|
||||
double *const Work_L = Work;
|
||||
|
||||
// recursion(A_L)
|
||||
int n1_out;
|
||||
RELAPACK_zsytrf_rec(uplo, n_full, &n1, &n1_out, A, ldA, ipiv, Work_L, ldWork, &info1);
|
||||
n1 = n1_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n2 = *n - n1;
|
||||
const int n_full2 = *n_full - n1;
|
||||
|
||||
// * *
|
||||
// A_BL A_BR
|
||||
// A_BL_B A_BR_B
|
||||
double *const A_BL = A + 2 * n1;
|
||||
double *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
double *const A_BL_B = A + 2 * *n;
|
||||
double *const A_BR_B = A + 2 * *ldA * n1 + 2 * *n;
|
||||
|
||||
// * *
|
||||
// Work_BL Work_BR
|
||||
// * *
|
||||
// (top recursion level: use Work as Work_BR)
|
||||
double *const Work_BL = Work + 2 * n1;
|
||||
double *const Work_BR = top ? Work : Work + 2 * *ldWork * n1 + 2 * n1;
|
||||
const int ldWork_BR = top ? n2 : *ldWork;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// A_BR = A_BR - A_BL Work_BL'
|
||||
RELAPACK_zgemmt(uplo, "N", "T", &n2, &n1, MONE, A_BL, ldA, Work_BL, ldWork, ONE, A_BR, ldA);
|
||||
BLAS(zgemm)("N", "T", &n_rest, &n2, &n1, MONE, A_BL_B, ldA, Work_BL, ldWork, ONE, A_BR_B, ldA);
|
||||
|
||||
// recursion(A_BR)
|
||||
int n2_out;
|
||||
RELAPACK_zsytrf_rec(uplo, &n_full2, &n2, &n2_out, A_BR, ldA, ipiv_B, Work_BR, &ldWork_BR, &info2);
|
||||
|
||||
if (n2_out != n2) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// last column of A_BR
|
||||
double *const A_BR_r = A_BR + 2 * *ldA * n2_out + 2 * n2_out;
|
||||
|
||||
// last row of A_BL
|
||||
double *const A_BL_b = A_BL + 2 * n2_out;
|
||||
|
||||
// last row of Work_BL
|
||||
double *const Work_BL_b = Work_BL + 2 * n2_out;
|
||||
|
||||
// A_BR_r = A_BR_r + A_BL_b Work_BL_b'
|
||||
BLAS(zgemv)("N", &n_restp1, &n1, ONE, A_BL_b, ldA, Work_BL_b, ldWork, ONE, A_BR_r, iONE);
|
||||
}
|
||||
n2 = n2_out;
|
||||
|
||||
// shift pivots
|
||||
for (i = 0; i < n2; i++)
|
||||
if (ipiv_B[i] > 0)
|
||||
ipiv_B[i] += n1;
|
||||
else
|
||||
ipiv_B[i] -= n1;
|
||||
|
||||
*info = info1 || info2;
|
||||
*n_out = n1 + n2;
|
||||
} else {
|
||||
// Splitting (setup)
|
||||
int n2 = ZREC_SPLIT(*n);
|
||||
int n1 = *n - n2;
|
||||
|
||||
// * Work_R
|
||||
// (top recursion level: use Work as Work_R)
|
||||
double *const Work_R = top ? Work : Work + 2 * *ldWork * n1;
|
||||
|
||||
// recursion(A_R)
|
||||
int n2_out;
|
||||
RELAPACK_zsytrf_rec(uplo, n_full, &n2, &n2_out, A, ldA, ipiv, Work_R, ldWork, &info2);
|
||||
const int n2_diff = n2 - n2_out;
|
||||
n2 = n2_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n1 = *n - n2;
|
||||
const int n_full1 = *n_full - n2;
|
||||
|
||||
// * A_TL_T A_TR_T
|
||||
// * A_TL A_TR
|
||||
// * * *
|
||||
double *const A_TL_T = A + 2 * *ldA * n_rest;
|
||||
double *const A_TR_T = A + 2 * *ldA * (n_rest + n1);
|
||||
double *const A_TL = A + 2 * *ldA * n_rest + 2 * n_rest;
|
||||
double *const A_TR = A + 2 * *ldA * (n_rest + n1) + 2 * n_rest;
|
||||
|
||||
// Work_L *
|
||||
// * Work_TR
|
||||
// * *
|
||||
// (top recursion level: Work_R was Work)
|
||||
double *const Work_L = Work;
|
||||
double *const Work_TR = Work + 2 * *ldWork * (top ? n2_diff : n1) + 2 * n_rest;
|
||||
const int ldWork_L = top ? n1 : *ldWork;
|
||||
|
||||
// A_TL = A_TL - A_TR Work_TR'
|
||||
RELAPACK_zgemmt(uplo, "N", "T", &n1, &n2, MONE, A_TR, ldA, Work_TR, ldWork, ONE, A_TL, ldA);
|
||||
BLAS(zgemm)("N", "T", &n_rest, &n1, &n2, MONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, ldA);
|
||||
|
||||
// recursion(A_TL)
|
||||
int n1_out;
|
||||
RELAPACK_zsytrf_rec(uplo, &n_full1, &n1, &n1_out, A, ldA, ipiv, Work_L, &ldWork_L, &info1);
|
||||
|
||||
if (n1_out != n1) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// A_TL_T_l = A_TL_T_l + A_TR_T Work_TR_t'
|
||||
BLAS(zgemv)("N", &n_restp1, &n2, ONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, iONE);
|
||||
}
|
||||
n1 = n1_out;
|
||||
|
||||
*info = info2 || info1;
|
||||
*n_out = n1 + n2;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,452 @@
|
|||
/* -- translated by f2c (version 20100827).
|
||||
You must link the resulting object file with libf2c:
|
||||
on Microsoft Windows system, link with libf2c.lib;
|
||||
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
|
||||
or, if you install libf2c.a in a standard place, with -lf2c -lm
|
||||
-- in that order, at the end of the command line, as in
|
||||
cc *.o -lf2c -lm
|
||||
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
|
||||
|
||||
http://www.netlib.org/f2c/libf2c.zip
|
||||
*/
|
||||
|
||||
#include "f2c.h"
|
||||
|
||||
/* Table of constant values */
|
||||
|
||||
static doublecomplex c_b1 = {1.,0.};
|
||||
static int c__1 = 1;
|
||||
|
||||
/** ZSYTRF_REC2 computes a partial factorization of a complex symmetric matrix using the Bunch-Kaufman diagon al pivoting method.
|
||||
*
|
||||
* This routine is a minor modification of LAPACK's zlasyf.
|
||||
* It serves as an unblocked kernel in the recursive algorithms.
|
||||
* The blocked BLAS Level 3 updates were removed and moved to the
|
||||
* recursive algorithm.
|
||||
* */
|
||||
/* Subroutine */ void RELAPACK_zsytrf_rec2(char *uplo, int *n, int *
|
||||
nb, int *kb, doublecomplex *a, int *lda, int *ipiv,
|
||||
doublecomplex *w, int *ldw, int *info, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
int a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4;
|
||||
double d__1, d__2, d__3, d__4;
|
||||
doublecomplex z__1, z__2, z__3;
|
||||
|
||||
/* Builtin functions */
|
||||
double sqrt(double), d_imag(doublecomplex *);
|
||||
void z_div(doublecomplex *, doublecomplex *, doublecomplex *);
|
||||
|
||||
/* Local variables */
|
||||
static int j, k;
|
||||
static doublecomplex t, r1, d11, d21, d22;
|
||||
static int jj, kk, jp, kp, kw, kkw, imax, jmax;
|
||||
static double alpha;
|
||||
extern logical lsame_(char *, char *, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ int zscal_(int *, doublecomplex *,
|
||||
doublecomplex *, int *);
|
||||
static int kstep;
|
||||
extern /* Subroutine */ int zgemv_(char *, int *, int *,
|
||||
doublecomplex *, doublecomplex *, int *, doublecomplex *,
|
||||
int *, doublecomplex *, doublecomplex *, int *, ftnlen),
|
||||
zcopy_(int *, doublecomplex *, int *, doublecomplex *,
|
||||
int *), zswap_(int *, doublecomplex *, int *,
|
||||
doublecomplex *, int *);
|
||||
static double absakk, colmax;
|
||||
extern int izamax_(int *, doublecomplex *, int *);
|
||||
static double rowmax;
|
||||
|
||||
/* Parameter adjustments */
|
||||
a_dim1 = *lda;
|
||||
a_offset = 1 + a_dim1;
|
||||
a -= a_offset;
|
||||
--ipiv;
|
||||
w_dim1 = *ldw;
|
||||
w_offset = 1 + w_dim1;
|
||||
w -= w_offset;
|
||||
|
||||
/* Function Body */
|
||||
*info = 0;
|
||||
alpha = (sqrt(17.) + 1.) / 8.;
|
||||
if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
|
||||
k = *n;
|
||||
L10:
|
||||
kw = *nb + k - *n;
|
||||
if ((k <= *n - *nb + 1 && *nb < *n) || k < 1) {
|
||||
goto L30;
|
||||
}
|
||||
zcopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
z__1.r = -1., z__1.i = -0.;
|
||||
zgemv_("No transpose", &k, &i__1, &z__1, &a[(k + 1) * a_dim1 + 1],
|
||||
lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b1, &w[kw *
|
||||
w_dim1 + 1], &c__1, (ftnlen)12);
|
||||
}
|
||||
kstep = 1;
|
||||
i__1 = k + kw * w_dim1;
|
||||
absakk = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[k + kw *
|
||||
w_dim1]), abs(d__2));
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
imax = izamax_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
i__1 = imax + kw * w_dim1;
|
||||
colmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[imax +
|
||||
kw * w_dim1]), abs(d__2));
|
||||
} else {
|
||||
colmax = 0.;
|
||||
}
|
||||
if (max(absakk,colmax) == 0.) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
} else {
|
||||
if (absakk >= alpha * colmax) {
|
||||
kp = k;
|
||||
} else {
|
||||
zcopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) *
|
||||
w_dim1 + 1], &c__1);
|
||||
i__1 = k - imax;
|
||||
zcopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax +
|
||||
1 + (kw - 1) * w_dim1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
z__1.r = -1., z__1.i = -0.;
|
||||
zgemv_("No transpose", &k, &i__1, &z__1, &a[(k + 1) *
|
||||
a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1],
|
||||
ldw, &c_b1, &w[(kw - 1) * w_dim1 + 1], &c__1, (
|
||||
ftnlen)12);
|
||||
}
|
||||
i__1 = k - imax;
|
||||
jmax = imax + izamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1],
|
||||
&c__1);
|
||||
i__1 = jmax + (kw - 1) * w_dim1;
|
||||
rowmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[
|
||||
jmax + (kw - 1) * w_dim1]), abs(d__2));
|
||||
if (imax > 1) {
|
||||
i__1 = imax - 1;
|
||||
jmax = izamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
|
||||
/* Computing MAX */
|
||||
i__1 = jmax + (kw - 1) * w_dim1;
|
||||
d__3 = rowmax, d__4 = (d__1 = w[i__1].r, abs(d__1)) + (
|
||||
d__2 = d_imag(&w[jmax + (kw - 1) * w_dim1]), abs(
|
||||
d__2));
|
||||
rowmax = max(d__3,d__4);
|
||||
}
|
||||
if (absakk >= alpha * colmax * (colmax / rowmax)) {
|
||||
kp = k;
|
||||
} else /* if(complicated condition) */ {
|
||||
i__1 = imax + (kw - 1) * w_dim1;
|
||||
if ((d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[
|
||||
imax + (kw - 1) * w_dim1]), abs(d__2)) >= alpha *
|
||||
rowmax) {
|
||||
kp = imax;
|
||||
zcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
|
||||
w_dim1 + 1], &c__1);
|
||||
} else {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
kk = k - kstep + 1;
|
||||
kkw = *nb + kk - *n;
|
||||
if (kp != kk) {
|
||||
i__1 = kp + kp * a_dim1;
|
||||
i__2 = kk + kk * a_dim1;
|
||||
a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i;
|
||||
i__1 = kk - 1 - kp;
|
||||
zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp +
|
||||
1) * a_dim1], lda);
|
||||
if (kp > 1) {
|
||||
i__1 = kp - 1;
|
||||
zcopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1
|
||||
+ 1], &c__1);
|
||||
}
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
zswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k
|
||||
+ 1) * a_dim1], lda);
|
||||
}
|
||||
i__1 = *n - kk + 1;
|
||||
zswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw *
|
||||
w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
zcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &
|
||||
c__1);
|
||||
z_div(&z__1, &c_b1, &a[k + k * a_dim1]);
|
||||
r1.r = z__1.r, r1.i = z__1.i;
|
||||
i__1 = k - 1;
|
||||
zscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1);
|
||||
} else {
|
||||
if (k > 2) {
|
||||
i__1 = k - 1 + kw * w_dim1;
|
||||
d21.r = w[i__1].r, d21.i = w[i__1].i;
|
||||
z_div(&z__1, &w[k + kw * w_dim1], &d21);
|
||||
d11.r = z__1.r, d11.i = z__1.i;
|
||||
z_div(&z__1, &w[k - 1 + (kw - 1) * w_dim1], &d21);
|
||||
d22.r = z__1.r, d22.i = z__1.i;
|
||||
z__3.r = d11.r * d22.r - d11.i * d22.i, z__3.i = d11.r *
|
||||
d22.i + d11.i * d22.r;
|
||||
z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.;
|
||||
z_div(&z__1, &c_b1, &z__2);
|
||||
t.r = z__1.r, t.i = z__1.i;
|
||||
z_div(&z__1, &t, &d21);
|
||||
d21.r = z__1.r, d21.i = z__1.i;
|
||||
i__1 = k - 2;
|
||||
for (j = 1; j <= i__1; ++j) {
|
||||
i__2 = j + (k - 1) * a_dim1;
|
||||
i__3 = j + (kw - 1) * w_dim1;
|
||||
z__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
|
||||
z__3.i = d11.r * w[i__3].i + d11.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + kw * w_dim1;
|
||||
z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4]
|
||||
.i;
|
||||
z__1.r = d21.r * z__2.r - d21.i * z__2.i, z__1.i =
|
||||
d21.r * z__2.i + d21.i * z__2.r;
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
i__2 = j + k * a_dim1;
|
||||
i__3 = j + kw * w_dim1;
|
||||
z__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
|
||||
z__3.i = d22.r * w[i__3].i + d22.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + (kw - 1) * w_dim1;
|
||||
z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4]
|
||||
.i;
|
||||
z__1.r = d21.r * z__2.r - d21.i * z__2.i, z__1.i =
|
||||
d21.r * z__2.i + d21.i * z__2.r;
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
/* L20: */
|
||||
}
|
||||
}
|
||||
i__1 = k - 1 + (k - 1) * a_dim1;
|
||||
i__2 = k - 1 + (kw - 1) * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k - 1 + k * a_dim1;
|
||||
i__2 = k - 1 + kw * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + kw * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -kp;
|
||||
ipiv[k - 1] = -kp;
|
||||
}
|
||||
k -= kstep;
|
||||
goto L10;
|
||||
L30:
|
||||
j = k + 1;
|
||||
L60:
|
||||
jj = j;
|
||||
jp = ipiv[j];
|
||||
if (jp < 0) {
|
||||
jp = -jp;
|
||||
++j;
|
||||
}
|
||||
++j;
|
||||
if (jp != jj && j <= *n) {
|
||||
i__1 = *n - j + 1;
|
||||
zswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda);
|
||||
}
|
||||
if (j < *n) {
|
||||
goto L60;
|
||||
}
|
||||
*kb = *n - k;
|
||||
} else {
|
||||
k = 1;
|
||||
L70:
|
||||
if ((k >= *nb && *nb < *n) || k > *n) {
|
||||
goto L90;
|
||||
}
|
||||
i__1 = *n - k + 1;
|
||||
zcopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1);
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
z__1.r = -1., z__1.i = -0.;
|
||||
zgemv_("No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1], lda, &w[k
|
||||
+ w_dim1], ldw, &c_b1, &w[k + k * w_dim1], &c__1, (ftnlen)12);
|
||||
kstep = 1;
|
||||
i__1 = k + k * w_dim1;
|
||||
absakk = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[k + k *
|
||||
w_dim1]), abs(d__2));
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
imax = k + izamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
i__1 = imax + k * w_dim1;
|
||||
colmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[imax +
|
||||
k * w_dim1]), abs(d__2));
|
||||
} else {
|
||||
colmax = 0.;
|
||||
}
|
||||
if (max(absakk,colmax) == 0.) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
} else {
|
||||
if (absakk >= alpha * colmax) {
|
||||
kp = k;
|
||||
} else {
|
||||
i__1 = imax - k;
|
||||
zcopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = *n - imax + 1;
|
||||
zcopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k +
|
||||
1) * w_dim1], &c__1);
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
z__1.r = -1., z__1.i = -0.;
|
||||
zgemv_("No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1],
|
||||
lda, &w[imax + w_dim1], ldw, &c_b1, &w[k + (k + 1) *
|
||||
w_dim1], &c__1, (ftnlen)12);
|
||||
i__1 = imax - k;
|
||||
jmax = k - 1 + izamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1)
|
||||
;
|
||||
i__1 = jmax + (k + 1) * w_dim1;
|
||||
rowmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[
|
||||
jmax + (k + 1) * w_dim1]), abs(d__2));
|
||||
if (imax < *n) {
|
||||
i__1 = *n - imax;
|
||||
jmax = imax + izamax_(&i__1, &w[imax + 1 + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
/* Computing MAX */
|
||||
i__1 = jmax + (k + 1) * w_dim1;
|
||||
d__3 = rowmax, d__4 = (d__1 = w[i__1].r, abs(d__1)) + (
|
||||
d__2 = d_imag(&w[jmax + (k + 1) * w_dim1]), abs(
|
||||
d__2));
|
||||
rowmax = max(d__3,d__4);
|
||||
}
|
||||
if (absakk >= alpha * colmax * (colmax / rowmax)) {
|
||||
kp = k;
|
||||
} else /* if(complicated condition) */ {
|
||||
i__1 = imax + (k + 1) * w_dim1;
|
||||
if ((d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[
|
||||
imax + (k + 1) * w_dim1]), abs(d__2)) >= alpha *
|
||||
rowmax) {
|
||||
kp = imax;
|
||||
i__1 = *n - k + 1;
|
||||
zcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k +
|
||||
k * w_dim1], &c__1);
|
||||
} else {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
kk = k + kstep - 1;
|
||||
if (kp != kk) {
|
||||
i__1 = kp + kp * a_dim1;
|
||||
i__2 = kk + kk * a_dim1;
|
||||
a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i;
|
||||
i__1 = kp - kk - 1;
|
||||
zcopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk +
|
||||
1) * a_dim1], lda);
|
||||
if (kp < *n) {
|
||||
i__1 = *n - kp;
|
||||
zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1
|
||||
+ kp * a_dim1], &c__1);
|
||||
}
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
zswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda);
|
||||
}
|
||||
zswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
i__1 = *n - k + 1;
|
||||
zcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
|
||||
c__1);
|
||||
if (k < *n) {
|
||||
z_div(&z__1, &c_b1, &a[k + k * a_dim1]);
|
||||
r1.r = z__1.r, r1.i = z__1.i;
|
||||
i__1 = *n - k;
|
||||
zscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1);
|
||||
}
|
||||
} else {
|
||||
if (k < *n - 1) {
|
||||
i__1 = k + 1 + k * w_dim1;
|
||||
d21.r = w[i__1].r, d21.i = w[i__1].i;
|
||||
z_div(&z__1, &w[k + 1 + (k + 1) * w_dim1], &d21);
|
||||
d11.r = z__1.r, d11.i = z__1.i;
|
||||
z_div(&z__1, &w[k + k * w_dim1], &d21);
|
||||
d22.r = z__1.r, d22.i = z__1.i;
|
||||
z__3.r = d11.r * d22.r - d11.i * d22.i, z__3.i = d11.r *
|
||||
d22.i + d11.i * d22.r;
|
||||
z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.;
|
||||
z_div(&z__1, &c_b1, &z__2);
|
||||
t.r = z__1.r, t.i = z__1.i;
|
||||
z_div(&z__1, &t, &d21);
|
||||
d21.r = z__1.r, d21.i = z__1.i;
|
||||
i__1 = *n;
|
||||
for (j = k + 2; j <= i__1; ++j) {
|
||||
i__2 = j + k * a_dim1;
|
||||
i__3 = j + k * w_dim1;
|
||||
z__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
|
||||
z__3.i = d11.r * w[i__3].i + d11.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + (k + 1) * w_dim1;
|
||||
z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4]
|
||||
.i;
|
||||
z__1.r = d21.r * z__2.r - d21.i * z__2.i, z__1.i =
|
||||
d21.r * z__2.i + d21.i * z__2.r;
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
i__2 = j + (k + 1) * a_dim1;
|
||||
i__3 = j + (k + 1) * w_dim1;
|
||||
z__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
|
||||
z__3.i = d22.r * w[i__3].i + d22.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + k * w_dim1;
|
||||
z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4]
|
||||
.i;
|
||||
z__1.r = d21.r * z__2.r - d21.i * z__2.i, z__1.i =
|
||||
d21.r * z__2.i + d21.i * z__2.r;
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
/* L80: */
|
||||
}
|
||||
}
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + k * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + 1 + k * a_dim1;
|
||||
i__2 = k + 1 + k * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + 1 + (k + 1) * a_dim1;
|
||||
i__2 = k + 1 + (k + 1) * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -kp;
|
||||
ipiv[k + 1] = -kp;
|
||||
}
|
||||
k += kstep;
|
||||
goto L70;
|
||||
L90:
|
||||
j = k - 1;
|
||||
L120:
|
||||
jj = j;
|
||||
jp = ipiv[j];
|
||||
if (jp < 0) {
|
||||
jp = -jp;
|
||||
--j;
|
||||
}
|
||||
--j;
|
||||
if (jp != jj && j >= 1) {
|
||||
zswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda);
|
||||
}
|
||||
if (j > 1) {
|
||||
goto L120;
|
||||
}
|
||||
*kb = k - 1;
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,236 @@
|
|||
#include "relapack.h"
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
static void RELAPACK_zsytrf_rook_rec(const char *, const int *, const int *, int *,
|
||||
double *, const int *, int *, double *, const int *, int *);
|
||||
|
||||
|
||||
/** ZSYTRF_ROOK computes the factorization of a complex symmetric matrix A using the bounded Bunch-Kaufman ("rook") diagonal pivoting method.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's zsytrf_rook.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d6/d6e/zsytrf__rook_8f.html
|
||||
* */
|
||||
void RELAPACK_zsytrf_rook(
|
||||
const char *uplo, const int *n,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *lWork, int *info
|
||||
) {
|
||||
|
||||
// Required work size
|
||||
const int cleanlWork = *n * (*n / 2);
|
||||
int minlWork = cleanlWork;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
minlWork = 1;
|
||||
#endif
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (*n < 0)
|
||||
*info = -2;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -4;
|
||||
else if (*lWork < minlWork && *lWork != -1)
|
||||
*info = -7;
|
||||
else if (*lWork == -1) {
|
||||
// Work size query
|
||||
*Work = cleanlWork;
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure Work size
|
||||
double *cleanWork = Work;
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (!*info && *lWork < cleanlWork) {
|
||||
cleanWork = malloc(cleanlWork * 2 * sizeof(double));
|
||||
if (!cleanWork)
|
||||
*info = -7;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("ZSYTRF", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
|
||||
// Dummy argument
|
||||
int nout;
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_zsytrf_rook_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info);
|
||||
|
||||
#if XSYTRF_ALLOW_MALLOC
|
||||
if (cleanWork != Work)
|
||||
free(cleanWork);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/** zsytrf_rook's recursive compute kernel */
|
||||
static void RELAPACK_zsytrf_rook_rec(
|
||||
const char *uplo, const int *n_full, const int *n, int *n_out,
|
||||
double *A, const int *ldA, int *ipiv,
|
||||
double *Work, const int *ldWork, int *info
|
||||
) {
|
||||
|
||||
// top recursion level?
|
||||
const int top = *n_full == *n;
|
||||
|
||||
if (*n <= MAX(CROSSOVER_ZSYTRF_ROOK, 3)) {
|
||||
// Unblocked
|
||||
if (top) {
|
||||
LAPACK(zsytf2)(uplo, n, A, ldA, ipiv, info);
|
||||
*n_out = *n;
|
||||
} else
|
||||
RELAPACK_zsytrf_rook_rec2(uplo, n_full, n, n_out, A, ldA, ipiv, Work, ldWork, info);
|
||||
return;
|
||||
}
|
||||
|
||||
int info1, info2;
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1., 0. };
|
||||
const double MONE[] = { -1., 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
const int n_rest = *n_full - *n;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// Splitting (setup)
|
||||
int n1 = ZREC_SPLIT(*n);
|
||||
int n2 = *n - n1;
|
||||
|
||||
// Work_L *
|
||||
double *const Work_L = Work;
|
||||
|
||||
// recursion(A_L)
|
||||
int n1_out;
|
||||
RELAPACK_zsytrf_rook_rec(uplo, n_full, &n1, &n1_out, A, ldA, ipiv, Work_L, ldWork, &info1);
|
||||
n1 = n1_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n2 = *n - n1;
|
||||
const int n_full2 = *n_full - n1;
|
||||
|
||||
// * *
|
||||
// A_BL A_BR
|
||||
// A_BL_B A_BR_B
|
||||
double *const A_BL = A + 2 * n1;
|
||||
double *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
double *const A_BL_B = A + 2 * *n;
|
||||
double *const A_BR_B = A + 2 * *ldA * n1 + 2 * *n;
|
||||
|
||||
// * *
|
||||
// Work_BL Work_BR
|
||||
// * *
|
||||
// (top recursion level: use Work as Work_BR)
|
||||
double *const Work_BL = Work + 2 * n1;
|
||||
double *const Work_BR = top ? Work : Work + 2 * *ldWork * n1 + 2 * n1;
|
||||
const int ldWork_BR = top ? n2 : *ldWork;
|
||||
|
||||
// ipiv_T
|
||||
// ipiv_B
|
||||
int *const ipiv_B = ipiv + n1;
|
||||
|
||||
// A_BR = A_BR - A_BL Work_BL'
|
||||
RELAPACK_zgemmt(uplo, "N", "T", &n2, &n1, MONE, A_BL, ldA, Work_BL, ldWork, ONE, A_BR, ldA);
|
||||
BLAS(zgemm)("N", "T", &n_rest, &n2, &n1, MONE, A_BL_B, ldA, Work_BL, ldWork, ONE, A_BR_B, ldA);
|
||||
|
||||
// recursion(A_BR)
|
||||
int n2_out;
|
||||
RELAPACK_zsytrf_rook_rec(uplo, &n_full2, &n2, &n2_out, A_BR, ldA, ipiv_B, Work_BR, &ldWork_BR, &info2);
|
||||
|
||||
if (n2_out != n2) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// last column of A_BR
|
||||
double *const A_BR_r = A_BR + 2 * *ldA * n2_out + 2 * n2_out;
|
||||
|
||||
// last row of A_BL
|
||||
double *const A_BL_b = A_BL + 2 * n2_out;
|
||||
|
||||
// last row of Work_BL
|
||||
double *const Work_BL_b = Work_BL + 2 * n2_out;
|
||||
|
||||
// A_BR_r = A_BR_r + A_BL_b Work_BL_b'
|
||||
BLAS(zgemv)("N", &n_restp1, &n1, ONE, A_BL_b, ldA, Work_BL_b, ldWork, ONE, A_BR_r, iONE);
|
||||
}
|
||||
n2 = n2_out;
|
||||
|
||||
// shift pivots
|
||||
int i;
|
||||
for (i = 0; i < n2; i++)
|
||||
if (ipiv_B[i] > 0)
|
||||
ipiv_B[i] += n1;
|
||||
else
|
||||
ipiv_B[i] -= n1;
|
||||
|
||||
*info = info1 || info2;
|
||||
*n_out = n1 + n2;
|
||||
} else {
|
||||
// Splitting (setup)
|
||||
int n2 = ZREC_SPLIT(*n);
|
||||
int n1 = *n - n2;
|
||||
|
||||
// * Work_R
|
||||
// (top recursion level: use Work as Work_R)
|
||||
double *const Work_R = top ? Work : Work + 2 * *ldWork * n1;
|
||||
|
||||
// recursion(A_R)
|
||||
int n2_out;
|
||||
RELAPACK_zsytrf_rook_rec(uplo, n_full, &n2, &n2_out, A, ldA, ipiv, Work_R, ldWork, &info2);
|
||||
const int n2_diff = n2 - n2_out;
|
||||
n2 = n2_out;
|
||||
|
||||
// Splitting (continued)
|
||||
n1 = *n - n2;
|
||||
const int n_full1 = *n_full - n2;
|
||||
|
||||
// * A_TL_T A_TR_T
|
||||
// * A_TL A_TR
|
||||
// * * *
|
||||
double *const A_TL_T = A + 2 * *ldA * n_rest;
|
||||
double *const A_TR_T = A + 2 * *ldA * (n_rest + n1);
|
||||
double *const A_TL = A + 2 * *ldA * n_rest + 2 * n_rest;
|
||||
double *const A_TR = A + 2 * *ldA * (n_rest + n1) + 2 * n_rest;
|
||||
|
||||
// Work_L *
|
||||
// * Work_TR
|
||||
// * *
|
||||
// (top recursion level: Work_R was Work)
|
||||
double *const Work_L = Work;
|
||||
double *const Work_TR = Work + 2 * *ldWork * (top ? n2_diff : n1) + 2 * n_rest;
|
||||
const int ldWork_L = top ? n1 : *ldWork;
|
||||
|
||||
// A_TL = A_TL - A_TR Work_TR'
|
||||
RELAPACK_zgemmt(uplo, "N", "T", &n1, &n2, MONE, A_TR, ldA, Work_TR, ldWork, ONE, A_TL, ldA);
|
||||
BLAS(zgemm)("N", "T", &n_rest, &n1, &n2, MONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, ldA);
|
||||
|
||||
// recursion(A_TL)
|
||||
int n1_out;
|
||||
RELAPACK_zsytrf_rook_rec(uplo, &n_full1, &n1, &n1_out, A, ldA, ipiv, Work_L, &ldWork_L, &info1);
|
||||
|
||||
if (n1_out != n1) {
|
||||
// undo 1 column of updates
|
||||
const int n_restp1 = n_rest + 1;
|
||||
|
||||
// A_TL_T_l = A_TL_T_l + A_TR_T Work_TR_t'
|
||||
BLAS(zgemv)("N", &n_restp1, &n2, ONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, iONE);
|
||||
}
|
||||
n1 = n1_out;
|
||||
|
||||
*info = info2 || info1;
|
||||
*n_out = n1 + n2;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,561 @@
|
|||
/* -- translated by f2c (version 20100827).
|
||||
You must link the resulting object file with libf2c:
|
||||
on Microsoft Windows system, link with libf2c.lib;
|
||||
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
|
||||
or, if you install libf2c.a in a standard place, with -lf2c -lm
|
||||
-- in that order, at the end of the command line, as in
|
||||
cc *.o -lf2c -lm
|
||||
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
|
||||
|
||||
http://www.netlib.org/f2c/libf2c.zip
|
||||
*/
|
||||
|
||||
#include "f2c.h"
|
||||
|
||||
/* Table of constant values */
|
||||
|
||||
static doublecomplex c_b1 = {1.,0.};
|
||||
static int c__1 = 1;
|
||||
|
||||
/** ZSYTRF_ROOK_REC2 computes a partial factorization of a complex symmetric matrix using the bounded Bunch-K aufman ("rook") diagonal pivoting method.
|
||||
*
|
||||
* This routine is a minor modification of LAPACK's zlasyf_rook.
|
||||
* It serves as an unblocked kernel in the recursive algorithms.
|
||||
* The blocked BLAS Level 3 updates were removed and moved to the
|
||||
* recursive algorithm.
|
||||
* */
|
||||
/* Subroutine */ void RELAPACK_zsytrf_rook_rec2(char *uplo, int *n,
|
||||
int *nb, int *kb, doublecomplex *a, int *lda, int *
|
||||
ipiv, doublecomplex *w, int *ldw, int *info, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
int a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4;
|
||||
double d__1, d__2;
|
||||
doublecomplex z__1, z__2, z__3, z__4;
|
||||
|
||||
/* Builtin functions */
|
||||
double sqrt(double), d_imag(doublecomplex *);
|
||||
void z_div(doublecomplex *, doublecomplex *, doublecomplex *);
|
||||
|
||||
/* Local variables */
|
||||
static int j, k, p;
|
||||
static doublecomplex t, r1, d11, d12, d21, d22;
|
||||
static int ii, jj, kk, kp, kw, jp1, jp2, kkw;
|
||||
static logical done;
|
||||
static int imax, jmax;
|
||||
static double alpha;
|
||||
extern logical lsame_(char *, char *, ftnlen, ftnlen);
|
||||
static double dtemp, sfmin;
|
||||
extern /* Subroutine */ int zscal_(int *, doublecomplex *,
|
||||
doublecomplex *, int *);
|
||||
static int itemp, kstep;
|
||||
extern /* Subroutine */ int zgemv_(char *, int *, int *,
|
||||
doublecomplex *, doublecomplex *, int *, doublecomplex *,
|
||||
int *, doublecomplex *, doublecomplex *, int *, ftnlen),
|
||||
zcopy_(int *, doublecomplex *, int *, doublecomplex *,
|
||||
int *), zswap_(int *, doublecomplex *, int *,
|
||||
doublecomplex *, int *);
|
||||
extern double dlamch_(char *, ftnlen);
|
||||
static double absakk, colmax;
|
||||
extern int izamax_(int *, doublecomplex *, int *);
|
||||
static double rowmax;
|
||||
|
||||
/* Parameter adjustments */
|
||||
a_dim1 = *lda;
|
||||
a_offset = 1 + a_dim1;
|
||||
a -= a_offset;
|
||||
--ipiv;
|
||||
w_dim1 = *ldw;
|
||||
w_offset = 1 + w_dim1;
|
||||
w -= w_offset;
|
||||
|
||||
/* Function Body */
|
||||
*info = 0;
|
||||
alpha = (sqrt(17.) + 1.) / 8.;
|
||||
sfmin = dlamch_("S", (ftnlen)1);
|
||||
if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
|
||||
k = *n;
|
||||
L10:
|
||||
kw = *nb + k - *n;
|
||||
if ((k <= *n - *nb + 1 && *nb < *n) || k < 1) {
|
||||
goto L30;
|
||||
}
|
||||
kstep = 1;
|
||||
p = k;
|
||||
zcopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
z__1.r = -1., z__1.i = -0.;
|
||||
zgemv_("No transpose", &k, &i__1, &z__1, &a[(k + 1) * a_dim1 + 1],
|
||||
lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b1, &w[kw *
|
||||
w_dim1 + 1], &c__1, (ftnlen)12);
|
||||
}
|
||||
i__1 = k + kw * w_dim1;
|
||||
absakk = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[k + kw *
|
||||
w_dim1]), abs(d__2));
|
||||
if (k > 1) {
|
||||
i__1 = k - 1;
|
||||
imax = izamax_(&i__1, &w[kw * w_dim1 + 1], &c__1);
|
||||
i__1 = imax + kw * w_dim1;
|
||||
colmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[imax +
|
||||
kw * w_dim1]), abs(d__2));
|
||||
} else {
|
||||
colmax = 0.;
|
||||
}
|
||||
if (max(absakk,colmax) == 0.) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
zcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1);
|
||||
} else {
|
||||
if (! (absakk < alpha * colmax)) {
|
||||
kp = k;
|
||||
} else {
|
||||
done = FALSE_;
|
||||
L12:
|
||||
zcopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) *
|
||||
w_dim1 + 1], &c__1);
|
||||
i__1 = k - imax;
|
||||
zcopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax +
|
||||
1 + (kw - 1) * w_dim1], &c__1);
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
z__1.r = -1., z__1.i = -0.;
|
||||
zgemv_("No transpose", &k, &i__1, &z__1, &a[(k + 1) *
|
||||
a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1],
|
||||
ldw, &c_b1, &w[(kw - 1) * w_dim1 + 1], &c__1, (
|
||||
ftnlen)12);
|
||||
}
|
||||
if (imax != k) {
|
||||
i__1 = k - imax;
|
||||
jmax = imax + izamax_(&i__1, &w[imax + 1 + (kw - 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = jmax + (kw - 1) * w_dim1;
|
||||
rowmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&
|
||||
w[jmax + (kw - 1) * w_dim1]), abs(d__2));
|
||||
} else {
|
||||
rowmax = 0.;
|
||||
}
|
||||
if (imax > 1) {
|
||||
i__1 = imax - 1;
|
||||
itemp = izamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
|
||||
i__1 = itemp + (kw - 1) * w_dim1;
|
||||
dtemp = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[
|
||||
itemp + (kw - 1) * w_dim1]), abs(d__2));
|
||||
if (dtemp > rowmax) {
|
||||
rowmax = dtemp;
|
||||
jmax = itemp;
|
||||
}
|
||||
}
|
||||
i__1 = imax + (kw - 1) * w_dim1;
|
||||
if (! ((d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[imax
|
||||
+ (kw - 1) * w_dim1]), abs(d__2)) < alpha * rowmax)) {
|
||||
kp = imax;
|
||||
zcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
|
||||
w_dim1 + 1], &c__1);
|
||||
done = TRUE_;
|
||||
} else if (p == jmax || rowmax <= colmax) {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
done = TRUE_;
|
||||
} else {
|
||||
p = imax;
|
||||
colmax = rowmax;
|
||||
imax = jmax;
|
||||
zcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
|
||||
w_dim1 + 1], &c__1);
|
||||
}
|
||||
if (! done) {
|
||||
goto L12;
|
||||
}
|
||||
}
|
||||
kk = k - kstep + 1;
|
||||
kkw = *nb + kk - *n;
|
||||
if (kstep == 2 && p != k) {
|
||||
i__1 = k - p;
|
||||
zcopy_(&i__1, &a[p + 1 + k * a_dim1], &c__1, &a[p + (p + 1) *
|
||||
a_dim1], lda);
|
||||
zcopy_(&p, &a[k * a_dim1 + 1], &c__1, &a[p * a_dim1 + 1], &
|
||||
c__1);
|
||||
i__1 = *n - k + 1;
|
||||
zswap_(&i__1, &a[k + k * a_dim1], lda, &a[p + k * a_dim1],
|
||||
lda);
|
||||
i__1 = *n - kk + 1;
|
||||
zswap_(&i__1, &w[k + kkw * w_dim1], ldw, &w[p + kkw * w_dim1],
|
||||
ldw);
|
||||
}
|
||||
if (kp != kk) {
|
||||
i__1 = kp + k * a_dim1;
|
||||
i__2 = kk + k * a_dim1;
|
||||
a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i;
|
||||
i__1 = k - 1 - kp;
|
||||
zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp +
|
||||
1) * a_dim1], lda);
|
||||
zcopy_(&kp, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &
|
||||
c__1);
|
||||
i__1 = *n - kk + 1;
|
||||
zswap_(&i__1, &a[kk + kk * a_dim1], lda, &a[kp + kk * a_dim1],
|
||||
lda);
|
||||
i__1 = *n - kk + 1;
|
||||
zswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw *
|
||||
w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
zcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &
|
||||
c__1);
|
||||
if (k > 1) {
|
||||
i__1 = k + k * a_dim1;
|
||||
if ((d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_imag(&a[k +
|
||||
k * a_dim1]), abs(d__2)) >= sfmin) {
|
||||
z_div(&z__1, &c_b1, &a[k + k * a_dim1]);
|
||||
r1.r = z__1.r, r1.i = z__1.i;
|
||||
i__1 = k - 1;
|
||||
zscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1);
|
||||
} else /* if(complicated condition) */ {
|
||||
i__1 = k + k * a_dim1;
|
||||
if (a[i__1].r != 0. || a[i__1].i != 0.) {
|
||||
i__1 = k - 1;
|
||||
for (ii = 1; ii <= i__1; ++ii) {
|
||||
i__2 = ii + k * a_dim1;
|
||||
z_div(&z__1, &a[ii + k * a_dim1], &a[k + k *
|
||||
a_dim1]);
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
/* L14: */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (k > 2) {
|
||||
i__1 = k - 1 + kw * w_dim1;
|
||||
d12.r = w[i__1].r, d12.i = w[i__1].i;
|
||||
z_div(&z__1, &w[k + kw * w_dim1], &d12);
|
||||
d11.r = z__1.r, d11.i = z__1.i;
|
||||
z_div(&z__1, &w[k - 1 + (kw - 1) * w_dim1], &d12);
|
||||
d22.r = z__1.r, d22.i = z__1.i;
|
||||
z__3.r = d11.r * d22.r - d11.i * d22.i, z__3.i = d11.r *
|
||||
d22.i + d11.i * d22.r;
|
||||
z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.;
|
||||
z_div(&z__1, &c_b1, &z__2);
|
||||
t.r = z__1.r, t.i = z__1.i;
|
||||
i__1 = k - 2;
|
||||
for (j = 1; j <= i__1; ++j) {
|
||||
i__2 = j + (k - 1) * a_dim1;
|
||||
i__3 = j + (kw - 1) * w_dim1;
|
||||
z__4.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
|
||||
z__4.i = d11.r * w[i__3].i + d11.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + kw * w_dim1;
|
||||
z__3.r = z__4.r - w[i__4].r, z__3.i = z__4.i - w[i__4]
|
||||
.i;
|
||||
z_div(&z__2, &z__3, &d12);
|
||||
z__1.r = t.r * z__2.r - t.i * z__2.i, z__1.i = t.r *
|
||||
z__2.i + t.i * z__2.r;
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
i__2 = j + k * a_dim1;
|
||||
i__3 = j + kw * w_dim1;
|
||||
z__4.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
|
||||
z__4.i = d22.r * w[i__3].i + d22.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + (kw - 1) * w_dim1;
|
||||
z__3.r = z__4.r - w[i__4].r, z__3.i = z__4.i - w[i__4]
|
||||
.i;
|
||||
z_div(&z__2, &z__3, &d12);
|
||||
z__1.r = t.r * z__2.r - t.i * z__2.i, z__1.i = t.r *
|
||||
z__2.i + t.i * z__2.r;
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
/* L20: */
|
||||
}
|
||||
}
|
||||
i__1 = k - 1 + (k - 1) * a_dim1;
|
||||
i__2 = k - 1 + (kw - 1) * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k - 1 + k * a_dim1;
|
||||
i__2 = k - 1 + kw * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + kw * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -p;
|
||||
ipiv[k - 1] = -kp;
|
||||
}
|
||||
k -= kstep;
|
||||
goto L10;
|
||||
L30:
|
||||
j = k + 1;
|
||||
L60:
|
||||
kstep = 1;
|
||||
jp1 = 1;
|
||||
jj = j;
|
||||
jp2 = ipiv[j];
|
||||
if (jp2 < 0) {
|
||||
jp2 = -jp2;
|
||||
++j;
|
||||
jp1 = -ipiv[j];
|
||||
kstep = 2;
|
||||
}
|
||||
++j;
|
||||
if (jp2 != jj && j <= *n) {
|
||||
i__1 = *n - j + 1;
|
||||
zswap_(&i__1, &a[jp2 + j * a_dim1], lda, &a[jj + j * a_dim1], lda)
|
||||
;
|
||||
}
|
||||
jj = j - 1;
|
||||
if (jp1 != jj && kstep == 2) {
|
||||
i__1 = *n - j + 1;
|
||||
zswap_(&i__1, &a[jp1 + j * a_dim1], lda, &a[jj + j * a_dim1], lda)
|
||||
;
|
||||
}
|
||||
if (j <= *n) {
|
||||
goto L60;
|
||||
}
|
||||
*kb = *n - k;
|
||||
} else {
|
||||
k = 1;
|
||||
L70:
|
||||
if ((k >= *nb && *nb < *n) || k > *n) {
|
||||
goto L90;
|
||||
}
|
||||
kstep = 1;
|
||||
p = k;
|
||||
i__1 = *n - k + 1;
|
||||
zcopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1);
|
||||
if (k > 1) {
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
z__1.r = -1., z__1.i = -0.;
|
||||
zgemv_("No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1], lda, &
|
||||
w[k + w_dim1], ldw, &c_b1, &w[k + k * w_dim1], &c__1, (
|
||||
ftnlen)12);
|
||||
}
|
||||
i__1 = k + k * w_dim1;
|
||||
absakk = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[k + k *
|
||||
w_dim1]), abs(d__2));
|
||||
if (k < *n) {
|
||||
i__1 = *n - k;
|
||||
imax = k + izamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
|
||||
i__1 = imax + k * w_dim1;
|
||||
colmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[imax +
|
||||
k * w_dim1]), abs(d__2));
|
||||
} else {
|
||||
colmax = 0.;
|
||||
}
|
||||
if (max(absakk,colmax) == 0.) {
|
||||
if (*info == 0) {
|
||||
*info = k;
|
||||
}
|
||||
kp = k;
|
||||
i__1 = *n - k + 1;
|
||||
zcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
|
||||
c__1);
|
||||
} else {
|
||||
if (! (absakk < alpha * colmax)) {
|
||||
kp = k;
|
||||
} else {
|
||||
done = FALSE_;
|
||||
L72:
|
||||
i__1 = imax - k;
|
||||
zcopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = *n - imax + 1;
|
||||
zcopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k +
|
||||
1) * w_dim1], &c__1);
|
||||
if (k > 1) {
|
||||
i__1 = *n - k + 1;
|
||||
i__2 = k - 1;
|
||||
z__1.r = -1., z__1.i = -0.;
|
||||
zgemv_("No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1]
|
||||
, lda, &w[imax + w_dim1], ldw, &c_b1, &w[k + (k +
|
||||
1) * w_dim1], &c__1, (ftnlen)12);
|
||||
}
|
||||
if (imax != k) {
|
||||
i__1 = imax - k;
|
||||
jmax = k - 1 + izamax_(&i__1, &w[k + (k + 1) * w_dim1], &
|
||||
c__1);
|
||||
i__1 = jmax + (k + 1) * w_dim1;
|
||||
rowmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&
|
||||
w[jmax + (k + 1) * w_dim1]), abs(d__2));
|
||||
} else {
|
||||
rowmax = 0.;
|
||||
}
|
||||
if (imax < *n) {
|
||||
i__1 = *n - imax;
|
||||
itemp = imax + izamax_(&i__1, &w[imax + 1 + (k + 1) *
|
||||
w_dim1], &c__1);
|
||||
i__1 = itemp + (k + 1) * w_dim1;
|
||||
dtemp = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[
|
||||
itemp + (k + 1) * w_dim1]), abs(d__2));
|
||||
if (dtemp > rowmax) {
|
||||
rowmax = dtemp;
|
||||
jmax = itemp;
|
||||
}
|
||||
}
|
||||
i__1 = imax + (k + 1) * w_dim1;
|
||||
if (! ((d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w[imax
|
||||
+ (k + 1) * w_dim1]), abs(d__2)) < alpha * rowmax)) {
|
||||
kp = imax;
|
||||
i__1 = *n - k + 1;
|
||||
zcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k *
|
||||
w_dim1], &c__1);
|
||||
done = TRUE_;
|
||||
} else if (p == jmax || rowmax <= colmax) {
|
||||
kp = imax;
|
||||
kstep = 2;
|
||||
done = TRUE_;
|
||||
} else {
|
||||
p = imax;
|
||||
colmax = rowmax;
|
||||
imax = jmax;
|
||||
i__1 = *n - k + 1;
|
||||
zcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k *
|
||||
w_dim1], &c__1);
|
||||
}
|
||||
if (! done) {
|
||||
goto L72;
|
||||
}
|
||||
}
|
||||
kk = k + kstep - 1;
|
||||
if (kstep == 2 && p != k) {
|
||||
i__1 = p - k;
|
||||
zcopy_(&i__1, &a[k + k * a_dim1], &c__1, &a[p + k * a_dim1],
|
||||
lda);
|
||||
i__1 = *n - p + 1;
|
||||
zcopy_(&i__1, &a[p + k * a_dim1], &c__1, &a[p + p * a_dim1], &
|
||||
c__1);
|
||||
zswap_(&k, &a[k + a_dim1], lda, &a[p + a_dim1], lda);
|
||||
zswap_(&kk, &w[k + w_dim1], ldw, &w[p + w_dim1], ldw);
|
||||
}
|
||||
if (kp != kk) {
|
||||
i__1 = kp + k * a_dim1;
|
||||
i__2 = kk + k * a_dim1;
|
||||
a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i;
|
||||
i__1 = kp - k - 1;
|
||||
zcopy_(&i__1, &a[k + 1 + kk * a_dim1], &c__1, &a[kp + (k + 1)
|
||||
* a_dim1], lda);
|
||||
i__1 = *n - kp + 1;
|
||||
zcopy_(&i__1, &a[kp + kk * a_dim1], &c__1, &a[kp + kp *
|
||||
a_dim1], &c__1);
|
||||
zswap_(&kk, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda);
|
||||
zswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw);
|
||||
}
|
||||
if (kstep == 1) {
|
||||
i__1 = *n - k + 1;
|
||||
zcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
|
||||
c__1);
|
||||
if (k < *n) {
|
||||
i__1 = k + k * a_dim1;
|
||||
if ((d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_imag(&a[k +
|
||||
k * a_dim1]), abs(d__2)) >= sfmin) {
|
||||
z_div(&z__1, &c_b1, &a[k + k * a_dim1]);
|
||||
r1.r = z__1.r, r1.i = z__1.i;
|
||||
i__1 = *n - k;
|
||||
zscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1);
|
||||
} else /* if(complicated condition) */ {
|
||||
i__1 = k + k * a_dim1;
|
||||
if (a[i__1].r != 0. || a[i__1].i != 0.) {
|
||||
i__1 = *n;
|
||||
for (ii = k + 1; ii <= i__1; ++ii) {
|
||||
i__2 = ii + k * a_dim1;
|
||||
z_div(&z__1, &a[ii + k * a_dim1], &a[k + k *
|
||||
a_dim1]);
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
/* L74: */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (k < *n - 1) {
|
||||
i__1 = k + 1 + k * w_dim1;
|
||||
d21.r = w[i__1].r, d21.i = w[i__1].i;
|
||||
z_div(&z__1, &w[k + 1 + (k + 1) * w_dim1], &d21);
|
||||
d11.r = z__1.r, d11.i = z__1.i;
|
||||
z_div(&z__1, &w[k + k * w_dim1], &d21);
|
||||
d22.r = z__1.r, d22.i = z__1.i;
|
||||
z__3.r = d11.r * d22.r - d11.i * d22.i, z__3.i = d11.r *
|
||||
d22.i + d11.i * d22.r;
|
||||
z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.;
|
||||
z_div(&z__1, &c_b1, &z__2);
|
||||
t.r = z__1.r, t.i = z__1.i;
|
||||
i__1 = *n;
|
||||
for (j = k + 2; j <= i__1; ++j) {
|
||||
i__2 = j + k * a_dim1;
|
||||
i__3 = j + k * w_dim1;
|
||||
z__4.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
|
||||
z__4.i = d11.r * w[i__3].i + d11.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + (k + 1) * w_dim1;
|
||||
z__3.r = z__4.r - w[i__4].r, z__3.i = z__4.i - w[i__4]
|
||||
.i;
|
||||
z_div(&z__2, &z__3, &d21);
|
||||
z__1.r = t.r * z__2.r - t.i * z__2.i, z__1.i = t.r *
|
||||
z__2.i + t.i * z__2.r;
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
i__2 = j + (k + 1) * a_dim1;
|
||||
i__3 = j + (k + 1) * w_dim1;
|
||||
z__4.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
|
||||
z__4.i = d22.r * w[i__3].i + d22.i * w[i__3]
|
||||
.r;
|
||||
i__4 = j + k * w_dim1;
|
||||
z__3.r = z__4.r - w[i__4].r, z__3.i = z__4.i - w[i__4]
|
||||
.i;
|
||||
z_div(&z__2, &z__3, &d21);
|
||||
z__1.r = t.r * z__2.r - t.i * z__2.i, z__1.i = t.r *
|
||||
z__2.i + t.i * z__2.r;
|
||||
a[i__2].r = z__1.r, a[i__2].i = z__1.i;
|
||||
/* L80: */
|
||||
}
|
||||
}
|
||||
i__1 = k + k * a_dim1;
|
||||
i__2 = k + k * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + 1 + k * a_dim1;
|
||||
i__2 = k + 1 + k * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
i__1 = k + 1 + (k + 1) * a_dim1;
|
||||
i__2 = k + 1 + (k + 1) * w_dim1;
|
||||
a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
|
||||
}
|
||||
}
|
||||
if (kstep == 1) {
|
||||
ipiv[k] = kp;
|
||||
} else {
|
||||
ipiv[k] = -p;
|
||||
ipiv[k + 1] = -kp;
|
||||
}
|
||||
k += kstep;
|
||||
goto L70;
|
||||
L90:
|
||||
j = k - 1;
|
||||
L120:
|
||||
kstep = 1;
|
||||
jp1 = 1;
|
||||
jj = j;
|
||||
jp2 = ipiv[j];
|
||||
if (jp2 < 0) {
|
||||
jp2 = -jp2;
|
||||
--j;
|
||||
jp1 = -ipiv[j];
|
||||
kstep = 2;
|
||||
}
|
||||
--j;
|
||||
if (jp2 != jj && j >= 1) {
|
||||
zswap_(&j, &a[jp2 + a_dim1], lda, &a[jj + a_dim1], lda);
|
||||
}
|
||||
jj = j + 1;
|
||||
if (jp1 != jj && kstep == 2) {
|
||||
zswap_(&j, &a[jp1 + a_dim1], lda, &a[jj + a_dim1], lda);
|
||||
}
|
||||
if (j >= 1) {
|
||||
goto L120;
|
||||
}
|
||||
*kb = k - 1;
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,268 @@
|
|||
#include "relapack.h"
|
||||
#include <math.h>
|
||||
|
||||
static void RELAPACK_ztgsyl_rec(const char *, const int *, const int *,
|
||||
const int *, const double *, const int *, const double *, const int *,
|
||||
double *, const int *, const double *, const int *, const double *,
|
||||
const int *, double *, const int *, double *, double *, double *, int *);
|
||||
|
||||
|
||||
/** ZTGSYL solves the generalized Sylvester equation.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's ztgsyl.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/db/d68/ztgsyl_8f.html
|
||||
* */
|
||||
void RELAPACK_ztgsyl(
|
||||
const char *trans, const int *ijob, const int *m, const int *n,
|
||||
const double *A, const int *ldA, const double *B, const int *ldB,
|
||||
double *C, const int *ldC,
|
||||
const double *D, const int *ldD, const double *E, const int *ldE,
|
||||
double *F, const int *ldF,
|
||||
double *scale, double *dif,
|
||||
double *Work, const int *lWork, int *iWork, int *info
|
||||
) {
|
||||
|
||||
// Parse arguments
|
||||
const int notran = LAPACK(lsame)(trans, "N");
|
||||
const int tran = LAPACK(lsame)(trans, "C");
|
||||
|
||||
// Compute work buffer size
|
||||
int lwmin = 1;
|
||||
if (notran && (*ijob == 1 || *ijob == 2))
|
||||
lwmin = MAX(1, 2 * *m * *n);
|
||||
*info = 0;
|
||||
|
||||
// Check arguments
|
||||
if (!tran && !notran)
|
||||
*info = -1;
|
||||
else if (notran && (*ijob < 0 || *ijob > 4))
|
||||
*info = -2;
|
||||
else if (*m <= 0)
|
||||
*info = -3;
|
||||
else if (*n <= 0)
|
||||
*info = -4;
|
||||
else if (*ldA < MAX(1, *m))
|
||||
*info = -6;
|
||||
else if (*ldB < MAX(1, *n))
|
||||
*info = -8;
|
||||
else if (*ldC < MAX(1, *m))
|
||||
*info = -10;
|
||||
else if (*ldD < MAX(1, *m))
|
||||
*info = -12;
|
||||
else if (*ldE < MAX(1, *n))
|
||||
*info = -14;
|
||||
else if (*ldF < MAX(1, *m))
|
||||
*info = -16;
|
||||
else if (*lWork < lwmin && *lWork != -1)
|
||||
*info = -20;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("ZTGSYL", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
if (*lWork == -1) {
|
||||
// Work size query
|
||||
*Work = lwmin;
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleantrans = notran ? 'N' : 'C';
|
||||
|
||||
// Constant
|
||||
const double ZERO[] = { 0., 0. };
|
||||
|
||||
int isolve = 1;
|
||||
int ifunc = 0;
|
||||
if (notran) {
|
||||
if (*ijob >= 3) {
|
||||
ifunc = *ijob - 2;
|
||||
LAPACK(zlaset)("F", m, n, ZERO, ZERO, C, ldC);
|
||||
LAPACK(zlaset)("F", m, n, ZERO, ZERO, F, ldF);
|
||||
} else if (*ijob >= 1)
|
||||
isolve = 2;
|
||||
}
|
||||
|
||||
double scale2;
|
||||
int iround;
|
||||
for (iround = 1; iround <= isolve; iround++) {
|
||||
*scale = 1;
|
||||
double dscale = 0;
|
||||
double dsum = 1;
|
||||
RELAPACK_ztgsyl_rec(&cleantrans, &ifunc, m, n, A, ldA, B, ldB, C, ldC, D, ldD, E, ldE, F, ldF, scale, &dsum, &dscale, info);
|
||||
if (dscale != 0) {
|
||||
if (*ijob == 1 || *ijob == 3)
|
||||
*dif = sqrt(2 * *m * *n) / (dscale * sqrt(dsum));
|
||||
else
|
||||
*dif = sqrt(*m * *n) / (dscale * sqrt(dsum));
|
||||
}
|
||||
if (isolve == 2) {
|
||||
if (iround == 1) {
|
||||
if (notran)
|
||||
ifunc = *ijob;
|
||||
scale2 = *scale;
|
||||
LAPACK(zlacpy)("F", m, n, C, ldC, Work, m);
|
||||
LAPACK(zlacpy)("F", m, n, F, ldF, Work + 2 * *m * *n, m);
|
||||
LAPACK(zlaset)("F", m, n, ZERO, ZERO, C, ldC);
|
||||
LAPACK(zlaset)("F", m, n, ZERO, ZERO, F, ldF);
|
||||
} else {
|
||||
LAPACK(zlacpy)("F", m, n, Work, m, C, ldC);
|
||||
LAPACK(zlacpy)("F", m, n, Work + 2 * *m * *n, m, F, ldF);
|
||||
*scale = scale2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** ztgsyl's recursive vompute kernel */
|
||||
static void RELAPACK_ztgsyl_rec(
|
||||
const char *trans, const int *ifunc, const int *m, const int *n,
|
||||
const double *A, const int *ldA, const double *B, const int *ldB,
|
||||
double *C, const int *ldC,
|
||||
const double *D, const int *ldD, const double *E, const int *ldE,
|
||||
double *F, const int *ldF,
|
||||
double *scale, double *dsum, double *dscale,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*m <= MAX(CROSSOVER_ZTGSYL, 1) && *n <= MAX(CROSSOVER_ZTGSYL, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(ztgsy2)(trans, ifunc, m, n, A, ldA, B, ldB, C, ldC, D, ldD, E, ldE, F, ldF, scale, dsum, dscale, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1., 0. };
|
||||
const double MONE[] = { -1., 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Outputs
|
||||
double scale1[] = { 1., 0. };
|
||||
double scale2[] = { 1., 0. };
|
||||
int info1[] = { 0 };
|
||||
int info2[] = { 0 };
|
||||
|
||||
if (*m > *n) {
|
||||
// Splitting
|
||||
const int m1 = ZREC_SPLIT(*m);
|
||||
const int m2 = *m - m1;
|
||||
|
||||
// A_TL A_TR
|
||||
// 0 A_BR
|
||||
const double *const A_TL = A;
|
||||
const double *const A_TR = A + 2 * *ldA * m1;
|
||||
const double *const A_BR = A + 2 * *ldA * m1 + 2 * m1;
|
||||
|
||||
// C_T
|
||||
// C_B
|
||||
double *const C_T = C;
|
||||
double *const C_B = C + 2 * m1;
|
||||
|
||||
// D_TL D_TR
|
||||
// 0 D_BR
|
||||
const double *const D_TL = D;
|
||||
const double *const D_TR = D + 2 * *ldD * m1;
|
||||
const double *const D_BR = D + 2 * *ldD * m1 + 2 * m1;
|
||||
|
||||
// F_T
|
||||
// F_B
|
||||
double *const F_T = F;
|
||||
double *const F_B = F + 2 * m1;
|
||||
|
||||
if (*trans == 'N') {
|
||||
// recursion(A_BR, B, C_B, D_BR, E, F_B)
|
||||
RELAPACK_ztgsyl_rec(trans, ifunc, &m2, n, A_BR, ldA, B, ldB, C_B, ldC, D_BR, ldD, E, ldE, F_B, ldF, scale1, dsum, dscale, info1);
|
||||
// C_T = C_T - A_TR * C_B
|
||||
BLAS(zgemm)("N", "N", &m1, n, &m2, MONE, A_TR, ldA, C_B, ldC, scale1, C_T, ldC);
|
||||
// F_T = F_T - D_TR * C_B
|
||||
BLAS(zgemm)("N", "N", &m1, n, &m2, MONE, D_TR, ldD, C_B, ldC, scale1, F_T, ldF);
|
||||
// recursion(A_TL, B, C_T, D_TL, E, F_T)
|
||||
RELAPACK_ztgsyl_rec(trans, ifunc, &m1, n, A_TL, ldA, B, ldB, C_T, ldC, D_TL, ldD, E, ldE, F_T, ldF, scale2, dsum, dscale, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1) {
|
||||
LAPACK(zlascl)("G", iONE, iONE, ONE, scale2, &m2, n, C_B, ldC, info);
|
||||
LAPACK(zlascl)("G", iONE, iONE, ONE, scale2, &m2, n, F_B, ldF, info);
|
||||
}
|
||||
} else {
|
||||
// recursion(A_TL, B, C_T, D_TL, E, F_T)
|
||||
RELAPACK_ztgsyl_rec(trans, ifunc, &m1, n, A_TL, ldA, B, ldB, C_T, ldC, D_TL, ldD, E, ldE, F_T, ldF, scale1, dsum, dscale, info1);
|
||||
// apply scale
|
||||
if (scale1[0] != 1)
|
||||
LAPACK(zlascl)("G", iONE, iONE, ONE, scale1, &m2, n, F_B, ldF, info);
|
||||
// C_B = C_B - A_TR^H * C_T
|
||||
BLAS(zgemm)("C", "N", &m2, n, &m1, MONE, A_TR, ldA, C_T, ldC, scale1, C_B, ldC);
|
||||
// C_B = C_B - D_TR^H * F_T
|
||||
BLAS(zgemm)("C", "N", &m2, n, &m1, MONE, D_TR, ldD, F_T, ldC, ONE, C_B, ldC);
|
||||
// recursion(A_BR, B, C_B, D_BR, E, F_B)
|
||||
RELAPACK_ztgsyl_rec(trans, ifunc, &m2, n, A_BR, ldA, B, ldB, C_B, ldC, D_BR, ldD, E, ldE, F_B, ldF, scale2, dsum, dscale, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1) {
|
||||
LAPACK(zlascl)("G", iONE, iONE, ONE, scale2, &m1, n, C_T, ldC, info);
|
||||
LAPACK(zlascl)("G", iONE, iONE, ONE, scale2, &m1, n, F_T, ldF, info);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Splitting
|
||||
const int n1 = ZREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// B_TL B_TR
|
||||
// 0 B_BR
|
||||
const double *const B_TL = B;
|
||||
const double *const B_TR = B + 2 * *ldB * n1;
|
||||
const double *const B_BR = B + 2 * *ldB * n1 + 2 * n1;
|
||||
|
||||
// C_L C_R
|
||||
double *const C_L = C;
|
||||
double *const C_R = C + 2 * *ldC * n1;
|
||||
|
||||
// E_TL E_TR
|
||||
// 0 E_BR
|
||||
const double *const E_TL = E;
|
||||
const double *const E_TR = E + 2 * *ldE * n1;
|
||||
const double *const E_BR = E + 2 * *ldE * n1 + 2 * n1;
|
||||
|
||||
// F_L F_R
|
||||
double *const F_L = F;
|
||||
double *const F_R = F + 2 * *ldF * n1;
|
||||
|
||||
if (*trans == 'N') {
|
||||
// recursion(A, B_TL, C_L, D, E_TL, F_L)
|
||||
RELAPACK_ztgsyl_rec(trans, ifunc, m, &n1, A, ldA, B_TL, ldB, C_L, ldC, D, ldD, E_TL, ldE, F_L, ldF, scale1, dsum, dscale, info1);
|
||||
// C_R = C_R + F_L * B_TR
|
||||
BLAS(zgemm)("N", "N", m, &n2, &n1, ONE, F_L, ldF, B_TR, ldB, scale1, C_R, ldC);
|
||||
// F_R = F_R + F_L * E_TR
|
||||
BLAS(zgemm)("N", "N", m, &n2, &n1, ONE, F_L, ldF, E_TR, ldE, scale1, F_R, ldF);
|
||||
// recursion(A, B_BR, C_R, D, E_BR, F_R)
|
||||
RELAPACK_ztgsyl_rec(trans, ifunc, m, &n2, A, ldA, B_BR, ldB, C_R, ldC, D, ldD, E_BR, ldE, F_R, ldF, scale2, dsum, dscale, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1) {
|
||||
LAPACK(zlascl)("G", iONE, iONE, ONE, scale2, m, &n1, C_L, ldC, info);
|
||||
LAPACK(zlascl)("G", iONE, iONE, ONE, scale2, m, &n1, F_L, ldF, info);
|
||||
}
|
||||
} else {
|
||||
// recursion(A, B_BR, C_R, D, E_BR, F_R)
|
||||
RELAPACK_ztgsyl_rec(trans, ifunc, m, &n2, A, ldA, B_BR, ldB, C_R, ldC, D, ldD, E_BR, ldE, F_R, ldF, scale1, dsum, dscale, info1);
|
||||
// apply scale
|
||||
if (scale1[0] != 1)
|
||||
LAPACK(zlascl)("G", iONE, iONE, ONE, scale1, m, &n1, C_L, ldC, info);
|
||||
// F_L = F_L + C_R * B_TR
|
||||
BLAS(zgemm)("N", "C", m, &n1, &n2, ONE, C_R, ldC, B_TR, ldB, scale1, F_L, ldF);
|
||||
// F_L = F_L + F_R * E_TR
|
||||
BLAS(zgemm)("N", "C", m, &n1, &n2, ONE, F_R, ldF, E_TR, ldB, ONE, F_L, ldF);
|
||||
// recursion(A, B_TL, C_L, D, E_TL, F_L)
|
||||
RELAPACK_ztgsyl_rec(trans, ifunc, m, &n1, A, ldA, B_TL, ldB, C_L, ldC, D, ldD, E_TL, ldE, F_L, ldF, scale2, dsum, dscale, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1) {
|
||||
LAPACK(zlascl)("G", iONE, iONE, ONE, scale2, m, &n2, C_R, ldC, info);
|
||||
LAPACK(zlascl)("G", iONE, iONE, ONE, scale2, m, &n2, F_R, ldF, info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*scale = scale1[0] * scale2[0];
|
||||
*info = info1[0] || info2[0];
|
||||
}
|
|
@ -0,0 +1,163 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_ztrsyl_rec(const char *, const char *, const int *,
|
||||
const int *, const int *, const double *, const int *, const double *,
|
||||
const int *, double *, const int *, double *, int *);
|
||||
|
||||
|
||||
/** ZTRSYL solves the complex Sylvester matrix equation.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's ztrsyl.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d1/d36/ztrsyl_8f.html
|
||||
* */
|
||||
void RELAPACK_ztrsyl(
|
||||
const char *tranA, const char *tranB, const int *isgn,
|
||||
const int *m, const int *n,
|
||||
const double *A, const int *ldA, const double *B, const int *ldB,
|
||||
double *C, const int *ldC, double *scale,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int notransA = LAPACK(lsame)(tranA, "N");
|
||||
const int ctransA = LAPACK(lsame)(tranA, "C");
|
||||
const int notransB = LAPACK(lsame)(tranB, "N");
|
||||
const int ctransB = LAPACK(lsame)(tranB, "C");
|
||||
*info = 0;
|
||||
if (!ctransA && !notransA)
|
||||
*info = -1;
|
||||
else if (!ctransB && !notransB)
|
||||
*info = -2;
|
||||
else if (*isgn != 1 && *isgn != -1)
|
||||
*info = -3;
|
||||
else if (*m < 0)
|
||||
*info = -4;
|
||||
else if (*n < 0)
|
||||
*info = -5;
|
||||
else if (*ldA < MAX(1, *m))
|
||||
*info = -7;
|
||||
else if (*ldB < MAX(1, *n))
|
||||
*info = -9;
|
||||
else if (*ldC < MAX(1, *m))
|
||||
*info = -11;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("ZTRSYL", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleantranA = notransA ? 'N' : 'C';
|
||||
const char cleantranB = notransB ? 'N' : 'C';
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_ztrsyl_rec(&cleantranA, &cleantranB, isgn, m, n, A, ldA, B, ldB, C, ldC, scale, info);
|
||||
}
|
||||
|
||||
|
||||
/** ztrsyl's recursive compute kernel */
|
||||
static void RELAPACK_ztrsyl_rec(
|
||||
const char *tranA, const char *tranB, const int *isgn,
|
||||
const int *m, const int *n,
|
||||
const double *A, const int *ldA, const double *B, const int *ldB,
|
||||
double *C, const int *ldC, double *scale,
|
||||
int *info
|
||||
) {
|
||||
|
||||
if (*m <= MAX(CROSSOVER_ZTRSYL, 1) && *n <= MAX(CROSSOVER_ZTRSYL, 1)) {
|
||||
// Unblocked
|
||||
RELAPACK_ztrsyl_rec2(tranA, tranB, isgn, m, n, A, ldA, B, ldB, C, ldC, scale, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1., 0. };
|
||||
const double MONE[] = { -1., 0. };
|
||||
const double MSGN[] = { -*isgn, 0. };
|
||||
const int iONE[] = { 1 };
|
||||
|
||||
// Outputs
|
||||
double scale1[] = { 1., 0. };
|
||||
double scale2[] = { 1., 0. };
|
||||
int info1[] = { 0 };
|
||||
int info2[] = { 0 };
|
||||
|
||||
if (*m > *n) {
|
||||
// Splitting
|
||||
const int m1 = ZREC_SPLIT(*m);
|
||||
const int m2 = *m - m1;
|
||||
|
||||
// A_TL A_TR
|
||||
// 0 A_BR
|
||||
const double *const A_TL = A;
|
||||
const double *const A_TR = A + 2 * *ldA * m1;
|
||||
const double *const A_BR = A + 2 * *ldA * m1 + 2 * m1;
|
||||
|
||||
// C_T
|
||||
// C_B
|
||||
double *const C_T = C;
|
||||
double *const C_B = C + 2 * m1;
|
||||
|
||||
if (*tranA == 'N') {
|
||||
// recusion(A_BR, B, C_B)
|
||||
RELAPACK_ztrsyl_rec(tranA, tranB, isgn, &m2, n, A_BR, ldA, B, ldB, C_B, ldC, scale1, info1);
|
||||
// C_T = C_T - A_TR * C_B
|
||||
BLAS(zgemm)("N", "N", &m1, n, &m2, MONE, A_TR, ldA, C_B, ldC, scale1, C_T, ldC);
|
||||
// recusion(A_TL, B, C_T)
|
||||
RELAPACK_ztrsyl_rec(tranA, tranB, isgn, &m1, n, A_TL, ldA, B, ldB, C_T, ldC, scale2, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1)
|
||||
LAPACK(zlascl)("G", iONE, iONE, ONE, scale2, &m2, n, C_B, ldC, info);
|
||||
} else {
|
||||
// recusion(A_TL, B, C_T)
|
||||
RELAPACK_ztrsyl_rec(tranA, tranB, isgn, &m1, n, A_TL, ldA, B, ldB, C_T, ldC, scale1, info1);
|
||||
// C_B = C_B - A_TR' * C_T
|
||||
BLAS(zgemm)("C", "N", &m2, n, &m1, MONE, A_TR, ldA, C_T, ldC, scale1, C_B, ldC);
|
||||
// recusion(A_BR, B, C_B)
|
||||
RELAPACK_ztrsyl_rec(tranA, tranB, isgn, &m2, n, A_BR, ldA, B, ldB, C_B, ldC, scale2, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1)
|
||||
LAPACK(zlascl)("G", iONE, iONE, ONE, scale2, &m1, n, C_B, ldC, info);
|
||||
}
|
||||
} else {
|
||||
// Splitting
|
||||
const int n1 = ZREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// B_TL B_TR
|
||||
// 0 B_BR
|
||||
const double *const B_TL = B;
|
||||
const double *const B_TR = B + 2 * *ldB * n1;
|
||||
const double *const B_BR = B + 2 * *ldB * n1 + 2 * n1;
|
||||
|
||||
// C_L C_R
|
||||
double *const C_L = C;
|
||||
double *const C_R = C + 2 * *ldC * n1;
|
||||
|
||||
if (*tranB == 'N') {
|
||||
// recusion(A, B_TL, C_L)
|
||||
RELAPACK_ztrsyl_rec(tranA, tranB, isgn, m, &n1, A, ldA, B_TL, ldB, C_L, ldC, scale1, info1);
|
||||
// C_R = C_R -/+ C_L * B_TR
|
||||
BLAS(zgemm)("N", "N", m, &n2, &n1, MSGN, C_L, ldC, B_TR, ldB, scale1, C_R, ldC);
|
||||
// recusion(A, B_BR, C_R)
|
||||
RELAPACK_ztrsyl_rec(tranA, tranB, isgn, m, &n2, A, ldA, B_BR, ldB, C_R, ldC, scale2, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1)
|
||||
LAPACK(zlascl)("G", iONE, iONE, ONE, scale2, m, &n1, C_L, ldC, info);
|
||||
} else {
|
||||
// recusion(A, B_BR, C_R)
|
||||
RELAPACK_ztrsyl_rec(tranA, tranB, isgn, m, &n2, A, ldA, B_BR, ldB, C_R, ldC, scale1, info1);
|
||||
// C_L = C_L -/+ C_R * B_TR'
|
||||
BLAS(zgemm)("N", "C", m, &n1, &n2, MSGN, C_R, ldC, B_TR, ldB, scale1, C_L, ldC);
|
||||
// recusion(A, B_TL, C_L)
|
||||
RELAPACK_ztrsyl_rec(tranA, tranB, isgn, m, &n1, A, ldA, B_TL, ldB, C_L, ldC, scale2, info2);
|
||||
// apply scale
|
||||
if (scale2[0] != 1)
|
||||
LAPACK(zlascl)("G", iONE, iONE, ONE, scale2, m, &n2, C_R, ldC, info);
|
||||
}
|
||||
}
|
||||
|
||||
*scale = scale1[0] * scale2[0];
|
||||
*info = info1[0] || info2[0];
|
||||
}
|
|
@ -0,0 +1,394 @@
|
|||
/* -- translated by f2c (version 20100827).
|
||||
You must link the resulting object file with libf2c:
|
||||
on Microsoft Windows system, link with libf2c.lib;
|
||||
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
|
||||
or, if you install libf2c.a in a standard place, with -lf2c -lm
|
||||
-- in that order, at the end of the command line, as in
|
||||
cc *.o -lf2c -lm
|
||||
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
|
||||
|
||||
http://www.netlib.org/f2c/libf2c.zip
|
||||
*/
|
||||
|
||||
#include "../config.h"
|
||||
#include "f2c.h"
|
||||
|
||||
#if BLAS_COMPLEX_FUNCTIONS_AS_ROUTINES
|
||||
doublecomplex zdotu_fun(int *n, doublecomplex *x, int *incx, doublecomplex *y, int *incy) {
|
||||
extern void zdotu_(doublecomplex *, int *, doublecomplex *, int *, doublecomplex *, int *);
|
||||
doublecomplex result;
|
||||
zdotu_(&result, n, x, incx, y, incy);
|
||||
return result;
|
||||
}
|
||||
#define zdotu_ zdotu_fun
|
||||
|
||||
doublecomplex zdotc_fun(int *n, doublecomplex *x, int *incx, doublecomplex *y, int *incy) {
|
||||
extern void zdotc_(doublecomplex *, int *, doublecomplex *, int *, doublecomplex *, int *);
|
||||
doublecomplex result;
|
||||
zdotc_(&result, n, x, incx, y, incy);
|
||||
return result;
|
||||
}
|
||||
#define zdotc_ zdotc_fun
|
||||
#endif
|
||||
|
||||
#if LAPACK_BLAS_COMPLEX_FUNCTIONS_AS_ROUTINES
|
||||
doublecomplex zladiv_fun(doublecomplex *a, doublecomplex *b) {
|
||||
extern void zladiv_(doublecomplex *, doublecomplex *, doublecomplex *);
|
||||
doublecomplex result;
|
||||
zladiv_(&result, a, b);
|
||||
return result;
|
||||
}
|
||||
#define zladiv_ zladiv_fun
|
||||
#endif
|
||||
|
||||
/* Table of constant values */
|
||||
|
||||
static int c__1 = 1;
|
||||
|
||||
/** RELAPACK_ZTRSYL_REC2 solves the complex Sylvester matrix equation (unblocked algorithm)
|
||||
*
|
||||
* This routine is an exact copy of LAPACK's ztrsyl.
|
||||
* It serves as an unblocked kernel in the recursive algorithms.
|
||||
* */
|
||||
/* Subroutine */ void RELAPACK_ztrsyl_rec2(char *trana, char *tranb, int
|
||||
*isgn, int *m, int *n, doublecomplex *a, int *lda,
|
||||
doublecomplex *b, int *ldb, doublecomplex *c__, int *ldc,
|
||||
double *scale, int *info, ftnlen trana_len, ftnlen tranb_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
int a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
|
||||
i__3, i__4;
|
||||
double d__1, d__2;
|
||||
doublecomplex z__1, z__2, z__3, z__4;
|
||||
|
||||
/* Builtin functions */
|
||||
double d_imag(doublecomplex *);
|
||||
void d_cnjg(doublecomplex *, doublecomplex *);
|
||||
|
||||
/* Local variables */
|
||||
static int j, k, l;
|
||||
static doublecomplex a11;
|
||||
static double db;
|
||||
static doublecomplex x11;
|
||||
static double da11;
|
||||
static doublecomplex vec;
|
||||
static double dum[1], eps, sgn, smin;
|
||||
static doublecomplex suml, sumr;
|
||||
extern int lsame_(char *, char *, ftnlen, ftnlen);
|
||||
/* Double Complex */ doublecomplex zdotc_(int *,
|
||||
doublecomplex *, int *, doublecomplex *, int *), zdotu_(
|
||||
int *, doublecomplex *, int *,
|
||||
doublecomplex *, int *);
|
||||
extern /* Subroutine */ int dlabad_(double *, double *);
|
||||
extern double dlamch_(char *, ftnlen);
|
||||
static double scaloc;
|
||||
extern /* Subroutine */ int xerbla_(char *, int *, ftnlen);
|
||||
extern double zlange_(char *, int *, int *, doublecomplex *,
|
||||
int *, double *, ftnlen);
|
||||
static double bignum;
|
||||
extern /* Subroutine */ int zdscal_(int *, double *,
|
||||
doublecomplex *, int *);
|
||||
/* Double Complex */ doublecomplex zladiv_(doublecomplex *,
|
||||
doublecomplex *);
|
||||
static int notrna, notrnb;
|
||||
static double smlnum;
|
||||
|
||||
/* Parameter adjustments */
|
||||
a_dim1 = *lda;
|
||||
a_offset = 1 + a_dim1;
|
||||
a -= a_offset;
|
||||
b_dim1 = *ldb;
|
||||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
c_dim1 = *ldc;
|
||||
c_offset = 1 + c_dim1;
|
||||
c__ -= c_offset;
|
||||
|
||||
/* Function Body */
|
||||
notrna = lsame_(trana, "N", (ftnlen)1, (ftnlen)1);
|
||||
notrnb = lsame_(tranb, "N", (ftnlen)1, (ftnlen)1);
|
||||
*info = 0;
|
||||
if (! notrna && ! lsame_(trana, "C", (ftnlen)1, (ftnlen)1)) {
|
||||
*info = -1;
|
||||
} else if (! notrnb && ! lsame_(tranb, "C", (ftnlen)1, (ftnlen)1)) {
|
||||
*info = -2;
|
||||
} else if (*isgn != 1 && *isgn != -1) {
|
||||
*info = -3;
|
||||
} else if (*m < 0) {
|
||||
*info = -4;
|
||||
} else if (*n < 0) {
|
||||
*info = -5;
|
||||
} else if (*lda < max(1,*m)) {
|
||||
*info = -7;
|
||||
} else if (*ldb < max(1,*n)) {
|
||||
*info = -9;
|
||||
} else if (*ldc < max(1,*m)) {
|
||||
*info = -11;
|
||||
}
|
||||
if (*info != 0) {
|
||||
i__1 = -(*info);
|
||||
xerbla_("ZTRSY2", &i__1, (ftnlen)6);
|
||||
return;
|
||||
}
|
||||
*scale = 1.;
|
||||
if (*m == 0 || *n == 0) {
|
||||
return;
|
||||
}
|
||||
eps = dlamch_("P", (ftnlen)1);
|
||||
smlnum = dlamch_("S", (ftnlen)1);
|
||||
bignum = 1. / smlnum;
|
||||
dlabad_(&smlnum, &bignum);
|
||||
smlnum = smlnum * (double) (*m * *n) / eps;
|
||||
bignum = 1. / smlnum;
|
||||
/* Computing MAX */
|
||||
d__1 = smlnum, d__2 = eps * zlange_("M", m, m, &a[a_offset], lda, dum, (
|
||||
ftnlen)1), d__1 = max(d__1,d__2), d__2 = eps * zlange_("M", n, n,
|
||||
&b[b_offset], ldb, dum, (ftnlen)1);
|
||||
smin = max(d__1,d__2);
|
||||
sgn = (double) (*isgn);
|
||||
if (notrna && notrnb) {
|
||||
i__1 = *n;
|
||||
for (l = 1; l <= i__1; ++l) {
|
||||
for (k = *m; k >= 1; --k) {
|
||||
i__2 = *m - k;
|
||||
/* Computing MIN */
|
||||
i__3 = k + 1;
|
||||
/* Computing MIN */
|
||||
i__4 = k + 1;
|
||||
z__1 = zdotu_(&i__2, &a[k + min(i__3,*m) * a_dim1], lda, &c__[
|
||||
min(i__4,*m) + l * c_dim1], &c__1);
|
||||
suml.r = z__1.r, suml.i = z__1.i;
|
||||
i__2 = l - 1;
|
||||
z__1 = zdotu_(&i__2, &c__[k + c_dim1], ldc, &b[l * b_dim1 + 1]
|
||||
, &c__1);
|
||||
sumr.r = z__1.r, sumr.i = z__1.i;
|
||||
i__2 = k + l * c_dim1;
|
||||
z__3.r = sgn * sumr.r, z__3.i = sgn * sumr.i;
|
||||
z__2.r = suml.r + z__3.r, z__2.i = suml.i + z__3.i;
|
||||
z__1.r = c__[i__2].r - z__2.r, z__1.i = c__[i__2].i - z__2.i;
|
||||
vec.r = z__1.r, vec.i = z__1.i;
|
||||
scaloc = 1.;
|
||||
i__2 = k + k * a_dim1;
|
||||
i__3 = l + l * b_dim1;
|
||||
z__2.r = sgn * b[i__3].r, z__2.i = sgn * b[i__3].i;
|
||||
z__1.r = a[i__2].r + z__2.r, z__1.i = a[i__2].i + z__2.i;
|
||||
a11.r = z__1.r, a11.i = z__1.i;
|
||||
da11 = (d__1 = a11.r, abs(d__1)) + (d__2 = d_imag(&a11), abs(
|
||||
d__2));
|
||||
if (da11 <= smin) {
|
||||
a11.r = smin, a11.i = 0.;
|
||||
da11 = smin;
|
||||
*info = 1;
|
||||
}
|
||||
db = (d__1 = vec.r, abs(d__1)) + (d__2 = d_imag(&vec), abs(
|
||||
d__2));
|
||||
if (da11 < 1. && db > 1.) {
|
||||
if (db > bignum * da11) {
|
||||
scaloc = 1. / db;
|
||||
}
|
||||
}
|
||||
z__3.r = scaloc, z__3.i = 0.;
|
||||
z__2.r = vec.r * z__3.r - vec.i * z__3.i, z__2.i = vec.r *
|
||||
z__3.i + vec.i * z__3.r;
|
||||
z__1 = zladiv_(&z__2, &a11);
|
||||
x11.r = z__1.r, x11.i = z__1.i;
|
||||
if (scaloc != 1.) {
|
||||
i__2 = *n;
|
||||
for (j = 1; j <= i__2; ++j) {
|
||||
zdscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1);
|
||||
/* L10: */
|
||||
}
|
||||
*scale *= scaloc;
|
||||
}
|
||||
i__2 = k + l * c_dim1;
|
||||
c__[i__2].r = x11.r, c__[i__2].i = x11.i;
|
||||
/* L20: */
|
||||
}
|
||||
/* L30: */
|
||||
}
|
||||
} else if (! notrna && notrnb) {
|
||||
i__1 = *n;
|
||||
for (l = 1; l <= i__1; ++l) {
|
||||
i__2 = *m;
|
||||
for (k = 1; k <= i__2; ++k) {
|
||||
i__3 = k - 1;
|
||||
z__1 = zdotc_(&i__3, &a[k * a_dim1 + 1], &c__1, &c__[l *
|
||||
c_dim1 + 1], &c__1);
|
||||
suml.r = z__1.r, suml.i = z__1.i;
|
||||
i__3 = l - 1;
|
||||
z__1 = zdotu_(&i__3, &c__[k + c_dim1], ldc, &b[l * b_dim1 + 1]
|
||||
, &c__1);
|
||||
sumr.r = z__1.r, sumr.i = z__1.i;
|
||||
i__3 = k + l * c_dim1;
|
||||
z__3.r = sgn * sumr.r, z__3.i = sgn * sumr.i;
|
||||
z__2.r = suml.r + z__3.r, z__2.i = suml.i + z__3.i;
|
||||
z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
|
||||
vec.r = z__1.r, vec.i = z__1.i;
|
||||
scaloc = 1.;
|
||||
d_cnjg(&z__2, &a[k + k * a_dim1]);
|
||||
i__3 = l + l * b_dim1;
|
||||
z__3.r = sgn * b[i__3].r, z__3.i = sgn * b[i__3].i;
|
||||
z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
|
||||
a11.r = z__1.r, a11.i = z__1.i;
|
||||
da11 = (d__1 = a11.r, abs(d__1)) + (d__2 = d_imag(&a11), abs(
|
||||
d__2));
|
||||
if (da11 <= smin) {
|
||||
a11.r = smin, a11.i = 0.;
|
||||
da11 = smin;
|
||||
*info = 1;
|
||||
}
|
||||
db = (d__1 = vec.r, abs(d__1)) + (d__2 = d_imag(&vec), abs(
|
||||
d__2));
|
||||
if (da11 < 1. && db > 1.) {
|
||||
if (db > bignum * da11) {
|
||||
scaloc = 1. / db;
|
||||
}
|
||||
}
|
||||
z__3.r = scaloc, z__3.i = 0.;
|
||||
z__2.r = vec.r * z__3.r - vec.i * z__3.i, z__2.i = vec.r *
|
||||
z__3.i + vec.i * z__3.r;
|
||||
z__1 = zladiv_(&z__2, &a11);
|
||||
x11.r = z__1.r, x11.i = z__1.i;
|
||||
if (scaloc != 1.) {
|
||||
i__3 = *n;
|
||||
for (j = 1; j <= i__3; ++j) {
|
||||
zdscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1);
|
||||
/* L40: */
|
||||
}
|
||||
*scale *= scaloc;
|
||||
}
|
||||
i__3 = k + l * c_dim1;
|
||||
c__[i__3].r = x11.r, c__[i__3].i = x11.i;
|
||||
/* L50: */
|
||||
}
|
||||
/* L60: */
|
||||
}
|
||||
} else if (! notrna && ! notrnb) {
|
||||
for (l = *n; l >= 1; --l) {
|
||||
i__1 = *m;
|
||||
for (k = 1; k <= i__1; ++k) {
|
||||
i__2 = k - 1;
|
||||
z__1 = zdotc_(&i__2, &a[k * a_dim1 + 1], &c__1, &c__[l *
|
||||
c_dim1 + 1], &c__1);
|
||||
suml.r = z__1.r, suml.i = z__1.i;
|
||||
i__2 = *n - l;
|
||||
/* Computing MIN */
|
||||
i__3 = l + 1;
|
||||
/* Computing MIN */
|
||||
i__4 = l + 1;
|
||||
z__1 = zdotc_(&i__2, &c__[k + min(i__3,*n) * c_dim1], ldc, &b[
|
||||
l + min(i__4,*n) * b_dim1], ldb);
|
||||
sumr.r = z__1.r, sumr.i = z__1.i;
|
||||
i__2 = k + l * c_dim1;
|
||||
d_cnjg(&z__4, &sumr);
|
||||
z__3.r = sgn * z__4.r, z__3.i = sgn * z__4.i;
|
||||
z__2.r = suml.r + z__3.r, z__2.i = suml.i + z__3.i;
|
||||
z__1.r = c__[i__2].r - z__2.r, z__1.i = c__[i__2].i - z__2.i;
|
||||
vec.r = z__1.r, vec.i = z__1.i;
|
||||
scaloc = 1.;
|
||||
i__2 = k + k * a_dim1;
|
||||
i__3 = l + l * b_dim1;
|
||||
z__3.r = sgn * b[i__3].r, z__3.i = sgn * b[i__3].i;
|
||||
z__2.r = a[i__2].r + z__3.r, z__2.i = a[i__2].i + z__3.i;
|
||||
d_cnjg(&z__1, &z__2);
|
||||
a11.r = z__1.r, a11.i = z__1.i;
|
||||
da11 = (d__1 = a11.r, abs(d__1)) + (d__2 = d_imag(&a11), abs(
|
||||
d__2));
|
||||
if (da11 <= smin) {
|
||||
a11.r = smin, a11.i = 0.;
|
||||
da11 = smin;
|
||||
*info = 1;
|
||||
}
|
||||
db = (d__1 = vec.r, abs(d__1)) + (d__2 = d_imag(&vec), abs(
|
||||
d__2));
|
||||
if (da11 < 1. && db > 1.) {
|
||||
if (db > bignum * da11) {
|
||||
scaloc = 1. / db;
|
||||
}
|
||||
}
|
||||
z__3.r = scaloc, z__3.i = 0.;
|
||||
z__2.r = vec.r * z__3.r - vec.i * z__3.i, z__2.i = vec.r *
|
||||
z__3.i + vec.i * z__3.r;
|
||||
z__1 = zladiv_(&z__2, &a11);
|
||||
x11.r = z__1.r, x11.i = z__1.i;
|
||||
if (scaloc != 1.) {
|
||||
i__2 = *n;
|
||||
for (j = 1; j <= i__2; ++j) {
|
||||
zdscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1);
|
||||
/* L70: */
|
||||
}
|
||||
*scale *= scaloc;
|
||||
}
|
||||
i__2 = k + l * c_dim1;
|
||||
c__[i__2].r = x11.r, c__[i__2].i = x11.i;
|
||||
/* L80: */
|
||||
}
|
||||
/* L90: */
|
||||
}
|
||||
} else if (notrna && ! notrnb) {
|
||||
for (l = *n; l >= 1; --l) {
|
||||
for (k = *m; k >= 1; --k) {
|
||||
i__1 = *m - k;
|
||||
/* Computing MIN */
|
||||
i__2 = k + 1;
|
||||
/* Computing MIN */
|
||||
i__3 = k + 1;
|
||||
z__1 = zdotu_(&i__1, &a[k + min(i__2,*m) * a_dim1], lda, &c__[
|
||||
min(i__3,*m) + l * c_dim1], &c__1);
|
||||
suml.r = z__1.r, suml.i = z__1.i;
|
||||
i__1 = *n - l;
|
||||
/* Computing MIN */
|
||||
i__2 = l + 1;
|
||||
/* Computing MIN */
|
||||
i__3 = l + 1;
|
||||
z__1 = zdotc_(&i__1, &c__[k + min(i__2,*n) * c_dim1], ldc, &b[
|
||||
l + min(i__3,*n) * b_dim1], ldb);
|
||||
sumr.r = z__1.r, sumr.i = z__1.i;
|
||||
i__1 = k + l * c_dim1;
|
||||
d_cnjg(&z__4, &sumr);
|
||||
z__3.r = sgn * z__4.r, z__3.i = sgn * z__4.i;
|
||||
z__2.r = suml.r + z__3.r, z__2.i = suml.i + z__3.i;
|
||||
z__1.r = c__[i__1].r - z__2.r, z__1.i = c__[i__1].i - z__2.i;
|
||||
vec.r = z__1.r, vec.i = z__1.i;
|
||||
scaloc = 1.;
|
||||
i__1 = k + k * a_dim1;
|
||||
d_cnjg(&z__3, &b[l + l * b_dim1]);
|
||||
z__2.r = sgn * z__3.r, z__2.i = sgn * z__3.i;
|
||||
z__1.r = a[i__1].r + z__2.r, z__1.i = a[i__1].i + z__2.i;
|
||||
a11.r = z__1.r, a11.i = z__1.i;
|
||||
da11 = (d__1 = a11.r, abs(d__1)) + (d__2 = d_imag(&a11), abs(
|
||||
d__2));
|
||||
if (da11 <= smin) {
|
||||
a11.r = smin, a11.i = 0.;
|
||||
da11 = smin;
|
||||
*info = 1;
|
||||
}
|
||||
db = (d__1 = vec.r, abs(d__1)) + (d__2 = d_imag(&vec), abs(
|
||||
d__2));
|
||||
if (da11 < 1. && db > 1.) {
|
||||
if (db > bignum * da11) {
|
||||
scaloc = 1. / db;
|
||||
}
|
||||
}
|
||||
z__3.r = scaloc, z__3.i = 0.;
|
||||
z__2.r = vec.r * z__3.r - vec.i * z__3.i, z__2.i = vec.r *
|
||||
z__3.i + vec.i * z__3.r;
|
||||
z__1 = zladiv_(&z__2, &a11);
|
||||
x11.r = z__1.r, x11.i = z__1.i;
|
||||
if (scaloc != 1.) {
|
||||
i__1 = *n;
|
||||
for (j = 1; j <= i__1; ++j) {
|
||||
zdscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1);
|
||||
/* L100: */
|
||||
}
|
||||
*scale *= scaloc;
|
||||
}
|
||||
i__1 = k + l * c_dim1;
|
||||
c__[i__1].r = x11.r, c__[i__1].i = x11.i;
|
||||
/* L110: */
|
||||
}
|
||||
/* L120: */
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,107 @@
|
|||
#include "relapack.h"
|
||||
|
||||
static void RELAPACK_ztrtri_rec(const char *, const char *, const int *,
|
||||
double *, const int *, int *);
|
||||
|
||||
|
||||
/** CTRTRI computes the inverse of a complex upper or lower triangular matrix A.
|
||||
*
|
||||
* This routine is functionally equivalent to LAPACK's ztrtri.
|
||||
* For details on its interface, see
|
||||
* http://www.netlib.org/lapack/explore-html/d1/d0e/ztrtri_8f.html
|
||||
* */
|
||||
void RELAPACK_ztrtri(
|
||||
const char *uplo, const char *diag, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
) {
|
||||
|
||||
// Check arguments
|
||||
const int lower = LAPACK(lsame)(uplo, "L");
|
||||
const int upper = LAPACK(lsame)(uplo, "U");
|
||||
const int nounit = LAPACK(lsame)(diag, "N");
|
||||
const int unit = LAPACK(lsame)(diag, "U");
|
||||
*info = 0;
|
||||
if (!lower && !upper)
|
||||
*info = -1;
|
||||
else if (!nounit && !unit)
|
||||
*info = -2;
|
||||
else if (*n < 0)
|
||||
*info = -3;
|
||||
else if (*ldA < MAX(1, *n))
|
||||
*info = -5;
|
||||
if (*info) {
|
||||
const int minfo = -*info;
|
||||
LAPACK(xerbla)("ZTRTRI", &minfo);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clean char * arguments
|
||||
const char cleanuplo = lower ? 'L' : 'U';
|
||||
const char cleandiag = nounit ? 'N' : 'U';
|
||||
|
||||
// check for singularity
|
||||
if (nounit) {
|
||||
int i;
|
||||
for (i = 0; i < *n; i++)
|
||||
if (A[2 * (i + *ldA * i)] == 0 && A[2 * (i + *ldA * i) + 1] == 0) {
|
||||
*info = i;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursive kernel
|
||||
RELAPACK_ztrtri_rec(&cleanuplo, &cleandiag, n, A, ldA, info);
|
||||
}
|
||||
|
||||
|
||||
/** ztrtri's recursive compute kernel */
|
||||
static void RELAPACK_ztrtri_rec(
|
||||
const char *uplo, const char *diag, const int *n,
|
||||
double *A, const int *ldA,
|
||||
int *info
|
||||
){
|
||||
|
||||
if (*n <= MAX(CROSSOVER_ZTRTRI, 1)) {
|
||||
// Unblocked
|
||||
LAPACK(ztrti2)(uplo, diag, n, A, ldA, info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Constants
|
||||
const double ONE[] = { 1. };
|
||||
const double MONE[] = { -1. };
|
||||
|
||||
// Splitting
|
||||
const int n1 = ZREC_SPLIT(*n);
|
||||
const int n2 = *n - n1;
|
||||
|
||||
// A_TL A_TR
|
||||
// A_BL A_BR
|
||||
double *const A_TL = A;
|
||||
double *const A_TR = A + 2 * *ldA * n1;
|
||||
double *const A_BL = A + 2 * n1;
|
||||
double *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
|
||||
|
||||
// recursion(A_TL)
|
||||
RELAPACK_ztrtri_rec(uplo, diag, &n1, A_TL, ldA, info);
|
||||
if (*info)
|
||||
return;
|
||||
|
||||
if (*uplo == 'L') {
|
||||
// A_BL = - A_BL * A_TL
|
||||
BLAS(ztrmm)("R", "L", "N", diag, &n2, &n1, MONE, A_TL, ldA, A_BL, ldA);
|
||||
// A_BL = A_BR \ A_BL
|
||||
BLAS(ztrsm)("L", "L", "N", diag, &n2, &n1, ONE, A_BR, ldA, A_BL, ldA);
|
||||
} else {
|
||||
// A_TR = - A_TL * A_TR
|
||||
BLAS(ztrmm)("L", "U", "N", diag, &n1, &n2, MONE, A_TL, ldA, A_TR, ldA);
|
||||
// A_TR = A_TR / A_BR
|
||||
BLAS(ztrsm)("R", "U", "N", diag, &n1, &n2, ONE, A_BR, ldA, A_TR, ldA);
|
||||
}
|
||||
|
||||
// recursion(A_BR)
|
||||
RELAPACK_ztrtri_rec(uplo, diag, &n2, A_BR, ldA, info);
|
||||
if (*info)
|
||||
*info += n1;
|
||||
}
|
Loading…
Reference in New Issue