Merge pull request #4087 from martin-frbg/lapack847
Improve variants of Cholesky and QR (Reference-LAPACK PR 847)
This commit is contained in:
commit
eb058c2ae2
|
@ -28,7 +28,7 @@ LULL = lu/LL/cgetrf.o lu/LL/dgetrf.o lu/LL/sgetrf.o lu/LL/zgetrf.o
|
|||
|
||||
LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o
|
||||
|
||||
QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o qr/LL/sceil.o
|
||||
QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o
|
||||
|
||||
|
||||
.PHONY: all
|
||||
|
|
|
@ -24,7 +24,7 @@ C> \brief \b CPOTRF VARIANT: right looking block version of the algorithm, calli
|
|||
C>\details \b Purpose:
|
||||
C>\verbatim
|
||||
C>
|
||||
C> CPOTRF computes the Cholesky factorization of a real Hermitian
|
||||
C> CPOTRF computes the Cholesky factorization of a complex Hermitian
|
||||
C> positive definite matrix A.
|
||||
C>
|
||||
C> The factorization has the form
|
||||
|
|
|
@ -24,7 +24,7 @@ C> \brief \b ZPOTRF VARIANT: right looking block version of the algorithm, calli
|
|||
C>\details \b Purpose:
|
||||
C>\verbatim
|
||||
C>
|
||||
C> ZPOTRF computes the Cholesky factorization of a real Hermitian
|
||||
C> ZPOTRF computes the Cholesky factorization of a complex Hermitian
|
||||
C> positive definite matrix A.
|
||||
C>
|
||||
C> The factorization has the form
|
||||
|
|
|
@ -24,7 +24,7 @@ C> \brief \b CPOTRF VARIANT: top-looking block version of the algorithm, calling
|
|||
C>\details \b Purpose:
|
||||
C>\verbatim
|
||||
C>
|
||||
C> CPOTRF computes the Cholesky factorization of a real symmetric
|
||||
C> CPOTRF computes the Cholesky factorization of a complex Hermitian
|
||||
C> positive definite matrix A.
|
||||
C>
|
||||
C> The factorization has the form
|
||||
|
@ -55,7 +55,7 @@ C>
|
|||
C> \param[in,out] A
|
||||
C> \verbatim
|
||||
C> A is COMPLEX array, dimension (LDA,N)
|
||||
C> On entry, the symmetric matrix A. If UPLO = 'U', the leading
|
||||
C> On entry, the Hermitian matrix A. If UPLO = 'U', the leading
|
||||
C> N-by-N upper triangular part of A contains the upper
|
||||
C> triangular part of the matrix A, and the strictly lower
|
||||
C> triangular part of A is not referenced. If UPLO = 'L', the
|
||||
|
|
|
@ -24,7 +24,7 @@ C> \brief \b ZPOTRF VARIANT: top-looking block version of the algorithm, calling
|
|||
C>\details \b Purpose:
|
||||
C>\verbatim
|
||||
C>
|
||||
C> ZPOTRF computes the Cholesky factorization of a real symmetric
|
||||
C> ZPOTRF computes the Cholesky factorization of a complex Hermitian
|
||||
C> positive definite matrix A.
|
||||
C>
|
||||
C> The factorization has the form
|
||||
|
@ -55,7 +55,7 @@ C>
|
|||
C> \param[in,out] A
|
||||
C> \verbatim
|
||||
C> A is COMPLEX*16 array, dimension (LDA,N)
|
||||
C> On entry, the symmetric matrix A. If UPLO = 'U', the leading
|
||||
C> On entry, the Hermitian matrix A. If UPLO = 'U', the leading
|
||||
C> N-by-N upper triangular part of A contains the upper
|
||||
C> triangular part of the matrix A, and the strictly lower
|
||||
C> triangular part of A is not referenced. If UPLO = 'L', the
|
||||
|
|
|
@ -23,7 +23,7 @@ C> \brief \b CGEQRF VARIANT: left-looking Level 3 BLAS version of the algorithm.
|
|||
C>\details \b Purpose:
|
||||
C>\verbatim
|
||||
C>
|
||||
C> CGEQRF computes a QR factorization of a real M-by-N matrix A:
|
||||
C> CGEQRF computes a QR factorization of a complex M-by-N matrix A:
|
||||
C> A = Q * R.
|
||||
C>
|
||||
C> This is the left-looking Level 3 BLAS version of the algorithm.
|
||||
|
@ -172,12 +172,11 @@ C>
|
|||
EXTERNAL CGEQR2, CLARFB, CLARFT, XERBLA
|
||||
* ..
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC MAX, MIN
|
||||
INTRINSIC CEILING, MAX, MIN, REAL
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
INTEGER ILAENV
|
||||
REAL SCEIL
|
||||
EXTERNAL ILAENV, SCEIL
|
||||
EXTERNAL ILAENV
|
||||
* ..
|
||||
* .. Executable Statements ..
|
||||
|
||||
|
@ -205,13 +204,13 @@ C>
|
|||
*
|
||||
* So here 4 x 4 is the last T stored in the workspace
|
||||
*
|
||||
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
|
||||
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB
|
||||
|
||||
*
|
||||
* optimal workspace = space for clarfb + space for normal T's + space for the last T
|
||||
*
|
||||
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
|
||||
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
|
||||
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))
|
||||
|
||||
IF( K.EQ.0 ) THEN
|
||||
|
||||
|
@ -230,7 +229,7 @@ C>
|
|||
|
||||
ELSE
|
||||
|
||||
LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
|
||||
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
|
||||
LWKOPT = (LBWORK+LLWORK-NB)*NB
|
||||
WORK( 1 ) = LWKOPT
|
||||
|
||||
|
|
|
@ -172,12 +172,11 @@ C>
|
|||
EXTERNAL DGEQR2, DLARFB, DLARFT, XERBLA
|
||||
* ..
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC MAX, MIN
|
||||
INTRINSIC CEILING, MAX, MIN, REAL
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
INTEGER ILAENV
|
||||
REAL SCEIL
|
||||
EXTERNAL ILAENV, SCEIL
|
||||
EXTERNAL ILAENV
|
||||
* ..
|
||||
* .. Executable Statements ..
|
||||
|
||||
|
@ -205,13 +204,13 @@ C>
|
|||
*
|
||||
* So here 4 x 4 is the last T stored in the workspace
|
||||
*
|
||||
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
|
||||
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB
|
||||
|
||||
*
|
||||
* optimal workspace = space for dlarfb + space for normal T's + space for the last T
|
||||
*
|
||||
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
|
||||
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
|
||||
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))
|
||||
|
||||
IF( K.EQ.0 ) THEN
|
||||
|
||||
|
@ -230,7 +229,7 @@ C>
|
|||
|
||||
ELSE
|
||||
|
||||
LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
|
||||
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
|
||||
LWKOPT = (LBWORK+LLWORK-NB)*NB
|
||||
WORK( 1 ) = LWKOPT
|
||||
|
||||
|
|
|
@ -1,86 +0,0 @@
|
|||
C> \brief \b SCEIL
|
||||
*
|
||||
* =========== DOCUMENTATION ===========
|
||||
*
|
||||
* Online html documentation available at
|
||||
* http://www.netlib.org/lapack/explore-html/
|
||||
*
|
||||
* Definition:
|
||||
* ===========
|
||||
*
|
||||
* REAL FUNCTION SCEIL( A )
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
* REAL A
|
||||
* ..
|
||||
*
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Intrinsic Functions ..
|
||||
* INTRINSIC INT
|
||||
* ..
|
||||
* .. Executable Statements ..*
|
||||
*
|
||||
* IF (A-INT(A).EQ.0) THEN
|
||||
* SCEIL = A
|
||||
* ELSE IF (A.GT.0) THEN
|
||||
* SCEIL = INT(A)+1;
|
||||
* ELSE
|
||||
* SCEIL = INT(A)
|
||||
* END IF
|
||||
*
|
||||
* RETURN
|
||||
*
|
||||
* END
|
||||
* Purpose
|
||||
* =======
|
||||
*
|
||||
C>\details \b Purpose:
|
||||
C>\verbatim
|
||||
C>\endverbatim
|
||||
*
|
||||
* Arguments:
|
||||
* ==========
|
||||
*
|
||||
*
|
||||
* Authors:
|
||||
* ========
|
||||
*
|
||||
C> \author Univ. of Tennessee
|
||||
C> \author Univ. of California Berkeley
|
||||
C> \author Univ. of Colorado Denver
|
||||
C> \author NAG Ltd.
|
||||
*
|
||||
C> \date December 2016
|
||||
*
|
||||
C> \ingroup variantsOTHERcomputational
|
||||
*
|
||||
* =====================================================================
|
||||
REAL FUNCTION SCEIL( A )
|
||||
*
|
||||
* -- LAPACK computational routine --
|
||||
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
*
|
||||
* .. Scalar Arguments ..*
|
||||
REAL A
|
||||
* ..
|
||||
*
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC INT
|
||||
* ..
|
||||
* .. Executable Statements ..*
|
||||
*
|
||||
IF (A-INT(A).EQ.0) THEN
|
||||
SCEIL = A
|
||||
ELSE IF (A.GT.0) THEN
|
||||
SCEIL = INT(A)+1;
|
||||
ELSE
|
||||
SCEIL = INT(A)
|
||||
END IF
|
||||
|
||||
RETURN
|
||||
*
|
||||
END
|
|
@ -172,12 +172,11 @@ C>
|
|||
EXTERNAL SGEQR2, SLARFB, SLARFT, XERBLA
|
||||
* ..
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC MAX, MIN
|
||||
INTRINSIC CEILING, MAX, MIN, REAL
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
INTEGER ILAENV
|
||||
REAL SCEIL
|
||||
EXTERNAL ILAENV, SCEIL
|
||||
EXTERNAL ILAENV
|
||||
* ..
|
||||
* .. Executable Statements ..
|
||||
|
||||
|
@ -205,13 +204,13 @@ C>
|
|||
*
|
||||
* So here 4 x 4 is the last T stored in the workspace
|
||||
*
|
||||
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
|
||||
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB
|
||||
|
||||
*
|
||||
* optimal workspace = space for slarfb + space for normal T's + space for the last T
|
||||
*
|
||||
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
|
||||
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
|
||||
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))
|
||||
|
||||
IF( K.EQ.0 ) THEN
|
||||
|
||||
|
@ -230,7 +229,7 @@ C>
|
|||
|
||||
ELSE
|
||||
|
||||
LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
|
||||
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
|
||||
LWKOPT = (LBWORK+LLWORK-NB)*NB
|
||||
WORK( 1 ) = LWKOPT
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ C> \brief \b ZGEQRF VARIANT: left-looking Level 3 BLAS of the algorithm.
|
|||
C>\details \b Purpose:
|
||||
C>\verbatim
|
||||
C>
|
||||
C> ZGEQRF computes a QR factorization of a real M-by-N matrix A:
|
||||
C> ZGEQRF computes a QR factorization of a complex M-by-N matrix A:
|
||||
C> A = Q * R.
|
||||
C>
|
||||
C> This is the left-looking Level 3 BLAS version of the algorithm.
|
||||
|
@ -172,12 +172,11 @@ C>
|
|||
EXTERNAL ZGEQR2, ZLARFB, ZLARFT, XERBLA
|
||||
* ..
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC MAX, MIN
|
||||
INTRINSIC CEILING, MAX, MIN, REAL
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
INTEGER ILAENV
|
||||
REAL SCEIL
|
||||
EXTERNAL ILAENV, SCEIL
|
||||
EXTERNAL ILAENV
|
||||
* ..
|
||||
* .. Executable Statements ..
|
||||
|
||||
|
@ -205,13 +204,13 @@ C>
|
|||
*
|
||||
* So here 4 x 4 is the last T stored in the workspace
|
||||
*
|
||||
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
|
||||
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB
|
||||
|
||||
*
|
||||
* optimal workspace = space for zlarfb + space for normal T's + space for the last T
|
||||
*
|
||||
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
|
||||
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
|
||||
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))
|
||||
|
||||
IF( K.EQ.0 ) THEN
|
||||
|
||||
|
@ -230,7 +229,7 @@ C>
|
|||
|
||||
ELSE
|
||||
|
||||
LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
|
||||
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
|
||||
LWKOPT = (LBWORK+LLWORK-NB)*NB
|
||||
WORK( 1 ) = LWKOPT
|
||||
|
||||
|
|
Loading…
Reference in New Issue