Merge pull request #4087 from martin-frbg/lapack847
Improve variants of Cholesky and QR (Reference-LAPACK PR 847)
This commit is contained in:
commit
eb058c2ae2
|
@ -28,7 +28,7 @@ LULL = lu/LL/cgetrf.o lu/LL/dgetrf.o lu/LL/sgetrf.o lu/LL/zgetrf.o
|
||||||
|
|
||||||
LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o
|
LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o
|
||||||
|
|
||||||
QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o qr/LL/sceil.o
|
QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o
|
||||||
|
|
||||||
|
|
||||||
.PHONY: all
|
.PHONY: all
|
||||||
|
|
|
@ -24,7 +24,7 @@ C> \brief \b CPOTRF VARIANT: right looking block version of the algorithm, calli
|
||||||
C>\details \b Purpose:
|
C>\details \b Purpose:
|
||||||
C>\verbatim
|
C>\verbatim
|
||||||
C>
|
C>
|
||||||
C> CPOTRF computes the Cholesky factorization of a real Hermitian
|
C> CPOTRF computes the Cholesky factorization of a complex Hermitian
|
||||||
C> positive definite matrix A.
|
C> positive definite matrix A.
|
||||||
C>
|
C>
|
||||||
C> The factorization has the form
|
C> The factorization has the form
|
||||||
|
|
|
@ -24,7 +24,7 @@ C> \brief \b ZPOTRF VARIANT: right looking block version of the algorithm, calli
|
||||||
C>\details \b Purpose:
|
C>\details \b Purpose:
|
||||||
C>\verbatim
|
C>\verbatim
|
||||||
C>
|
C>
|
||||||
C> ZPOTRF computes the Cholesky factorization of a real Hermitian
|
C> ZPOTRF computes the Cholesky factorization of a complex Hermitian
|
||||||
C> positive definite matrix A.
|
C> positive definite matrix A.
|
||||||
C>
|
C>
|
||||||
C> The factorization has the form
|
C> The factorization has the form
|
||||||
|
|
|
@ -24,7 +24,7 @@ C> \brief \b CPOTRF VARIANT: top-looking block version of the algorithm, calling
|
||||||
C>\details \b Purpose:
|
C>\details \b Purpose:
|
||||||
C>\verbatim
|
C>\verbatim
|
||||||
C>
|
C>
|
||||||
C> CPOTRF computes the Cholesky factorization of a real symmetric
|
C> CPOTRF computes the Cholesky factorization of a complex Hermitian
|
||||||
C> positive definite matrix A.
|
C> positive definite matrix A.
|
||||||
C>
|
C>
|
||||||
C> The factorization has the form
|
C> The factorization has the form
|
||||||
|
@ -55,7 +55,7 @@ C>
|
||||||
C> \param[in,out] A
|
C> \param[in,out] A
|
||||||
C> \verbatim
|
C> \verbatim
|
||||||
C> A is COMPLEX array, dimension (LDA,N)
|
C> A is COMPLEX array, dimension (LDA,N)
|
||||||
C> On entry, the symmetric matrix A. If UPLO = 'U', the leading
|
C> On entry, the Hermitian matrix A. If UPLO = 'U', the leading
|
||||||
C> N-by-N upper triangular part of A contains the upper
|
C> N-by-N upper triangular part of A contains the upper
|
||||||
C> triangular part of the matrix A, and the strictly lower
|
C> triangular part of the matrix A, and the strictly lower
|
||||||
C> triangular part of A is not referenced. If UPLO = 'L', the
|
C> triangular part of A is not referenced. If UPLO = 'L', the
|
||||||
|
|
|
@ -24,7 +24,7 @@ C> \brief \b ZPOTRF VARIANT: top-looking block version of the algorithm, calling
|
||||||
C>\details \b Purpose:
|
C>\details \b Purpose:
|
||||||
C>\verbatim
|
C>\verbatim
|
||||||
C>
|
C>
|
||||||
C> ZPOTRF computes the Cholesky factorization of a real symmetric
|
C> ZPOTRF computes the Cholesky factorization of a complex Hermitian
|
||||||
C> positive definite matrix A.
|
C> positive definite matrix A.
|
||||||
C>
|
C>
|
||||||
C> The factorization has the form
|
C> The factorization has the form
|
||||||
|
@ -55,7 +55,7 @@ C>
|
||||||
C> \param[in,out] A
|
C> \param[in,out] A
|
||||||
C> \verbatim
|
C> \verbatim
|
||||||
C> A is COMPLEX*16 array, dimension (LDA,N)
|
C> A is COMPLEX*16 array, dimension (LDA,N)
|
||||||
C> On entry, the symmetric matrix A. If UPLO = 'U', the leading
|
C> On entry, the Hermitian matrix A. If UPLO = 'U', the leading
|
||||||
C> N-by-N upper triangular part of A contains the upper
|
C> N-by-N upper triangular part of A contains the upper
|
||||||
C> triangular part of the matrix A, and the strictly lower
|
C> triangular part of the matrix A, and the strictly lower
|
||||||
C> triangular part of A is not referenced. If UPLO = 'L', the
|
C> triangular part of A is not referenced. If UPLO = 'L', the
|
||||||
|
|
|
@ -23,7 +23,7 @@ C> \brief \b CGEQRF VARIANT: left-looking Level 3 BLAS version of the algorithm.
|
||||||
C>\details \b Purpose:
|
C>\details \b Purpose:
|
||||||
C>\verbatim
|
C>\verbatim
|
||||||
C>
|
C>
|
||||||
C> CGEQRF computes a QR factorization of a real M-by-N matrix A:
|
C> CGEQRF computes a QR factorization of a complex M-by-N matrix A:
|
||||||
C> A = Q * R.
|
C> A = Q * R.
|
||||||
C>
|
C>
|
||||||
C> This is the left-looking Level 3 BLAS version of the algorithm.
|
C> This is the left-looking Level 3 BLAS version of the algorithm.
|
||||||
|
@ -172,12 +172,11 @@ C>
|
||||||
EXTERNAL CGEQR2, CLARFB, CLARFT, XERBLA
|
EXTERNAL CGEQR2, CLARFB, CLARFT, XERBLA
|
||||||
* ..
|
* ..
|
||||||
* .. Intrinsic Functions ..
|
* .. Intrinsic Functions ..
|
||||||
INTRINSIC MAX, MIN
|
INTRINSIC CEILING, MAX, MIN, REAL
|
||||||
* ..
|
* ..
|
||||||
* .. External Functions ..
|
* .. External Functions ..
|
||||||
INTEGER ILAENV
|
INTEGER ILAENV
|
||||||
REAL SCEIL
|
EXTERNAL ILAENV
|
||||||
EXTERNAL ILAENV, SCEIL
|
|
||||||
* ..
|
* ..
|
||||||
* .. Executable Statements ..
|
* .. Executable Statements ..
|
||||||
|
|
||||||
|
@ -205,13 +204,13 @@ C>
|
||||||
*
|
*
|
||||||
* So here 4 x 4 is the last T stored in the workspace
|
* So here 4 x 4 is the last T stored in the workspace
|
||||||
*
|
*
|
||||||
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
|
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB
|
||||||
|
|
||||||
*
|
*
|
||||||
* optimal workspace = space for clarfb + space for normal T's + space for the last T
|
* optimal workspace = space for clarfb + space for normal T's + space for the last T
|
||||||
*
|
*
|
||||||
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
|
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
|
||||||
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
|
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))
|
||||||
|
|
||||||
IF( K.EQ.0 ) THEN
|
IF( K.EQ.0 ) THEN
|
||||||
|
|
||||||
|
@ -230,7 +229,7 @@ C>
|
||||||
|
|
||||||
ELSE
|
ELSE
|
||||||
|
|
||||||
LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
|
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
|
||||||
LWKOPT = (LBWORK+LLWORK-NB)*NB
|
LWKOPT = (LBWORK+LLWORK-NB)*NB
|
||||||
WORK( 1 ) = LWKOPT
|
WORK( 1 ) = LWKOPT
|
||||||
|
|
||||||
|
|
|
@ -172,12 +172,11 @@ C>
|
||||||
EXTERNAL DGEQR2, DLARFB, DLARFT, XERBLA
|
EXTERNAL DGEQR2, DLARFB, DLARFT, XERBLA
|
||||||
* ..
|
* ..
|
||||||
* .. Intrinsic Functions ..
|
* .. Intrinsic Functions ..
|
||||||
INTRINSIC MAX, MIN
|
INTRINSIC CEILING, MAX, MIN, REAL
|
||||||
* ..
|
* ..
|
||||||
* .. External Functions ..
|
* .. External Functions ..
|
||||||
INTEGER ILAENV
|
INTEGER ILAENV
|
||||||
REAL SCEIL
|
EXTERNAL ILAENV
|
||||||
EXTERNAL ILAENV, SCEIL
|
|
||||||
* ..
|
* ..
|
||||||
* .. Executable Statements ..
|
* .. Executable Statements ..
|
||||||
|
|
||||||
|
@ -205,13 +204,13 @@ C>
|
||||||
*
|
*
|
||||||
* So here 4 x 4 is the last T stored in the workspace
|
* So here 4 x 4 is the last T stored in the workspace
|
||||||
*
|
*
|
||||||
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
|
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB
|
||||||
|
|
||||||
*
|
*
|
||||||
* optimal workspace = space for dlarfb + space for normal T's + space for the last T
|
* optimal workspace = space for dlarfb + space for normal T's + space for the last T
|
||||||
*
|
*
|
||||||
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
|
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
|
||||||
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
|
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))
|
||||||
|
|
||||||
IF( K.EQ.0 ) THEN
|
IF( K.EQ.0 ) THEN
|
||||||
|
|
||||||
|
@ -230,7 +229,7 @@ C>
|
||||||
|
|
||||||
ELSE
|
ELSE
|
||||||
|
|
||||||
LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
|
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
|
||||||
LWKOPT = (LBWORK+LLWORK-NB)*NB
|
LWKOPT = (LBWORK+LLWORK-NB)*NB
|
||||||
WORK( 1 ) = LWKOPT
|
WORK( 1 ) = LWKOPT
|
||||||
|
|
||||||
|
|
|
@ -1,86 +0,0 @@
|
||||||
C> \brief \b SCEIL
|
|
||||||
*
|
|
||||||
* =========== DOCUMENTATION ===========
|
|
||||||
*
|
|
||||||
* Online html documentation available at
|
|
||||||
* http://www.netlib.org/lapack/explore-html/
|
|
||||||
*
|
|
||||||
* Definition:
|
|
||||||
* ===========
|
|
||||||
*
|
|
||||||
* REAL FUNCTION SCEIL( A )
|
|
||||||
*
|
|
||||||
* .. Scalar Arguments ..
|
|
||||||
* REAL A
|
|
||||||
* ..
|
|
||||||
*
|
|
||||||
* =====================================================================
|
|
||||||
*
|
|
||||||
* .. Intrinsic Functions ..
|
|
||||||
* INTRINSIC INT
|
|
||||||
* ..
|
|
||||||
* .. Executable Statements ..*
|
|
||||||
*
|
|
||||||
* IF (A-INT(A).EQ.0) THEN
|
|
||||||
* SCEIL = A
|
|
||||||
* ELSE IF (A.GT.0) THEN
|
|
||||||
* SCEIL = INT(A)+1;
|
|
||||||
* ELSE
|
|
||||||
* SCEIL = INT(A)
|
|
||||||
* END IF
|
|
||||||
*
|
|
||||||
* RETURN
|
|
||||||
*
|
|
||||||
* END
|
|
||||||
* Purpose
|
|
||||||
* =======
|
|
||||||
*
|
|
||||||
C>\details \b Purpose:
|
|
||||||
C>\verbatim
|
|
||||||
C>\endverbatim
|
|
||||||
*
|
|
||||||
* Arguments:
|
|
||||||
* ==========
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* Authors:
|
|
||||||
* ========
|
|
||||||
*
|
|
||||||
C> \author Univ. of Tennessee
|
|
||||||
C> \author Univ. of California Berkeley
|
|
||||||
C> \author Univ. of Colorado Denver
|
|
||||||
C> \author NAG Ltd.
|
|
||||||
*
|
|
||||||
C> \date December 2016
|
|
||||||
*
|
|
||||||
C> \ingroup variantsOTHERcomputational
|
|
||||||
*
|
|
||||||
* =====================================================================
|
|
||||||
REAL FUNCTION SCEIL( A )
|
|
||||||
*
|
|
||||||
* -- LAPACK computational routine --
|
|
||||||
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
|
||||||
*
|
|
||||||
* .. Scalar Arguments ..*
|
|
||||||
REAL A
|
|
||||||
* ..
|
|
||||||
*
|
|
||||||
* =====================================================================
|
|
||||||
*
|
|
||||||
* .. Intrinsic Functions ..
|
|
||||||
INTRINSIC INT
|
|
||||||
* ..
|
|
||||||
* .. Executable Statements ..*
|
|
||||||
*
|
|
||||||
IF (A-INT(A).EQ.0) THEN
|
|
||||||
SCEIL = A
|
|
||||||
ELSE IF (A.GT.0) THEN
|
|
||||||
SCEIL = INT(A)+1;
|
|
||||||
ELSE
|
|
||||||
SCEIL = INT(A)
|
|
||||||
END IF
|
|
||||||
|
|
||||||
RETURN
|
|
||||||
*
|
|
||||||
END
|
|
|
@ -172,12 +172,11 @@ C>
|
||||||
EXTERNAL SGEQR2, SLARFB, SLARFT, XERBLA
|
EXTERNAL SGEQR2, SLARFB, SLARFT, XERBLA
|
||||||
* ..
|
* ..
|
||||||
* .. Intrinsic Functions ..
|
* .. Intrinsic Functions ..
|
||||||
INTRINSIC MAX, MIN
|
INTRINSIC CEILING, MAX, MIN, REAL
|
||||||
* ..
|
* ..
|
||||||
* .. External Functions ..
|
* .. External Functions ..
|
||||||
INTEGER ILAENV
|
INTEGER ILAENV
|
||||||
REAL SCEIL
|
EXTERNAL ILAENV
|
||||||
EXTERNAL ILAENV, SCEIL
|
|
||||||
* ..
|
* ..
|
||||||
* .. Executable Statements ..
|
* .. Executable Statements ..
|
||||||
|
|
||||||
|
@ -205,13 +204,13 @@ C>
|
||||||
*
|
*
|
||||||
* So here 4 x 4 is the last T stored in the workspace
|
* So here 4 x 4 is the last T stored in the workspace
|
||||||
*
|
*
|
||||||
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
|
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB
|
||||||
|
|
||||||
*
|
*
|
||||||
* optimal workspace = space for slarfb + space for normal T's + space for the last T
|
* optimal workspace = space for slarfb + space for normal T's + space for the last T
|
||||||
*
|
*
|
||||||
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
|
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
|
||||||
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
|
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))
|
||||||
|
|
||||||
IF( K.EQ.0 ) THEN
|
IF( K.EQ.0 ) THEN
|
||||||
|
|
||||||
|
@ -230,7 +229,7 @@ C>
|
||||||
|
|
||||||
ELSE
|
ELSE
|
||||||
|
|
||||||
LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
|
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
|
||||||
LWKOPT = (LBWORK+LLWORK-NB)*NB
|
LWKOPT = (LBWORK+LLWORK-NB)*NB
|
||||||
WORK( 1 ) = LWKOPT
|
WORK( 1 ) = LWKOPT
|
||||||
|
|
||||||
|
|
|
@ -23,7 +23,7 @@ C> \brief \b ZGEQRF VARIANT: left-looking Level 3 BLAS version of the algorithm.
|
||||||
C>\details \b Purpose:
|
C>\details \b Purpose:
|
||||||
C>\verbatim
|
C>\verbatim
|
||||||
C>
|
C>
|
||||||
C> ZGEQRF computes a QR factorization of a real M-by-N matrix A:
|
C> ZGEQRF computes a QR factorization of a complex M-by-N matrix A:
|
||||||
C> A = Q * R.
|
C> A = Q * R.
|
||||||
C>
|
C>
|
||||||
C> This is the left-looking Level 3 BLAS version of the algorithm.
|
C> This is the left-looking Level 3 BLAS version of the algorithm.
|
||||||
|
@ -172,12 +172,11 @@ C>
|
||||||
EXTERNAL ZGEQR2, ZLARFB, ZLARFT, XERBLA
|
EXTERNAL ZGEQR2, ZLARFB, ZLARFT, XERBLA
|
||||||
* ..
|
* ..
|
||||||
* .. Intrinsic Functions ..
|
* .. Intrinsic Functions ..
|
||||||
INTRINSIC MAX, MIN
|
INTRINSIC CEILING, MAX, MIN, REAL
|
||||||
* ..
|
* ..
|
||||||
* .. External Functions ..
|
* .. External Functions ..
|
||||||
INTEGER ILAENV
|
INTEGER ILAENV
|
||||||
REAL SCEIL
|
EXTERNAL ILAENV
|
||||||
EXTERNAL ILAENV, SCEIL
|
|
||||||
* ..
|
* ..
|
||||||
* .. Executable Statements ..
|
* .. Executable Statements ..
|
||||||
|
|
||||||
|
@ -205,13 +204,13 @@ C>
|
||||||
*
|
*
|
||||||
* So here 4 x 4 is the last T stored in the workspace
|
* So here 4 x 4 is the last T stored in the workspace
|
||||||
*
|
*
|
||||||
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
|
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB
|
||||||
|
|
||||||
*
|
*
|
||||||
* optimal workspace = space for zlarfb + space for normal T's + space for the last T
|
* optimal workspace = space for zlarfb + space for normal T's + space for the last T
|
||||||
*
|
*
|
||||||
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
|
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
|
||||||
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
|
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))
|
||||||
|
|
||||||
IF( K.EQ.0 ) THEN
|
IF( K.EQ.0 ) THEN
|
||||||
|
|
||||||
|
@ -230,7 +229,7 @@ C>
|
||||||
|
|
||||||
ELSE
|
ELSE
|
||||||
|
|
||||||
LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
|
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
|
||||||
LWKOPT = (LBWORK+LLWORK-NB)*NB
|
LWKOPT = (LBWORK+LLWORK-NB)*NB
|
||||||
WORK( 1 ) = LWKOPT
|
WORK( 1 ) = LWKOPT
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue