Merge pull request #4087 from martin-frbg/lapack847

Improve variants of Cholesky and QR (Reference-LAPACK PR 847)
This commit is contained in:
Martin Kroeker 2023-06-18 11:39:52 +02:00 committed by GitHub
commit eb058c2ae2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 29 additions and 119 deletions

View File

@ -28,7 +28,7 @@ LULL = lu/LL/cgetrf.o lu/LL/dgetrf.o lu/LL/sgetrf.o lu/LL/zgetrf.o
LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o
QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o qr/LL/sceil.o
QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o
.PHONY: all

View File

@ -24,7 +24,7 @@ C> \brief \b CPOTRF VARIANT: right looking block version of the algorithm, calli
C>\details \b Purpose:
C>\verbatim
C>
C> CPOTRF computes the Cholesky factorization of a real Hermitian
C> CPOTRF computes the Cholesky factorization of a complex Hermitian
C> positive definite matrix A.
C>
C> The factorization has the form

View File

@ -24,7 +24,7 @@ C> \brief \b ZPOTRF VARIANT: right looking block version of the algorithm, calli
C>\details \b Purpose:
C>\verbatim
C>
C> ZPOTRF computes the Cholesky factorization of a real Hermitian
C> ZPOTRF computes the Cholesky factorization of a complex Hermitian
C> positive definite matrix A.
C>
C> The factorization has the form

View File

@ -24,7 +24,7 @@ C> \brief \b CPOTRF VARIANT: top-looking block version of the algorithm, calling
C>\details \b Purpose:
C>\verbatim
C>
C> CPOTRF computes the Cholesky factorization of a real symmetric
C> CPOTRF computes the Cholesky factorization of a complex Hermitian
C> positive definite matrix A.
C>
C> The factorization has the form
@ -55,7 +55,7 @@ C>
C> \param[in,out] A
C> \verbatim
C> A is COMPLEX array, dimension (LDA,N)
C> On entry, the symmetric matrix A. If UPLO = 'U', the leading
C> On entry, the Hermitian matrix A. If UPLO = 'U', the leading
C> N-by-N upper triangular part of A contains the upper
C> triangular part of the matrix A, and the strictly lower
C> triangular part of A is not referenced. If UPLO = 'L', the

View File

@ -24,7 +24,7 @@ C> \brief \b ZPOTRF VARIANT: top-looking block version of the algorithm, calling
C>\details \b Purpose:
C>\verbatim
C>
C> ZPOTRF computes the Cholesky factorization of a real symmetric
C> ZPOTRF computes the Cholesky factorization of a complex Hermitian
C> positive definite matrix A.
C>
C> The factorization has the form
@ -55,7 +55,7 @@ C>
C> \param[in,out] A
C> \verbatim
C> A is COMPLEX*16 array, dimension (LDA,N)
C> On entry, the symmetric matrix A. If UPLO = 'U', the leading
C> On entry, the Hermitian matrix A. If UPLO = 'U', the leading
C> N-by-N upper triangular part of A contains the upper
C> triangular part of the matrix A, and the strictly lower
C> triangular part of A is not referenced. If UPLO = 'L', the

View File

@ -23,7 +23,7 @@ C> \brief \b CGEQRF VARIANT: left-looking Level 3 BLAS version of the algorithm.
C>\details \b Purpose:
C>\verbatim
C>
C> CGEQRF computes a QR factorization of a real M-by-N matrix A:
C> CGEQRF computes a QR factorization of a complex M-by-N matrix A:
C> A = Q * R.
C>
C> This is the left-looking Level 3 BLAS version of the algorithm.
@ -172,12 +172,11 @@ C>
EXTERNAL CGEQR2, CLARFB, CLARFT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
INTRINSIC CEILING, MAX, MIN, REAL
* ..
* .. External Functions ..
INTEGER ILAENV
REAL SCEIL
EXTERNAL ILAENV, SCEIL
EXTERNAL ILAENV
* ..
* .. Executable Statements ..
@ -205,13 +204,13 @@ C>
*
* So here 4 x 4 is the last T stored in the workspace
*
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB
*
* optimal workspace = space for clarfb + space for normal T's + space for the last T
*
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))
IF( K.EQ.0 ) THEN
@ -230,7 +229,7 @@ C>
ELSE
LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
LWKOPT = (LBWORK+LLWORK-NB)*NB
WORK( 1 ) = LWKOPT

View File

@ -172,12 +172,11 @@ C>
EXTERNAL DGEQR2, DLARFB, DLARFT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
INTRINSIC CEILING, MAX, MIN, REAL
* ..
* .. External Functions ..
INTEGER ILAENV
REAL SCEIL
EXTERNAL ILAENV, SCEIL
EXTERNAL ILAENV
* ..
* .. Executable Statements ..
@ -205,13 +204,13 @@ C>
*
* So here 4 x 4 is the last T stored in the workspace
*
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB
*
* optimal workspace = space for dlarfb + space for normal T's + space for the last T
*
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))
IF( K.EQ.0 ) THEN
@ -230,7 +229,7 @@ C>
ELSE
LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
LWKOPT = (LBWORK+LLWORK-NB)*NB
WORK( 1 ) = LWKOPT

View File

@ -1,86 +0,0 @@
C> \brief \b SCEIL
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* REAL FUNCTION SCEIL( A )
*
* .. Scalar Arguments ..
* REAL A
* ..
*
* =====================================================================
*
* .. Intrinsic Functions ..
* INTRINSIC INT
* ..
* .. Executable Statements ..*
*
* IF (A-INT(A).EQ.0) THEN
* SCEIL = A
* ELSE IF (A.GT.0) THEN
* SCEIL = INT(A)+1;
* ELSE
* SCEIL = INT(A)
* END IF
*
* RETURN
*
* END
* Purpose
* =======
*
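C>\details \b Purpose:
C>\verbatim
C>
C> SCEIL returns the smallest whole number that is not less than A
C> (the ceiling of A), as a REAL value.
C>\endverbatim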
*
* Arguments:
* ==========
*
*
* Authors:
* ========
*
C> \author Univ. of Tennessee
C> \author Univ. of California Berkeley
C> \author Univ. of Colorado Denver
C> \author NAG Ltd.
*
C> \date December 2016
*
C> \ingroup variantsOTHERcomputational
*
* =====================================================================
      REAL FUNCTION SCEIL( A )
*
*  -- LAPACK computational routine --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
*  Purpose
*  =======
*
*  SCEIL returns the ceiling of A, i.e. the smallest whole number that
*  is not less than A, as a REAL value.
*
*  Arguments
*  =========
*
*  A       (input) REAL
*          The value to be rounded up.
*
*     .. Scalar Arguments ..
      REAL               A
*     ..
*
*  =====================================================================
*
*     .. Intrinsic Functions ..
      INTRINSIC          AINT
*     ..
*     .. Executable Statements ..
*
*     Truncate toward zero in REAL arithmetic.  AINT never converts to
*     INTEGER, so a large |A| cannot overflow the way INT( A ) would.
*
      SCEIL = AINT( A )
*
*     Truncation rounds a positive non-integer down, so step up by one
*     to reach the ceiling.  For negative A truncation already rounds
*     up, and for whole numbers SCEIL equals A; neither case is changed.
*
      IF( SCEIL.LT.A ) SCEIL = SCEIL + 1.0E+0
      RETURN
*
      END

View File

@ -172,12 +172,11 @@ C>
EXTERNAL SGEQR2, SLARFB, SLARFT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
INTRINSIC CEILING, MAX, MIN, REAL
* ..
* .. External Functions ..
INTEGER ILAENV
REAL SCEIL
EXTERNAL ILAENV, SCEIL
EXTERNAL ILAENV
* ..
* .. Executable Statements ..
@ -205,13 +204,13 @@ C>
*
* So here 4 x 4 is the last T stored in the workspace
*
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB
*
* optimal workspace = space for slarfb + space for normal T's + space for the last T
*
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))
IF( K.EQ.0 ) THEN
@ -230,7 +229,7 @@ C>
ELSE
LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
LWKOPT = (LBWORK+LLWORK-NB)*NB
WORK( 1 ) = LWKOPT

View File

@ -23,7 +23,7 @@ C> \brief \b ZGEQRF VARIANT: left-looking Level 3 BLAS version of the algorithm.
C>\details \b Purpose:
C>\verbatim
C>
C> ZGEQRF computes a QR factorization of a real M-by-N matrix A:
C> ZGEQRF computes a QR factorization of a complex M-by-N matrix A:
C> A = Q * R.
C>
C> This is the left-looking Level 3 BLAS version of the algorithm.
@ -172,12 +172,11 @@ C>
EXTERNAL ZGEQR2, ZLARFB, ZLARFT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
INTRINSIC CEILING, MAX, MIN, REAL
* ..
* .. External Functions ..
INTEGER ILAENV
REAL SCEIL
EXTERNAL ILAENV, SCEIL
EXTERNAL ILAENV
* ..
* .. Executable Statements ..
@ -205,13 +204,13 @@ C>
*
* So here 4 x 4 is the last T stored in the workspace
*
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB
*
* optimal workspace = space for zlarfb + space for normal T's + space for the last T
*
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))
IF( K.EQ.0 ) THEN
@ -230,7 +229,7 @@ C>
ELSE
LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
LWKOPT = (LBWORK+LLWORK-NB)*NB
WORK( 1 ) = LWKOPT