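*  Listing metadata carried over from the page this file was copied
*  from (presumably describing the original listing): Fortran,
*  517 lines, 16 KiB.
*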
*> \brief \b SGBTRF
*
*  =========== DOCUMENTATION ===========
*
* Online html documentation available at
*            http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download SGBTRF + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgbtrf.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgbtrf.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgbtrf.f">
*> [TXT]</a>
*> \endhtmlonly
*
*  Definition:
*  ===========
*
*       SUBROUTINE SGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
*
*       .. Scalar Arguments ..
*       INTEGER            INFO, KL, KU, LDAB, M, N
*       ..
*       .. Array Arguments ..
*       INTEGER            IPIV( * )
*       REAL               AB( LDAB, * )
*       ..
*
*
*> \par Purpose:
*  =============
*>
*> \verbatim
*>
*> SGBTRF computes an LU factorization of a real m-by-n band matrix A
*> using partial pivoting with row interchanges.
*>
*> This is the blocked version of the algorithm, calling Level 3 BLAS.
*>
*> The block size NB is obtained from ILAENV and capped at the
*> compile-time limit NBMAX = 64 imposed by the local work arrays.
*> When NB <= 1 or NB > KL the routine delegates the whole
*> factorization to the unblocked routine SGBTF2.
*> \endverbatim
*
*  Arguments:
*  ==========
*
*> \param[in] M
*> \verbatim
*>          M is INTEGER
*>          The number of rows of the matrix A.  M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*>          N is INTEGER
*>          The number of columns of the matrix A.  N >= 0.
*> \endverbatim
*>
*> \param[in] KL
*> \verbatim
*>          KL is INTEGER
*>          The number of subdiagonals within the band of A.  KL >= 0.
*> \endverbatim
*>
*> \param[in] KU
*> \verbatim
*>          KU is INTEGER
*>          The number of superdiagonals within the band of A.  KU >= 0.
*> \endverbatim
*>
*> \param[in,out] AB
*> \verbatim
*>          AB is REAL array, dimension (LDAB,N)
*>          On entry, the matrix A in band storage, in rows KL+1 to
*>          2*KL+KU+1; rows 1 to KL of the array need not be set.
*>          The j-th column of A is stored in the j-th column of the
*>          array AB as follows:
*>          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
*>
*>          On exit, details of the factorization: U is stored as an
*>          upper triangular band matrix with KL+KU superdiagonals in
*>          rows 1 to KL+KU+1, and the multipliers used during the
*>          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
*>          See below for further details.
*> \endverbatim
*>
*> \param[in] LDAB
*> \verbatim
*>          LDAB is INTEGER
*>          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
*> \endverbatim
*>
*> \param[out] IPIV
*> \verbatim
*>          IPIV is INTEGER array, dimension (min(M,N))
*>          The pivot indices; for 1 <= i <= min(M,N), row i of the
*>          matrix was interchanged with row IPIV(i).
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*>          INFO is INTEGER
*>          = 0: successful exit
*>          < 0: if INFO = -i, the i-th argument had an illegal value
*>          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
*>               has been completed, but the factor U is exactly
*>               singular, and division by zero will occur if it is used
*>               to solve a system of equations.
*> \endverbatim
*
*  Authors:
*  ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*
*> \ingroup realGBcomputational
*
*> \par Further Details:
*  =====================
*>
*> \verbatim
*>
*>  The band storage scheme is illustrated by the following example, when
*>  M = N = 6, KL = 2, KU = 1:
*>
*>  On entry:                       On exit:
*>
*>      *    *    *    +    +    +       *    *    *   u14  u25  u36
*>      *    *    +    +    +    +       *    *   u13  u24  u35  u46
*>      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
*>     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
*>     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
*>     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
*>
*>  Array elements marked * are not used by the routine; elements marked
*>  + need not be set on entry, but are required by the routine to store
*>  elements of U because of fill-in resulting from the row interchanges.
*> \endverbatim
*>
*  =====================================================================
      SUBROUTINE SGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
*
*  -- LAPACK computational routine (version 3.4.0) --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --
*     November 2011
*
*     .. Scalar Arguments ..
      INTEGER            INFO, KL, KU, LDAB, M, N
*     ..
*     .. Array Arguments ..
      INTEGER            IPIV( * )
      REAL               AB( LDAB, * )
*     ..
*
*  =====================================================================
*
*     .. Parameters ..
*     NBMAX is the largest block size the local work arrays can hold;
*     LDWORK is the leading dimension of those work arrays.
      REAL               ONE, ZERO
      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
      INTEGER            NBMAX, LDWORK
      PARAMETER          ( NBMAX = 64, LDWORK = NBMAX+1 )
*     ..
*     .. Local Scalars ..
      INTEGER            I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
     $                   JU, K2, KM, KV, NB, NW
      REAL               TEMP
*     ..
*     .. Local Arrays ..
*     WORK13 holds a copy of block A13 and WORK31 a copy of block A31
*     (see the partitioning described inside the main loop).
      REAL               WORK13( LDWORK, NBMAX ),
     $                   WORK31( LDWORK, NBMAX )
*     ..
*     .. External Functions ..
      INTEGER            ILAENV, ISAMAX
      EXTERNAL           ILAENV, ISAMAX
*     ..
*     .. External Subroutines ..
      EXTERNAL           SCOPY, SGBTF2, SGEMM, SGER, SLASWP, SSCAL,
     $                   SSWAP, STRSM, XERBLA
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
*     ..
*     .. Executable Statements ..
*
*     KV is the number of superdiagonals in the factor U, allowing for
*     fill-in
*
      KV = KU + KL
*
*     Test the input parameters.
*     (INFO = -5 is never produced: argument 5, AB, is not checked.)
*
      INFO = 0
      IF( M.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( KL.LT.0 ) THEN
         INFO = -3
      ELSE IF( KU.LT.0 ) THEN
         INFO = -4
      ELSE IF( LDAB.LT.KL+KV+1 ) THEN
         INFO = -6
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'SGBTRF', -INFO )
         RETURN
      END IF
*
*     Quick return if possible
*
      IF( M.EQ.0 .OR. N.EQ.0 )
     $   RETURN
*
*     Determine the block size for this environment
*
      NB = ILAENV( 1, 'SGBTRF', ' ', M, N, KL, KU )
*
*     The block size must not exceed the limit set by the size of the
*     local arrays WORK13 and WORK31.
*
      NB = MIN( NB, NBMAX )
*
*     The blocked code below is only entered when 1 < NB <= KL.
*
      IF( NB.LE.1 .OR. NB.GT.KL ) THEN
*
*        Use unblocked code
*
         CALL SGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
      ELSE
*
*        Use blocked code
*
*        Note on addressing: since AB(KV+1+i-j,j) holds A(i,j), passing
*        AB( KV+1+i-j, j ) with leading dimension (or increment) LDAB-1
*        lets the callee address the band as an ordinary dense matrix
*        whose (1,1) element is A(i,j); an increment of LDAB-1 steps
*        along a row of A.
*
*        Zero the superdiagonal elements of the work array WORK13
*
         DO 20 J = 1, NB
            DO 10 I = 1, J - 1
               WORK13( I, J ) = ZERO
   10       CONTINUE
   20    CONTINUE
*
*        Zero the subdiagonal elements of the work array WORK31
*
         DO 40 J = 1, NB
            DO 30 I = J + 1, NB
               WORK31( I, J ) = ZERO
   30       CONTINUE
   40    CONTINUE
*
*        Gaussian elimination with partial pivoting
*
*        Set fill-in elements in columns KU+2 to KV to zero
*
         DO 60 J = KU + 2, MIN( KV, N )
            DO 50 I = KV - J + 2, KL
               AB( I, J ) = ZERO
   50       CONTINUE
   60    CONTINUE
*
*        JU is the index of the last column affected by the current
*        stage of the factorization
*
         JU = 1
*
         DO 180 J = 1, MIN( M, N ), NB
            JB = MIN( NB, MIN( M, N )-J+1 )
*
*           The active part of the matrix is partitioned
*
*              A11   A12   A13
*              A21   A22   A23
*              A31   A32   A33
*
*           Here A11, A21 and A31 denote the current block of JB columns
*           which is about to be factorized. The number of rows in the
*           partitioning are JB, I2, I3 respectively, and the numbers
*           of columns are JB, J2, J3. The superdiagonal elements of A13
*           and the subdiagonal elements of A31 lie outside the band.
*
            I2 = MIN( KL-JB, M-J-JB+1 )
            I3 = MIN( JB, M-J-KL+1 )
*
*           J2 and J3 are computed after JU has been updated.
*
*           Factorize the current block of JB columns
*
            DO 80 JJ = J, J + JB - 1
*
*              Set fill-in elements in column JJ+KV to zero
*
               IF( JJ+KV.LE.N ) THEN
                  DO 70 I = 1, KL
                     AB( I, JJ+KV ) = ZERO
   70             CONTINUE
               END IF
*
*              Find pivot and test for singularity. KM is the number of
*              subdiagonal elements in the current column.
*
*              JP is the position of the pivot within column JJ counted
*              from the diagonal (JP = 1 means no interchange). IPIV(JJ)
*              is stored relative to the first row of the block (row J
*              maps to 1); it is shifted by J-1 to a row index of A once
*              the block has been factorized.
*
               KM = MIN( KL, M-JJ )
               JP = ISAMAX( KM+1, AB( KV+1, JJ ), 1 )
               IPIV( JJ ) = JP + JJ - J
               IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
                  JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
                  IF( JP.NE.1 ) THEN
*
*                    Apply interchange to columns J to J+JB-1
*
                     IF( JP+JJ-1.LT.J+KL ) THEN
*
*                       Both rows lie entirely within AB for the whole
*                       block, so a single swap of length JB suffices.
*
                        CALL SSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
     $                              AB( KV+JP+JJ-J, J ), LDAB-1 )
                     ELSE
*
*                       The interchange affects columns J to JJ-1 of A31
*                       which are stored in the work array WORK31
*
                        CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
     $                              WORK31( JP+JJ-J-KL, 1 ), LDWORK )
                        CALL SSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
     $                              AB( KV+JP, JJ ), LDAB-1 )
                     END IF
                  END IF
*
*                 Compute multipliers
*
                  CALL SSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
     $                        1 )
*
*                 Update trailing submatrix within the band and within
*                 the current block. JM is the index of the last column
*                 which needs to be updated.
*
                  JM = MIN( JU, J+JB-1 )
                  IF( JM.GT.JJ )
     $               CALL SGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
     $                          AB( KV, JJ+1 ), LDAB-1,
     $                          AB( KV+1, JJ+1 ), LDAB-1 )
               ELSE
*
*                 If pivot is zero, set INFO to the index of the pivot
*                 unless a zero pivot has already been found.
*
                  IF( INFO.EQ.0 )
     $               INFO = JJ
               END IF
*
*              Copy current column of A31 into the work array WORK31
*
               NW = MIN( JJ-J+1, I3 )
               IF( NW.GT.0 )
     $            CALL SCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
     $                        WORK31( 1, JJ-J+1 ), 1 )
   80       CONTINUE
            IF( J+JB.LE.N ) THEN
*
*              Apply the row interchanges to the other blocks.
*
               J2 = MIN( JU-J+1, KV ) - JB
               J3 = MAX( 0, JU-J-KV+1 )
*
*              Use SLASWP to apply the row interchanges to A12, A22, and
*              A32.
*
               CALL SLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
     $                      IPIV( J ), 1 )
*
*              Adjust the pivot indices.
*
               DO 90 I = J, J + JB - 1
                  IPIV( I ) = IPIV( I ) + J - 1
   90          CONTINUE
*
*              Apply the row interchanges to A13, A23, and A33
*              columnwise.
*
               K2 = J - 1 + JB + J2
               DO 110 I = 1, J3
                  JJ = K2 + I
                  DO 100 II = J + I - 1, J + JB - 1
                     IP = IPIV( II )
                     IF( IP.NE.II ) THEN
                        TEMP = AB( KV+1+II-JJ, JJ )
                        AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
                        AB( KV+1+IP-JJ, JJ ) = TEMP
                     END IF
  100             CONTINUE
  110          CONTINUE
*
*              Update the relevant part of the trailing submatrix
*
               IF( J2.GT.0 ) THEN
*
*                 Update A12
*
                  CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit',
     $                        JB, J2, ONE, AB( KV+1, J ), LDAB-1,
     $                        AB( KV+1-JB, J+JB ), LDAB-1 )
*
                  IF( I2.GT.0 ) THEN
*
*                    Update A22
*
                     CALL SGEMM( 'No transpose', 'No transpose', I2, J2,
     $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
     $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
     $                           AB( KV+1, J+JB ), LDAB-1 )
                  END IF
*
                  IF( I3.GT.0 ) THEN
*
*                    Update A32
*
                     CALL SGEMM( 'No transpose', 'No transpose', I3, J2,
     $                           JB, -ONE, WORK31, LDWORK,
     $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
     $                           AB( KV+KL+1-JB, J+JB ), LDAB-1 )
                  END IF
               END IF
*
               IF( J3.GT.0 ) THEN
*
*                 Copy the lower triangle of A13 into the work array
*                 WORK13
*
                  DO 130 JJ = 1, J3
                     DO 120 II = JJ, JB
                        WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
  120                CONTINUE
  130             CONTINUE
*
*                 Update A13 in the work array
*
                  CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit',
     $                        JB, J3, ONE, AB( KV+1, J ), LDAB-1,
     $                        WORK13, LDWORK )
*
                  IF( I2.GT.0 ) THEN
*
*                    Update A23
*
                     CALL SGEMM( 'No transpose', 'No transpose', I2, J3,
     $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
     $                           WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
     $                           LDAB-1 )
                  END IF
*
                  IF( I3.GT.0 ) THEN
*
*                    Update A33
*
                     CALL SGEMM( 'No transpose', 'No transpose', I3, J3,
     $                           JB, -ONE, WORK31, LDWORK, WORK13,
     $                           LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
                  END IF
*
*                 Copy the lower triangle of A13 back into place
*
                  DO 150 JJ = 1, J3
                     DO 140 II = JJ, JB
                        AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
  140                CONTINUE
  150             CONTINUE
               END IF
            ELSE
*
*              Adjust the pivot indices.
*
               DO 160 I = J, J + JB - 1
                  IPIV( I ) = IPIV( I ) + J - 1
  160          CONTINUE
            END IF
*
*           Partially undo the interchanges in the current block to
*           restore the upper triangular form of A31 and copy the upper
*           triangle of A31 back into place
*
            DO 170 JJ = J + JB - 1, J, -1
*
*              IPIV now holds row indices of A, so JP is again the pivot
*              position counted from the diagonal of column JJ.
*
               JP = IPIV( JJ ) - JJ + 1
               IF( JP.NE.1 ) THEN
*
*                 Apply interchange to columns J to JJ-1
*
                  IF( JP+JJ-1.LT.J+KL ) THEN
*
*                    The interchange does not affect A31
*
                     CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
     $                           AB( KV+JP+JJ-J, J ), LDAB-1 )
                  ELSE
*
*                    The interchange does affect A31
*
                     CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
     $                           WORK31( JP+JJ-J-KL, 1 ), LDWORK )
                  END IF
               END IF
*
*              Copy the current column of A31 back into place
*
               NW = MIN( I3, JJ-J+1 )
               IF( NW.GT.0 )
     $            CALL SCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
     $                        AB( KV+KL+1-JJ+J, JJ ), 1 )
  170       CONTINUE
  180    CONTINUE
      END IF
*
      RETURN
*
*     End of SGBTRF
*
      END