336 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Fortran
		
	
	
	
			
		
		
	
	
			336 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Fortran
		
	
	
	
*> \brief \b SSB2ST_KERNELS
 | 
						|
*
 | 
						|
*  @generated from zhb2st_kernels.f, fortran z -> s, Wed Dec  7 08:22:40 2016
 | 
						|
*      
 | 
						|
*  =========== DOCUMENTATION ===========
 | 
						|
*
 | 
						|
* Online html documentation available at 
 | 
						|
*            http://www.netlib.org/lapack/explore-html/ 
 | 
						|
*
 | 
						|
*> \htmlonly
 | 
						|
*> Download SSB2ST_KERNELS + dependencies 
 | 
						|
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssb2st_kernels.f"> 
 | 
						|
*> [TGZ]</a> 
 | 
						|
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssb2st_kernels.f"> 
 | 
						|
*> [ZIP]</a> 
 | 
						|
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssb2st_kernels.f"> 
 | 
						|
*> [TXT]</a>
 | 
						|
*> \endhtmlonly 
 | 
						|
*
 | 
						|
*  Definition:
 | 
						|
*  ===========
 | 
						|
*
 | 
						|
*       SUBROUTINE  SSB2ST_KERNELS( UPLO, WANTZ, TTYPE, 
 | 
						|
*                                   ST, ED, SWEEP, N, NB, IB,
 | 
						|
*                                   A, LDA, V, TAU, LDVT, WORK)
 | 
						|
*
 | 
						|
*       IMPLICIT NONE
 | 
						|
*
 | 
						|
*       .. Scalar Arguments ..
 | 
						|
*       CHARACTER          UPLO
 | 
						|
*       LOGICAL            WANTZ
 | 
						|
*       INTEGER            TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
 | 
						|
*       ..
 | 
						|
*       .. Array Arguments ..
 | 
						|
*       REAL               A( LDA, * ), V( * ), 
 | 
						|
*                          TAU( * ), WORK( * )
 | 
						|
*  
 | 
						|
*> \par Purpose:
 | 
						|
*  =============
 | 
						|
*>
 | 
						|
*> \verbatim
 | 
						|
*>
 | 
						|
*> SSB2ST_KERNELS is an internal routine used by the SSYTRD_SB2ST
 | 
						|
*> subroutine.
 | 
						|
*> \endverbatim
 | 
						|
*
 | 
						|
*  Arguments:
 | 
						|
*  ==========
 | 
						|
*
 | 
						|
*> @param[in] n
 | 
						|
*>          The order of the matrix A.
 | 
						|
*>
 | 
						|
*> @param[in] nb
 | 
						|
*>          The size of the band.
 | 
						|
*>
 | 
						|
*> @param[in, out] A
 | 
						|
*>          A pointer to the matrix A.
 | 
						|
*>
 | 
						|
*> @param[in] lda
 | 
						|
*>          The leading dimension of the matrix A.
 | 
						|
*>
 | 
						|
*> @param[out] V
 | 
						|
*>          REAL array, dimension 2*n if eigenvalues only are
 | 
						|
*>          requested or to be queried for vectors.
 | 
						|
*>
 | 
						|
*> @param[out] TAU
 | 
						|
*>          REAL array, dimension (2*n).
 | 
						|
*>          The scalar factors of the Householder reflectors are stored
 | 
						|
*>          in this array.
 | 
						|
*>
 | 
						|
*> @param[in] st
 | 
						|
*>          internal parameter for indices.
 | 
						|
*>
 | 
						|
*> @param[in] ed
 | 
						|
*>          internal parameter for indices.
 | 
						|
*>
 | 
						|
*> @param[in] sweep
 | 
						|
*>          internal parameter for indices.
 | 
						|
*>
 | 
						|
*> @param[in] Vblksiz
 | 
						|
*>          internal parameter for indices.
 | 
						|
*>
 | 
						|
*> @param[in] wantz
 | 
						|
*>          logical which indicate if Eigenvalue are requested or both
 | 
						|
*>          Eigenvalue/Eigenvectors.
 | 
						|
*>
 | 
						|
*> @param[in] work
 | 
						|
*>          Workspace of size nb.
 | 
						|
*>
 | 
						|
*> \par Further Details:
 | 
						|
*  =====================
 | 
						|
*>
 | 
						|
*> \verbatim
 | 
						|
*>
 | 
						|
*>  Implemented by Azzam Haidar.
 | 
						|
*>
 | 
						|
*>  All details are available on technical report, SC11, SC13 papers.
 | 
						|
*>
 | 
						|
*>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
 | 
						|
*>  Parallel reduction to condensed forms for symmetric eigenvalue problems
 | 
						|
*>  using aggregated fine-grained and memory-aware kernels. In Proceedings
 | 
						|
*>  of 2011 International Conference for High Performance Computing,
 | 
						|
*>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
 | 
						|
*>  Article 8 , 11 pages.
 | 
						|
*>  http://doi.acm.org/10.1145/2063384.2063394
 | 
						|
*>
 | 
						|
*>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
 | 
						|
*>  An improved parallel singular value algorithm and its implementation 
 | 
						|
*>  for multicore hardware, In Proceedings of 2013 International Conference
 | 
						|
*>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
 | 
						|
*>  Denver, Colorado, USA, 2013.
 | 
						|
*>  Article 90, 12 pages.
 | 
						|
*>  http://doi.acm.org/10.1145/2503210.2503292
 | 
						|
*>
 | 
						|
*>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
 | 
						|
*>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
 | 
						|
*>  calculations based on fine-grained memory aware tasks.
 | 
						|
*>  International Journal of High Performance Computing Applications.
 | 
						|
*>  Volume 28 Issue 2, Pages 196-209, May 2014.
 | 
						|
*>  http://hpc.sagepub.com/content/28/2/196 
 | 
						|
*>
 | 
						|
*> \endverbatim
 | 
						|
*>
 | 
						|
*  =====================================================================
 | 
						|
      SUBROUTINE  SSB2ST_KERNELS( UPLO, WANTZ, TTYPE, 
 | 
						|
     $                            ST, ED, SWEEP, N, NB, IB,
 | 
						|
     $                            A, LDA, V, TAU, LDVT, WORK)
 | 
						|
*
 | 
						|
      IMPLICIT NONE
 | 
						|
*
 | 
						|
*  -- LAPACK computational routine (version 3.7.0) --
 | 
						|
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 | 
						|
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 | 
						|
*     December 2016
 | 
						|
*
 | 
						|
*     .. Scalar Arguments ..
 | 
						|
      CHARACTER          UPLO
 | 
						|
      LOGICAL            WANTZ
 | 
						|
      INTEGER            TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
 | 
						|
*     ..
 | 
						|
*     .. Array Arguments ..
 | 
						|
      REAL               A( LDA, * ), V( * ), 
 | 
						|
     $                   TAU( * ), WORK( * )
 | 
						|
*     ..
 | 
						|
*
 | 
						|
*  =====================================================================
 | 
						|
*
 | 
						|
*     .. Parameters ..
 | 
						|
      REAL               ZERO, ONE
 | 
						|
      PARAMETER          ( ZERO = 0.0E+0,
 | 
						|
     $                   ONE = 1.0E+0 )
 | 
						|
*     ..
 | 
						|
*     .. Local Scalars ..
 | 
						|
      LOGICAL            UPPER
 | 
						|
      INTEGER            I, J1, J2, LM, LN, VPOS, TAUPOS,
 | 
						|
     $                   DPOS, OFDPOS, AJETER 
 | 
						|
      REAL               CTMP 
 | 
						|
*     ..
 | 
						|
*     .. External Subroutines ..
 | 
						|
      EXTERNAL           SLARFG, SLARFX, SLARFY
 | 
						|
*     ..
 | 
						|
*     .. Intrinsic Functions ..
 | 
						|
      INTRINSIC          MOD
 | 
						|
*     .. External Functions ..
 | 
						|
      LOGICAL            LSAME
 | 
						|
      EXTERNAL           LSAME
 | 
						|
*     ..
 | 
						|
*     ..
 | 
						|
*     .. Executable Statements ..
 | 
						|
*      
 | 
						|
      AJETER = IB + LDVT
 | 
						|
      UPPER = LSAME( UPLO, 'U' )
 | 
						|
 | 
						|
      IF( UPPER ) THEN
 | 
						|
          DPOS    = 2 * NB + 1
 | 
						|
          OFDPOS  = 2 * NB
 | 
						|
      ELSE
 | 
						|
          DPOS    = 1
 | 
						|
          OFDPOS  = 2
 | 
						|
      ENDIF
 | 
						|
 | 
						|
*
 | 
						|
*     Upper case
 | 
						|
*
 | 
						|
      IF( UPPER ) THEN
 | 
						|
*
 | 
						|
          IF( WANTZ ) THEN
 | 
						|
              VPOS   = MOD( SWEEP-1, 2 ) * N + ST
 | 
						|
              TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
 | 
						|
          ELSE
 | 
						|
              VPOS   = MOD( SWEEP-1, 2 ) * N + ST
 | 
						|
              TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
 | 
						|
          ENDIF
 | 
						|
*
 | 
						|
          IF( TTYPE.EQ.1 ) THEN
 | 
						|
              LM = ED - ST + 1
 | 
						|
*
 | 
						|
              V( VPOS ) = ONE
 | 
						|
              DO 10 I = 1, LM-1
 | 
						|
                  V( VPOS+I )         = ( A( OFDPOS-I, ST+I ) )
 | 
						|
                  A( OFDPOS-I, ST+I ) = ZERO  
 | 
						|
   10         CONTINUE
 | 
						|
              CTMP = ( A( OFDPOS, ST ) )
 | 
						|
              CALL SLARFG( LM, CTMP, V( VPOS+1 ), 1, 
 | 
						|
     $                                       TAU( TAUPOS ) )
 | 
						|
              A( OFDPOS, ST ) = CTMP
 | 
						|
*
 | 
						|
              LM = ED - ST + 1
 | 
						|
              CALL SLARFY( UPLO, LM, V( VPOS ), 1,
 | 
						|
     $                     ( TAU( TAUPOS ) ),
 | 
						|
     $                     A( DPOS, ST ), LDA-1, WORK)
 | 
						|
          ENDIF
 | 
						|
*
 | 
						|
          IF( TTYPE.EQ.3 ) THEN
 | 
						|
*
 | 
						|
              LM = ED - ST + 1
 | 
						|
              CALL SLARFY( UPLO, LM, V( VPOS ), 1,
 | 
						|
     $                     ( TAU( TAUPOS ) ),
 | 
						|
     $                     A( DPOS, ST ), LDA-1, WORK)
 | 
						|
          ENDIF
 | 
						|
*
 | 
						|
          IF( TTYPE.EQ.2 ) THEN
 | 
						|
              J1 = ED+1
 | 
						|
              J2 = MIN( ED+NB, N )
 | 
						|
              LN = ED-ST+1
 | 
						|
              LM = J2-J1+1
 | 
						|
              IF( LM.GT.0) THEN
 | 
						|
                  CALL SLARFX( 'Left', LN, LM, V( VPOS ),
 | 
						|
     $                         ( TAU( TAUPOS ) ),
 | 
						|
     $                         A( DPOS-NB, J1 ), LDA-1, WORK)
 | 
						|
*
 | 
						|
                  IF( WANTZ ) THEN
 | 
						|
                      VPOS   = MOD( SWEEP-1, 2 ) * N + J1
 | 
						|
                      TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
 | 
						|
                  ELSE
 | 
						|
                      VPOS   = MOD( SWEEP-1, 2 ) * N + J1
 | 
						|
                      TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
 | 
						|
                  ENDIF
 | 
						|
*
 | 
						|
                  V( VPOS ) = ONE
 | 
						|
                  DO 30 I = 1, LM-1
 | 
						|
                      V( VPOS+I )          = 
 | 
						|
     $                                    ( A( DPOS-NB-I, J1+I ) )
 | 
						|
                      A( DPOS-NB-I, J1+I ) = ZERO
 | 
						|
   30             CONTINUE
 | 
						|
                  CTMP = ( A( DPOS-NB, J1 ) )
 | 
						|
                  CALL SLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) )
 | 
						|
                  A( DPOS-NB, J1 ) = CTMP
 | 
						|
*                 
 | 
						|
                  CALL SLARFX( 'Right', LN-1, LM, V( VPOS ),
 | 
						|
     $                         TAU( TAUPOS ),
 | 
						|
     $                         A( DPOS-NB+1, J1 ), LDA-1, WORK)
 | 
						|
              ENDIF
 | 
						|
          ENDIF
 | 
						|
*
 | 
						|
*     Lower case
 | 
						|
*  
 | 
						|
      ELSE
 | 
						|
*      
 | 
						|
          IF( WANTZ ) THEN
 | 
						|
              VPOS   = MOD( SWEEP-1, 2 ) * N + ST
 | 
						|
              TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
 | 
						|
          ELSE
 | 
						|
              VPOS   = MOD( SWEEP-1, 2 ) * N + ST
 | 
						|
              TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
 | 
						|
          ENDIF
 | 
						|
*
 | 
						|
          IF( TTYPE.EQ.1 ) THEN
 | 
						|
              LM = ED - ST + 1
 | 
						|
*
 | 
						|
              V( VPOS ) = ONE
 | 
						|
              DO 20 I = 1, LM-1
 | 
						|
                  V( VPOS+I )         = A( OFDPOS+I, ST-1 )
 | 
						|
                  A( OFDPOS+I, ST-1 ) = ZERO  
 | 
						|
   20         CONTINUE
 | 
						|
              CALL SLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1, 
 | 
						|
     $                                       TAU( TAUPOS ) )
 | 
						|
*
 | 
						|
              LM = ED - ST + 1
 | 
						|
*
 | 
						|
              CALL SLARFY( UPLO, LM, V( VPOS ), 1,
 | 
						|
     $                     ( TAU( TAUPOS ) ),
 | 
						|
     $                     A( DPOS, ST ), LDA-1, WORK)
 | 
						|
 | 
						|
          ENDIF
 | 
						|
*
 | 
						|
          IF( TTYPE.EQ.3 ) THEN
 | 
						|
              LM = ED - ST + 1
 | 
						|
*
 | 
						|
              CALL SLARFY( UPLO, LM, V( VPOS ), 1,
 | 
						|
     $                     ( TAU( TAUPOS ) ),
 | 
						|
     $                     A( DPOS, ST ), LDA-1, WORK)
 | 
						|
 | 
						|
          ENDIF
 | 
						|
*
 | 
						|
          IF( TTYPE.EQ.2 ) THEN
 | 
						|
              J1 = ED+1
 | 
						|
              J2 = MIN( ED+NB, N )
 | 
						|
              LN = ED-ST+1
 | 
						|
              LM = J2-J1+1
 | 
						|
*
 | 
						|
              IF( LM.GT.0) THEN
 | 
						|
                  CALL SLARFX( 'Right', LM, LN, V( VPOS ), 
 | 
						|
     $                         TAU( TAUPOS ), A( DPOS+NB, ST ),
 | 
						|
     $                         LDA-1, WORK)
 | 
						|
*
 | 
						|
                  IF( WANTZ ) THEN
 | 
						|
                      VPOS   = MOD( SWEEP-1, 2 ) * N + J1
 | 
						|
                      TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
 | 
						|
                  ELSE
 | 
						|
                      VPOS   = MOD( SWEEP-1, 2 ) * N + J1
 | 
						|
                      TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
 | 
						|
                  ENDIF
 | 
						|
*
 | 
						|
                  V( VPOS ) = ONE
 | 
						|
                  DO 40 I = 1, LM-1
 | 
						|
                      V( VPOS+I )        = A( DPOS+NB+I, ST )
 | 
						|
                      A( DPOS+NB+I, ST ) = ZERO
 | 
						|
   40             CONTINUE
 | 
						|
                  CALL SLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1, 
 | 
						|
     $                                        TAU( TAUPOS ) )
 | 
						|
*
 | 
						|
                  CALL SLARFX( 'Left', LM, LN-1, V( VPOS ), 
 | 
						|
     $                         ( TAU( TAUPOS ) ),
 | 
						|
     $                         A( DPOS+NB-1, ST+1 ), LDA-1, WORK)
 | 
						|
             
 | 
						|
              ENDIF
 | 
						|
          ENDIF
 | 
						|
      ENDIF
 | 
						|
*
 | 
						|
      RETURN
 | 
						|
*
 | 
						|
*     END OF SSB2ST_KERNELS
 | 
						|
*
 | 
						|
      END      
 |