From 8d57af540b12dc864d49dc45ebd8dc449b10e41b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 20 Jun 2023 10:42:46 +0200 Subject: [PATCH 01/15] Add Dynamic Mode Decomposition functions (Reference-LAPACK PR 736) --- lapack-netlib/SRC/cgedmd.f90 | 995 +++++++++++++++++++++++++++++++ lapack-netlib/SRC/cgedmdq.f90 | 689 +++++++++++++++++++++ lapack-netlib/SRC/dgedmd.f90 | 1054 +++++++++++++++++++++++++++++++++ lapack-netlib/SRC/dgedmdq.f90 | 704 ++++++++++++++++++++++ lapack-netlib/SRC/sgedmd.f90 | 1054 +++++++++++++++++++++++++++++++++ lapack-netlib/SRC/sgedmdq.f90 | 703 ++++++++++++++++++++++ lapack-netlib/SRC/zgedmd.f90 | 996 +++++++++++++++++++++++++++++++ lapack-netlib/SRC/zgedmdq.f90 | 689 +++++++++++++++++++++ 8 files changed, 6884 insertions(+) create mode 100644 lapack-netlib/SRC/cgedmd.f90 create mode 100644 lapack-netlib/SRC/cgedmdq.f90 create mode 100644 lapack-netlib/SRC/dgedmd.f90 create mode 100644 lapack-netlib/SRC/dgedmdq.f90 create mode 100644 lapack-netlib/SRC/sgedmd.f90 create mode 100644 lapack-netlib/SRC/sgedmdq.f90 create mode 100644 lapack-netlib/SRC/zgedmd.f90 create mode 100644 lapack-netlib/SRC/zgedmdq.f90 diff --git a/lapack-netlib/SRC/cgedmd.f90 b/lapack-netlib/SRC/cgedmd.f90 new file mode 100644 index 000000000..499489270 --- /dev/null +++ b/lapack-netlib/SRC/cgedmd.f90 @@ -0,0 +1,995 @@ + SUBROUTINE CGEDMD( JOBS, JOBZ, JOBR, JOBF, WHTSVD, & + M, N, X, LDX, Y, LDY, NRNK, TOL, & + K, EIGS, Z, LDZ, RES, B, LDB, & + W, LDW, S, LDS, ZWORK, LZWORK, & + RWORK, LRWORK, IWORK, LIWORK, INFO ) +! March 2023 +!..... + USE iso_fortran_env + IMPLICIT NONE + INTEGER, PARAMETER :: WP = real32 +!..... +! Scalar arguments + CHARACTER, INTENT(IN) :: JOBS, JOBZ, JOBR, JOBF + INTEGER, INTENT(IN) :: WHTSVD, M, N, LDX, LDY, & + NRNK, LDZ, LDB, LDW, LDS, & + LIWORK, LRWORK, LZWORK + INTEGER, INTENT(OUT) :: K, INFO + REAL(KIND=WP), INTENT(IN) :: TOL +! Array arguments + COMPLEX(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*) + COMPLEX(KIND=WP), INTENT(OUT) :: Z(LDZ,*), B(LDB,*), & + W(LDW,*), S(LDS,*) + COMPLEX(KIND=WP), INTENT(OUT) :: EIGS(*) + COMPLEX(KIND=WP), INTENT(OUT) :: ZWORK(*) + REAL(KIND=WP), INTENT(OUT) :: RES(*) + REAL(KIND=WP), INTENT(OUT) :: RWORK(*) + INTEGER, INTENT(OUT) :: IWORK(*) +!............................................................ +! Purpose +! ======= +! CGEDMD computes the Dynamic Mode Decomposition (DMD) for +! a pair of data snapshot matrices. For the input matrices +! X and Y such that Y = A*X with an unaccessible matrix +! A, CGEDMD computes a certain number of Ritz pairs of A using +! the standard Rayleigh-Ritz extraction from a subspace of +! range(X) that is determined using the leading left singular +! vectors of X. Optionally, CGEDMD returns the residuals +! of the computed Ritz pairs, the information needed for +! a refinement of the Ritz vectors, or the eigenvectors of +! the Exact DMD. +! For further details see the references listed +! below. For more details of the implementation see [3]. +! +! References +! ========== +! [1] P. Schmid: Dynamic mode decomposition of numerical +! and experimental data, +! Journal of Fluid Mechanics 656, 5-28, 2010. +! [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal +! decompositions: analysis and enhancements, +! SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. +! [3] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition I. Technical report. AIMDyn Inc. +! and LAPACK Working Note 298. +! [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. +! Brunton, N. Kutz: On Dynamic Mode Decomposition: +! Theory and Applications, Journal of Computational +! Dynamics 1(2), 391 -421, 2014. +! +!...................................................................... +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! and supported by +! - DARPA SBIR project "Koopman Operator-Based Forecasting +! for Nonstationary Processes from Near-Term, Limited +! Observational Data" Contract No: W31P4Q-21-C-0007 +! - DARPA PAI project "Physics-Informed Machine Learning +! Methodologies" Contract No: HR0011-18-9-0033 +! - DARPA MoDyL project "A Data-Driven, Operator-Theoretic +! Framework for Space-Time Analysis of Process Dynamics" +! Contract No: HR0011-16-C-0116 +! Any opinions, findings and conclusions or recommendations +! expressed in this material are those of the author and +! do not necessarily reflect the views of the DARPA SBIR +! Program Office +!============================================================ +! Distribution Statement A: +! Approved for Public Release, Distribution Unlimited. +! Cleared by DARPA on September 29, 2022 +!============================================================ +!...................................................................... +! Arguments +! ========= +! JOBS (input) CHARACTER*1 +! Determines whether the initial data snapshots are scaled +! by a diagonal matrix. +! 'S' :: The data snapshots matrices X and Y are multiplied +! with a diagonal matrix D so that X*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'C' :: The snapshots are scaled as with the 'S' option. +! If it is found that an i-th column of X is zero +! vector and the corresponding i-th column of Y is +! non-zero, then the i-th column of Y is set to +! zero and a warning flag is raised. +! 'Y' :: The data snapshots matrices X and Y are multiplied +! by a diagonal matrix D so that Y*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'N' :: No data scaling. +!..... +! JOBZ (input) CHARACTER*1 +! Determines whether the eigenvectors (Koopman modes) will +! be computed. +! 'V' :: The eigenvectors (Koopman modes) will be computed +! and returned in the matrix Z. +! See the description of Z. +! 'F' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product X(:,1:K)*W, where X +! contains a POD basis (leading left singular vectors +! of the data matrix X) and W contains the eigenvectors +! of the corresponding Rayleigh quotient. +! See the descriptions of K, X, W, Z. +! 'N' :: The eigenvectors are not computed. +!..... +! JOBR (input) CHARACTER*1 +! Determines whether to compute the residuals. +! 'R' :: The residuals for the computed eigenpairs will be +! computed and stored in the array RES. +! See the description of RES. +! For this option to be legal, JOBZ must be 'V'. +! 'N' :: The residuals are not computed. +!..... +! JOBF (input) CHARACTER*1 +! Specifies whether to store information needed for post- +! processing (e.g. computing refined Ritz vectors) +! 'R' :: The matrix needed for the refinement of the Ritz +! vectors is computed and stored in the array B. +! See the description of B. +! 'E' :: The unscaled eigenvectors of the Exact DMD are +! computed and returned in the array B. See the +! description of B. +! 'N' :: No eigenvector refinement data is computed. +!..... +! WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } +! Allows for a selection of the SVD algorithm from the +! LAPACK library. +! 1 :: CGESVD (the QR SVD algorithm) +! 2 :: CGESDD (the Divide and Conquer algorithm; if enough +! workspace available, this is the fastest option) +! 3 :: CGESVDQ (the preconditioned QR SVD ; this and 4 +! are the most accurate options) +! 4 :: CGEJSV (the preconditioned Jacobi SVD; this and 3 +! are the most accurate options) +! For the four methods above, a significant difference in +! the accuracy of small singular values is possible if +! the snapshots vary in norm so that X is severely +! ill-conditioned. If small (smaller than EPS*||X||) +! singular values are of interest and JOBS=='N', then +! the options (3, 4) give the most accurate results, where +! the option 4 is slightly better and with stronger +! theoretical background. +! If JOBS=='S', i.e. the columns of X will be normalized, +! then all methods give nearly equally accurate results. +!..... +! M (input) INTEGER, M>= 0 +! The state space dimension (the row dimension of X, Y). +!..... +! N (input) INTEGER, 0 <= N <= M +! The number of data snapshot pairs +! (the number of columns of X and Y). +!..... +! X (input/output) COMPLEX(KIND=WP) M-by-N array +! > On entry, X contains the data snapshot matrix X. It is +! assumed that the column norms of X are in the range of +! the normalized floating point numbers. +! < On exit, the leading K columns of X contain a POD basis, +! i.e. the leading K left singular vectors of the input +! data matrix X, U(:,1:K). All N columns of X contain all +! left singular vectors of the input matrix X. +! See the descriptions of K, Z and W. +!..... +! LDX (input) INTEGER, LDX >= M +! The leading dimension of the array X. +!..... +! Y (input/workspace/output) COMPLEX(KIND=WP) M-by-N array +! > On entry, Y contains the data snapshot matrix Y +! < On exit, +! If JOBR == 'R', the leading K columns of Y contain +! the residual vectors for the computed Ritz pairs. +! See the description of RES. +! If JOBR == 'N', Y contains the original input data, +! scaled according to the value of JOBS. +!..... +! LDY (input) INTEGER , LDY >= M +! The leading dimension of the array Y. +!..... +! NRNK (input) INTEGER +! Determines the mode how to compute the numerical rank, +! i.e. how to truncate small singular values of the input +! matrix X. On input, if +! NRNK = -1 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(1) +! This option is recommended. +! NRNK = -2 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(i-1) +! This option is included for R&D purposes. +! It requires highly accurate SVD, which +! may not be feasible. +! The numerical rank can be enforced by using positive +! value of NRNK as follows: +! 0 < NRNK <= N :: at most NRNK largest singular values +! will be used. If the number of the computed nonzero +! singular values is less than NRNK, then only those +! nonzero values will be used and the actually used +! dimension is less than NRNK. The actual number of +! the nonzero singular values is returned in the variable +! K. See the descriptions of TOL and K. +!..... +! TOL (input) REAL(KIND=WP), 0 <= TOL < 1 +! The tolerance for truncating small singular values. +! See the description of NRNK. +!..... +! K (output) INTEGER, 0 <= K <= N +! The dimension of the POD basis for the data snapshot +! matrix X and the number of the computed Ritz pairs. +! The value of K is determined according to the rule set +! by the parameters NRNK and TOL. +! See the descriptions of NRNK and TOL. +!..... +! EIGS (output) COMPLEX(KIND=WP) N-by-1 array +! The leading K (K<=N) entries of EIGS contain +! the computed eigenvalues (Ritz values). +! See the descriptions of K, and Z. +!..... +! Z (workspace/output) COMPLEX(KIND=WP) M-by-N array +! If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i) +! is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1. +! If JOBZ == 'F', then the Z(:,i)'s are given implicitly as +! the columns of X(:,1:K)*W(1:K,1:K), i.e. X(:,1:K)*W(:,i) +! is an eigenvector corresponding to EIGS(i). The columns +! of W(1:k,1:K) are the computed eigenvectors of the +! K-by-K Rayleigh quotient. +! See the descriptions of EIGS, X and W. +!..... +! LDZ (input) INTEGER , LDZ >= M +! The leading dimension of the array Z. +!..... +! RES (output) REAL(KIND=WP) N-by-1 array +! RES(1:K) contains the residuals for the K computed +! Ritz pairs, +! RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. +! See the description of EIGS and Z. +!..... +! B (output) COMPLEX(KIND=WP) M-by-N array. +! IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can +! be used for computing the refined vectors; see further +! details in the provided references. +! If JOBF == 'E', B(1:M,1:K) contains +! A*U(:,1:K)*W(1:K,1:K), which are the vectors from the +! Exact DMD, up to scaling by the inverse eigenvalues. +! If JOBF =='N', then B is not referenced. +! See the descriptions of X, W, K. +!..... +! LDB (input) INTEGER, LDB >= M +! The leading dimension of the array B. +!..... +! W (workspace/output) COMPLEX(KIND=WP) N-by-N array +! On exit, W(1:K,1:K) contains the K computed +! eigenvectors of the matrix Rayleigh quotient. +! The Ritz vectors (returned in Z) are the +! product of X (containing a POD basis for the input +! matrix X) and W. See the descriptions of K, S, X and Z. +! W is also used as a workspace to temporarily store the +! right singular vectors of X. +!..... +! LDW (input) INTEGER, LDW >= N +! The leading dimension of the array W. +!..... +! S (workspace/output) COMPLEX(KIND=WP) N-by-N array +! The array S(1:K,1:K) is used for the matrix Rayleigh +! quotient. This content is overwritten during +! the eigenvalue decomposition by CGEEV. +! See the description of K. +!..... +! LDS (input) INTEGER, LDS >= N +! The leading dimension of the array S. +!..... +! ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array +! ZWORK is used as complex workspace in the complex SVD, as +! specified by WHTSVD (1,2, 3 or 4) and for CGEEV for computing +! the eigenvalues of a Rayleigh quotient. +! If the call to CGEDMD is only workspace query, then +! ZWORK(1) contains the minimal complex workspace length and +! ZWORK(2) is the optimal complex workspace length. +! Hence, the length of work is at least 2. +! See the description of LZWORK. +!..... +! LZWORK (input) INTEGER +! The minimal length of the workspace vector ZWORK. +! LZWORK is calculated as MAX(LZWORK_SVD, LZWORK_CGEEV), +! where LZWORK_CGEEV = MAX( 1, 2*N ) and the minimal +! LZWORK_SVD is calculated as follows +! If WHTSVD == 1 :: CGESVD :: +! LZWORK_SVD = MAX(1,2*MIN(M,N)+MAX(M,N)) +! If WHTSVD == 2 :: CGESDD :: +! LZWORK_SVD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N) +! If WHTSVD == 3 :: CGESVDQ :: +! LZWORK_SVD = obtainable by a query +! If WHTSVD == 4 :: CGEJSV :: +! LZWORK_SVD = obtainable by a query +! If on entry LZWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths and returns them in +! LZWORK(1) and LZWORK(2), respectively. +!..... +! RWORK (workspace/output) REAL(KIND=WP) LRWORK-by-1 array +! On exit, RWORK(1:N) contains the singular values of +! X (for JOBS=='N') or column scaled X (JOBS=='S', 'C'). +! If WHTSVD==4, then RWORK(N+1) and RWORK(N+2) contain +! scaling factor RWORK(N+2)/RWORK(N+1) used to scale X +! and Y to avoid overflow in the SVD of X. +! This may be of interest if the scaling option is off +! and as many as possible smallest eigenvalues are +! desired to the highest feasible accuracy. +! If the call to CGEDMD is only workspace query, then +! RWORK(1) contains the minimal workspace length. +! See the description of LRWORK. +!..... +! LRWORK (input) INTEGER +! The minimal length of the workspace vector RWORK. +! LRWORK is calculated as follows: +! LRWORK = MAX(1, N+LRWORK_SVD,N+LRWORK_CGEEV), where +! LRWORK_CGEEV = MAX(1,2*N) and RWORK_SVD is the real workspace +! for the SVD subroutine determined by the input parameter +! WHTSVD. +! If WHTSVD == 1 :: CGESVD :: +! LRWORK_SVD = 5*MIN(M,N) +! If WHTSVD == 2 :: CGESDD :: +! LRWORK_SVD = MAX(5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), +! 2*MAX(M,N)*MIN(M,N)+2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) ) +! If WHTSVD == 3 :: CGESVDQ :: +! LRWORK_SVD = obtainable by a query +! If WHTSVD == 4 :: CGEJSV :: +! LRWORK_SVD = obtainable by a query +! If on entry LRWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! real workspace length and returns it in RWORK(1). +!..... +! IWORK (workspace/output) INTEGER LIWORK-by-1 array +! Workspace that is required only if WHTSVD equals +! 2 , 3 or 4. (See the description of WHTSVD). +! If on entry LWORK =-1 or LIWORK=-1, then the +! minimal length of IWORK is computed and returned in +! IWORK(1). See the description of LIWORK. +!..... +! LIWORK (input) INTEGER +! The minimal length of the workspace vector IWORK. +! If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 +! If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)) +! If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1) +! If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N) +! If on entry LIWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for ZWORK, RWORK and +! IWORK. See the descriptions of ZWORK, RWORK and IWORK. +!..... +! INFO (output) INTEGER +! -i < 0 :: On entry, the i-th argument had an +! illegal value +! = 0 :: Successful return. +! = 1 :: Void input. Quick exit (M=0 or N=0). +! = 2 :: The SVD computation of X did not converge. +! Suggestion: Check the input data and/or +! repeat with different WHTSVD. +! = 3 :: The computation of the eigenvalues did not +! converge. +! = 4 :: If data scaling was requested on input and +! the procedure found inconsistency in the data +! such that for some column index i, +! X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set +! to zero if JOBS=='C'. The computation proceeds +! with original or modified data and warning +! flag is set with INFO=4. +!............................................................. +!............................................................. +! Parameters +! ~~~~~~~~~~ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP + COMPLEX(KIND=WP), PARAMETER :: ZONE = ( 1.0_WP, 0.0_WP ) + COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP ) + +! Local scalars +! ~~~~~~~~~~~~~ + REAL(KIND=WP) :: OFL, ROOTSC, SCALE, SMALL, & + SSUM, XSCL1, XSCL2 + INTEGER :: i, j, IMINWR, INFO1, INFO2, & + LWRKEV, LWRSDD, LWRSVD, LWRSVJ, & + LWRSVQ, MLWORK, MWRKEV, MWRSDD, & + MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, & + OLWORK, MLRWRK + LOGICAL :: BADXY, LQUERY, SCCOLX, SCCOLY, & + WNTEX, WNTREF, WNTRES, WNTVEC + CHARACTER :: JOBZL, T_OR_N + CHARACTER :: JSVOPT +! +! Local arrays +! ~~~~~~~~~~~~ + REAL(KIND=WP) :: RDUMMY(2) + +! External functions (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~ + REAL(KIND=WP) CLANGE, SLAMCH, SCNRM2 + EXTERNAL CLANGE, SLAMCH, SCNRM2, ICAMAX + INTEGER ICAMAX + LOGICAL SISNAN, LSAME + EXTERNAL SISNAN, LSAME + +! External subroutines (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL CAXPY, CGEMM, CSSCAL + EXTERNAL CGEEV, CGEJSV, CGESDD, CGESVD, CGESVDQ, & + CLACPY, CLASCL, CLASSQ, XERBLA + +! Intrinsic functions +! ~~~~~~~~~~~~~~~~~~~ + INTRINSIC FLOAT, INT, MAX, SQRT +!............................................................ +! +! Test the input arguments +! + WNTRES = LSAME(JOBR,'R') + SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C') + SCCOLY = LSAME(JOBS,'Y') + WNTVEC = LSAME(JOBZ,'V') + WNTREF = LSAME(JOBF,'R') + WNTEX = LSAME(JOBF,'E') + INFO = 0 + LQUERY = ( ( LZWORK == -1 ) .OR. ( LIWORK == -1 ) & + .OR. ( LRWORK == -1 ) ) +! + IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. & + LSAME(JOBS,'N')) ) THEN + INFO = -1 + ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N') & + .OR. LSAME(JOBZ,'F')) ) THEN + INFO = -2 + ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR. & + ( WNTRES .AND. (.NOT.WNTVEC) ) ) THEN + INFO = -3 + ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR. & + LSAME(JOBF,'N') ) ) THEN + INFO = -4 + ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR. & + (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN + INFO = -5 + ELSE IF ( M < 0 ) THEN + INFO = -6 + ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN + INFO = -7 + ELSE IF ( LDX < M ) THEN + INFO = -9 + ELSE IF ( LDY < M ) THEN + INFO = -11 + ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. & + ((NRNK >= 1).AND.(NRNK <=N ))) ) THEN + INFO = -12 + ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN + INFO = -13 + ELSE IF ( LDZ < M ) THEN + INFO = -17 + ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN + INFO = -20 + ELSE IF ( LDW < N ) THEN + INFO = -22 + ELSE IF ( LDS < N ) THEN + INFO = -24 + END IF +! + IF ( INFO == 0 ) THEN + ! Compute the minimal and the optimal workspace + ! requirements. Simulate running the code and + ! determine minimal and optimal sizes of the + ! workspace at any moment of the run. + IF ( N == 0 ) THEN + ! Quick return. All output except K is void. + ! INFO=1 signals the void input. + ! In case of a workspace query, the default + ! minimal workspace lengths are returned. + IF ( LQUERY ) THEN + IWORK(1) = 1 + RWORK(1) = 1 + ZWORK(1) = 2 + ZWORK(2) = 2 + ELSE + K = 0 + END IF + INFO = 1 + RETURN + END IF + + IMINWR = 1 + MLRWRK = MAX(1,N) + MLWORK = 2 + OLWORK = 2 + SELECT CASE ( WHTSVD ) + CASE (1) + ! The following is specified as the minimal + ! length of WORK in the definition of CGESVD: + ! MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N)) + MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N)) + MLWORK = MAX(MLWORK,MWRSVD) + MLRWRK = MAX(MLRWRK,N + 5*MIN(M,N)) + IF ( LQUERY ) THEN + CALL CGESVD( 'O', 'S', M, N, X, LDX, RWORK, & + B, LDB, W, LDW, ZWORK, -1, RDUMMY, INFO1 ) + LWRSVD = INT( ZWORK(1) ) + OLWORK = MAX(OLWORK,LWRSVD) + END IF + CASE (2) + ! The following is specified as the minimal + ! length of WORK in the definition of CGESDD: + ! MWRSDD = 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N). + ! RWORK length: 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N) + ! In LAPACK 3.10.1 RWORK is defined differently. + ! Below we take max over the two versions. + ! IMINWR = 8*MIN(M,N) + MWRSDD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N) + MLWORK = MAX(MLWORK,MWRSDD) + IMINWR = 8*MIN(M,N) + MLRWRK = MAX( MLRWRK, N + & + MAX( 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), & + 5*MIN(M,N)*MIN(M,N)+5*MIN(M,N), & + 2*MAX(M,N)*MIN(M,N)+ & + 2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) ) + IF ( LQUERY ) THEN + CALL CGESDD( 'O', M, N, X, LDX, RWORK, B, & + LDB, W, LDW, ZWORK, -1, RDUMMY, IWORK, INFO1 ) + LWRSDD = MAX(MWRSDD,INT( ZWORK(1) )) + OLWORK = MAX(OLWORK,LWRSDD) + END IF + CASE (3) + CALL CGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, & + X, LDX, RWORK, Z, LDZ, W, LDW, NUMRNK, & + IWORK, -1, ZWORK, -1, RDUMMY, -1, INFO1 ) + IMINWR = IWORK(1) + MWRSVQ = INT(ZWORK(2)) + MLWORK = MAX(MLWORK,MWRSVQ) + MLRWRK = MAX(MLRWRK,N + INT(RDUMMY(1))) + IF ( LQUERY ) THEN + LWRSVQ = INT(ZWORK(1)) + OLWORK = MAX(OLWORK,LWRSVQ) + END IF + CASE (4) + JSVOPT = 'J' + CALL CGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, & + N, X, LDX, RWORK, Z, LDZ, W, LDW, & + ZWORK, -1, RDUMMY, -1, IWORK, INFO1 ) + IMINWR = IWORK(1) + MWRSVJ = INT(ZWORK(2)) + MLWORK = MAX(MLWORK,MWRSVJ) + MLRWRK = MAX(MLRWRK,N + MAX(7,INT(RDUMMY(1)))) + IF ( LQUERY ) THEN + LWRSVJ = INT(ZWORK(1)) + OLWORK = MAX(OLWORK,LWRSVJ) + END IF + END SELECT + IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN + JOBZL = 'V' + ELSE + JOBZL = 'N' + END IF + ! Workspace calculation to the CGEEV call + MWRKEV = MAX( 1, 2*N ) + MLWORK = MAX(MLWORK,MWRKEV) + MLRWRK = MAX(MLRWRK,N+2*N) + IF ( LQUERY ) THEN + CALL CGEEV( 'N', JOBZL, N, S, LDS, EIGS, & + W, LDW, W, LDW, ZWORK, -1, RWORK, INFO1 ) ! LAPACK CALL + LWRKEV = INT(ZWORK(1)) + OLWORK = MAX( OLWORK, LWRKEV ) + OLWORK = MAX( 2, OLWORK ) + END IF +! + IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -30 + IF ( LRWORK < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -28 + IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -26 + + END IF +! + IF( INFO /= 0 ) THEN + CALL XERBLA( 'CGEDMD', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN +! Return minimal and optimal workspace sizes + IWORK(1) = IMINWR + RWORK(1) = MLRWRK + ZWORK(1) = MLWORK + ZWORK(2) = OLWORK + RETURN + END IF +!............................................................ +! + OFL = SLAMCH('O')*SLAMCH('P') + SMALL = SLAMCH('S') + BADXY = .FALSE. +! +! <1> Optional scaling of the snapshots (columns of X, Y) +! ========================================================== + IF ( SCCOLX ) THEN + ! The columns of X will be normalized. + ! To prevent overflows, the column norms of X are + ! carefully computed using CLASSQ. + K = 0 + DO i = 1, N + !WORK(i) = SCNRM2( M, X(1,i), 1 ) + SCALE = ZERO + CALL CLASSQ( M, X(1,i), 1, SCALE, SSUM ) + IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN + K = 0 + INFO = -8 + CALL XERBLA('CGEDMD',-INFO) + END IF + IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN + ROOTSC = SQRT(SSUM) + IF ( SCALE .GE. (OFL / ROOTSC) ) THEN +! Norm of X(:,i) overflows. First, X(:,i) +! is scaled by +! ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2. +! Next, the norm of X(:,i) is stored without +! overflow as WORK(i) = - SCALE * (ROOTSC/M), +! the minus sign indicating the 1/M factor. +! Scaling is performed without overflow, and +! underflow may occur in the smallest entries +! of X(:,i). The relative backward and forward +! errors are small in the ell_2 norm. + CALL CLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, & + M, 1, X(1,i), LDX, INFO2 ) + RWORK(i) = - SCALE * ( ROOTSC / FLOAT(M) ) + ELSE +! X(:,i) will be scaled to unit 2-norm + RWORK(i) = SCALE * ROOTSC + CALL CLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, & + X(1,i), LDX, INFO2 ) ! LAPACK CALL +! X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i) ! INTRINSIC + END IF + ELSE + RWORK(i) = ZERO + K = K + 1 + END IF + END DO + IF ( K == N ) THEN + ! All columns of X are zero. Return error code -8. + ! (the 8th input variable had an illegal value) + K = 0 + INFO = -8 + CALL XERBLA('CGEDMD',-INFO) + RETURN + END IF + DO i = 1, N +! Now, apply the same scaling to the columns of Y. + IF ( RWORK(i) > ZERO ) THEN + CALL CSSCAL( M, ONE/RWORK(i), Y(1,i), 1 ) ! BLAS CALL +! Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i) ! INTRINSIC + ELSE IF ( RWORK(i) < ZERO ) THEN + CALL CLASCL( 'G', 0, 0, -RWORK(i), & + ONE/FLOAT(M), M, 1, Y(1,i), LDY, INFO2 ) ! LAPACK CALL + ELSE IF ( ABS(Y(ICAMAX(M, Y(1,i),1),i )) & + /= ZERO ) THEN +! X(:,i) is zero vector. For consistency, +! Y(:,i) should also be zero. If Y(:,i) is not +! zero, then the data might be inconsistent or +! corrupted. If JOBS == 'C', Y(:,i) is set to +! zero and a warning flag is raised. +! The computation continues but the +! situation will be reported in the output. + BADXY = .TRUE. + IF ( LSAME(JOBS,'C')) & + CALL CSSCAL( M, ZERO, Y(1,i), 1 ) ! BLAS CALL + END IF + END DO + END IF + ! + IF ( SCCOLY ) THEN + ! The columns of Y will be normalized. + ! To prevent overflows, the column norms of Y are + ! carefully computed using CLASSQ. + DO i = 1, N + !RWORK(i) = SCNRM2( M, Y(1,i), 1 ) + SCALE = ZERO + CALL CLASSQ( M, Y(1,i), 1, SCALE, SSUM ) + IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN + K = 0 + INFO = -10 + CALL XERBLA('CGEDMD',-INFO) + END IF + IF ( SCALE /= ZERO .AND. (SSUM /= ZERO) ) THEN + ROOTSC = SQRT(SSUM) + IF ( SCALE .GE. (OFL / ROOTSC) ) THEN +! Norm of Y(:,i) overflows. First, Y(:,i) +! is scaled by +! ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2. +! Next, the norm of Y(:,i) is stored without +! overflow as RWORK(i) = - SCALE * (ROOTSC/M), +! the minus sign indicating the 1/M factor. +! Scaling is performed without overflow, and +! underflow may occur in the smallest entries +! of Y(:,i). The relative backward and forward +! errors are small in the ell_2 norm. + CALL CLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, & + M, 1, Y(1,i), LDY, INFO2 ) + RWORK(i) = - SCALE * ( ROOTSC / FLOAT(M) ) + ELSE +! Y(:,i) will be scaled to unit 2-norm + RWORK(i) = SCALE * ROOTSC + CALL CLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, & + Y(1,i), LDY, INFO2 ) ! LAPACK CALL +! Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i) ! INTRINSIC + END IF + ELSE + RWORK(i) = ZERO + END IF + END DO + DO i = 1, N +! Now, apply the same scaling to the columns of X. + IF ( RWORK(i) > ZERO ) THEN + CALL CSSCAL( M, ONE/RWORK(i), X(1,i), 1 ) ! BLAS CALL +! X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i) ! INTRINSIC + ELSE IF ( RWORK(i) < ZERO ) THEN + CALL CLASCL( 'G', 0, 0, -RWORK(i), & + ONE/FLOAT(M), M, 1, X(1,i), LDX, INFO2 ) ! LAPACK CALL + ELSE IF ( ABS(X(ICAMAX(M, X(1,i),1),i )) & + /= ZERO ) THEN +! Y(:,i) is zero vector. If X(:,i) is not +! zero, then a warning flag is raised. +! The computation continues but the +! situation will be reported in the output. + BADXY = .TRUE. + END IF + END DO + END IF +! +! <2> SVD of the data snapshot matrix X. +! ===================================== +! The left singular vectors are stored in the array X. +! The right singular vectors are in the array W. +! The array W will later on contain the eigenvectors +! of a Rayleigh quotient. + NUMRNK = N + SELECT CASE ( WHTSVD ) + CASE (1) + CALL CGESVD( 'O', 'S', M, N, X, LDX, RWORK, B, & + LDB, W, LDW, ZWORK, LZWORK, RWORK(N+1), INFO1 ) ! LAPACK CALL + T_OR_N = 'C' + CASE (2) + CALL CGESDD( 'O', M, N, X, LDX, RWORK, B, LDB, W, & + LDW, ZWORK, LZWORK, RWORK(N+1), IWORK, INFO1 ) ! LAPACK CALL + T_OR_N = 'C' + CASE (3) + CALL CGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, & + X, LDX, RWORK, Z, LDZ, W, LDW, & + NUMRNK, IWORK, LIWORK, ZWORK, & + LZWORK, RWORK(N+1), LRWORK-N, INFO1) ! LAPACK CALL + CALL CLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX ) ! LAPACK CALL + T_OR_N = 'C' + CASE (4) + CALL CGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, & + N, X, LDX, RWORK, Z, LDZ, W, LDW, & + ZWORK, LZWORK, RWORK(N+1), LRWORK-N, IWORK, INFO1 ) ! LAPACK CALL + CALL CLACPY( 'A', M, N, Z, LDZ, X, LDX ) ! LAPACK CALL + T_OR_N = 'N' + XSCL1 = RWORK(N+1) + XSCL2 = RWORK(N+2) + IF ( XSCL1 /= XSCL2 ) THEN + ! This is an exceptional situation. If the + ! data matrices are not scaled and the + ! largest singular value of X overflows. + ! In that case CGEJSV can return the SVD + ! in scaled form. The scaling factor can be used + ! to rescale the data (X and Y). + CALL CLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2 ) + END IF + END SELECT +! + IF ( INFO1 > 0 ) THEN + ! The SVD selected subroutine did not converge. + ! Return with an error code. + INFO = 2 + RETURN + END IF +! + IF ( RWORK(1) == ZERO ) THEN + ! The largest computed singular value of (scaled) + ! X is zero. Return error code -8 + ! (the 8th input variable had an illegal value). + K = 0 + INFO = -8 + CALL XERBLA('CGEDMD',-INFO) + RETURN + END IF +! + !<3> Determine the numerical rank of the data + ! snapshots matrix X. This depends on the + ! parameters NRNK and TOL. + + SELECT CASE ( NRNK ) + CASE ( -1 ) + K = 1 + DO i = 2, NUMRNK + IF ( ( RWORK(i) <= RWORK(1)*TOL ) .OR. & + ( RWORK(i) <= SMALL ) ) EXIT + K = K + 1 + END DO + CASE ( -2 ) + K = 1 + DO i = 1, NUMRNK-1 + IF ( ( RWORK(i+1) <= RWORK(i)*TOL ) .OR. & + ( RWORK(i) <= SMALL ) ) EXIT + K = K + 1 + END DO + CASE DEFAULT + K = 1 + DO i = 2, NRNK + IF ( RWORK(i) <= SMALL ) EXIT + K = K + 1 + END DO + END SELECT + ! Now, U = X(1:M,1:K) is the SVD/POD basis for the + ! snapshot data in the input matrix X. + + !<4> Compute the Rayleigh quotient S = U^H * A * U. + ! Depending on the requested outputs, the computation + ! is organized to compute additional auxiliary + ! matrices (for the residuals and refinements). + ! + ! In all formulas below, we need V_k*Sigma_k^(-1) + ! where either V_k is in W(1:N,1:K), or V_k^H is in + ! W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)). + IF ( LSAME(T_OR_N, 'N') ) THEN + DO i = 1, K + CALL CSSCAL( N, ONE/RWORK(i), W(1,i), 1 ) ! BLAS CALL + ! W(1:N,i) = (ONE/RWORK(i)) * W(1:N,i) ! INTRINSIC + END DO + ELSE + ! This non-unit stride access is due to the fact + ! that CGESVD, CGESVDQ and CGESDD return the + ! adjoint matrix of the right singular vectors. + !DO i = 1, K + ! CALL DSCAL( N, ONE/RWORK(i), W(i,1), LDW ) ! BLAS CALL + ! ! W(i,1:N) = (ONE/RWORK(i)) * W(i,1:N) ! INTRINSIC + !END DO + DO i = 1, K + RWORK(N+i) = ONE/RWORK(i) + END DO + DO j = 1, N + DO i = 1, K + W(i,j) = CMPLX(RWORK(N+i),ZERO,KIND=WP)*W(i,j) + END DO + END DO + END IF +! + IF ( WNTREF ) THEN + ! + ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K))) + ! for computing the refined Ritz vectors + ! (optionally, outside CGEDMD). + CALL CGEMM( 'N', T_OR_N, M, K, N, ZONE, Y, LDY, W, & + LDW, ZZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T' + ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K)) ! INTRINSIC, for T_OR_N=='N' + ! + ! At this point Z contains + ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and + ! this is needed for computing the residuals. + ! This matrix is returned in the array B and + ! it can be used to compute refined Ritz vectors. + CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB ) ! BLAS CALL + ! B(1:M,1:K) = Z(1:M,1:K) ! INTRINSIC + + CALL CGEMM( 'C', 'N', K, K, M, ZONE, X, LDX, Z, & + LDZ, ZZERO, S, LDS ) ! BLAS CALL + ! S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC + ! At this point S = U^H * A * U is the Rayleigh quotient. + ELSE + ! A * U(:,1:K) is not explicitly needed and the + ! computation is organized differently. The Rayleigh + ! quotient is computed more efficiently. + CALL CGEMM( 'C', 'N', K, N, M, ZONE, X, LDX, Y, LDY, & + ZZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) ) ! INTRINSIC + ! + CALL CGEMM( 'N', T_OR_N, K, K, N, ZONE, Z, LDZ, W, & + LDW, ZZERO, S, LDS ) ! BLAS CALL + ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T' + ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K))) ! INTRINSIC, for T_OR_N=='N' + ! At this point S = U^H * A * U is the Rayleigh quotient. + ! If the residuals are requested, save scaled V_k into Z. + ! Recall that V_k or V_k^H is stored in W. + IF ( WNTRES .OR. WNTEX ) THEN + IF ( LSAME(T_OR_N, 'N') ) THEN + CALL CLACPY( 'A', N, K, W, LDW, Z, LDZ ) + ELSE + CALL CLACPY( 'A', K, N, W, LDW, Z, LDZ ) + END IF + END IF + END IF +! + !<5> Compute the Ritz values and (if requested) the + ! right eigenvectors of the Rayleigh quotient. + ! + CALL CGEEV( 'N', JOBZL, K, S, LDS, EIGS, W, & + LDW, W, LDW, ZWORK, LZWORK, RWORK(N+1), INFO1 ) ! LAPACK CALL + ! + ! W(1:K,1:K) contains the eigenvectors of the Rayleigh + ! quotient. See the description of Z. + ! Also, see the description of CGEEV. + IF ( INFO1 > 0 ) THEN + ! CGEEV failed to compute the eigenvalues and + ! eigenvectors of the Rayleigh quotient. + INFO = 3 + RETURN + END IF +! + ! <6> Compute the eigenvectors (if requested) and, + ! the residuals (if requested). + ! + IF ( WNTVEC .OR. WNTEX ) THEN + IF ( WNTRES ) THEN + IF ( WNTREF ) THEN + ! Here, if the refinement is requested, we have + ! A*U(:,1:K) already computed and stored in Z. + ! For the residuals, need Y = A * U(:,1;K) * W. + CALL CGEMM( 'N', 'N', M, K, K, ZONE, Z, LDZ, W, & + LDW, ZZERO, Y, LDY ) ! BLAS CALL + ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K) ! INTRINSIC + ! This frees Z; Y contains A * U(:,1:K) * W. + ELSE + ! Compute S = V_k * Sigma_k^(-1) * W, where + ! V_k * Sigma_k^(-1) (or its adjoint) is stored in Z + CALL CGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, & + W, LDW, ZZERO, S, LDS) + ! Then, compute Z = Y * S = + ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = + ! = A * U(:,1:K) * W(1:K,1:K) + CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, & + LDS, ZZERO, Z, LDZ) + ! Save a copy of Z into Y and free Z for holding + ! the Ritz vectors. + CALL CLACPY( 'A', M, K, Z, LDZ, Y, LDY ) + IF ( WNTEX ) CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB ) + END IF + ELSE IF ( WNTEX ) THEN + ! Compute S = V_k * Sigma_k^(-1) * W, where + ! V_k * Sigma_k^(-1) is stored in Z + CALL CGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, & + W, LDW, ZZERO, S, LDS) + ! Then, compute Z = Y * S = + ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = + ! = A * U(:,1:K) * W(1:K,1:K) + CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, & + LDS, ZZERO, B, LDB) + ! The above call replaces the following two calls + ! that were used in the developing-testing phase. + ! CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, & + ! LDS, ZZERO, Z, LDZ) + ! Save a copy of Z into Y and free Z for holding + ! the Ritz vectors. + ! CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB ) + END IF +! + ! Compute the Ritz vectors + IF ( WNTVEC ) CALL CGEMM( 'N', 'N', M, K, K, ZONE, X, LDX, W, LDW, & + ZZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K)) ! INTRINSIC +! + IF ( WNTRES ) THEN + DO i = 1, K + CALL CAXPY( M, -EIGS(i), Z(1,i), 1, Y(1,i), 1 ) ! BLAS CALL + ! Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i) ! INTRINSIC + RES(i) = SCNRM2( M, Y(1,i), 1) ! BLAS CALL + END DO + END IF + END IF +! + IF ( WHTSVD == 4 ) THEN + RWORK(N+1) = XSCL1 + RWORK(N+2) = XSCL2 + END IF +! +! Successful exit. + IF ( .NOT. BADXY ) THEN + INFO = 0 + ELSE + ! A warning on possible data inconsistency. + ! This should be a rare event. + INFO = 4 + END IF +!............................................................ + RETURN +! ...... + END SUBROUTINE CGEDMD + diff --git a/lapack-netlib/SRC/cgedmdq.f90 b/lapack-netlib/SRC/cgedmdq.f90 new file mode 100644 index 000000000..52c1669c7 --- /dev/null +++ b/lapack-netlib/SRC/cgedmdq.f90 @@ -0,0 +1,689 @@ +SUBROUTINE CGEDMDQ( JOBS, JOBZ, JOBR, JOBQ, JOBT, JOBF, & + WHTSVD, M, N, F, LDF, X, LDX, Y, & + LDY, NRNK, TOL, K, EIGS, & + Z, LDZ, RES, B, LDB, V, LDV, & + S, LDS, ZWORK, LZWORK, WORK, LWORK, & + IWORK, LIWORK, INFO ) +! March 2023 +!..... + USE iso_fortran_env + IMPLICIT NONE + INTEGER, PARAMETER :: WP = real32 +!..... +! Scalar arguments + CHARACTER, INTENT(IN) :: JOBS, JOBZ, JOBR, JOBQ, & + JOBT, JOBF + INTEGER, INTENT(IN) :: WHTSVD, M, N, LDF, LDX, & + LDY, NRNK, LDZ, LDB, LDV, & + LDS, LZWORK, LWORK, LIWORK + INTEGER, INTENT(OUT) :: INFO, K + REAL(KIND=WP), INTENT(IN) :: TOL +! Array arguments + COMPLEX(KIND=WP), INTENT(INOUT) :: F(LDF,*) + COMPLEX(KIND=WP), INTENT(OUT) :: X(LDX,*), Y(LDY,*), & + Z(LDZ,*), B(LDB,*), & + V(LDV,*), S(LDS,*) + COMPLEX(KIND=WP), INTENT(OUT) :: EIGS(*) + COMPLEX(KIND=WP), INTENT(OUT) :: ZWORK(*) + REAL(KIND=WP), INTENT(OUT) :: RES(*) + REAL(KIND=WP), INTENT(OUT) :: WORK(*) + INTEGER, INTENT(OUT) :: IWORK(*) +!..... +! Purpose +! ======= +! CGEDMDQ computes the Dynamic Mode Decomposition (DMD) for +! a pair of data snapshot matrices, using a QR factorization +! based compression of the data. For the input matrices +! X and Y such that Y = A*X with an unaccessible matrix +! A, CGEDMDQ computes a certain number of Ritz pairs of A using +! the standard Rayleigh-Ritz extraction from a subspace of +! range(X) that is determined using the leading left singular +! vectors of X. Optionally, CGEDMDQ returns the residuals +! of the computed Ritz pairs, the information needed for +! a refinement of the Ritz vectors, or the eigenvectors of +! the Exact DMD. +! For further details see the references listed +! below. For more details of the implementation see [3]. +! +! References +! ========== +! [1] P. Schmid: Dynamic mode decomposition of numerical +! and experimental data, +! Journal of Fluid Mechanics 656, 5-28, 2010. +! [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal +! decompositions: analysis and enhancements, +! SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. +! [3] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition I. Technical report. AIMDyn Inc. +! and LAPACK Working Note 298. +! [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. +! Brunton, N. Kutz: On Dynamic Mode Decomposition: +! Theory and Applications, Journal of Computational +! Dynamics 1(2), 391 -421, 2014. +! +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! and supported by +! - DARPA SBIR project "Koopman Operator-Based Forecasting +! for Nonstationary Processes from Near-Term, Limited +! Observational Data" Contract No: W31P4Q-21-C-0007 +! - DARPA PAI project "Physics-Informed Machine Learning +! Methodologies" Contract No: HR0011-18-9-0033 +! - DARPA MoDyL project "A Data-Driven, Operator-Theoretic +! Framework for Space-Time Analysis of Process Dynamics" +! Contract No: HR0011-16-C-0116 +! Any opinions, findings and conclusions or recommendations +! expressed in this material are those of the author and +! do not necessarily reflect the views of the DARPA SBIR +! Program Office. +!============================================================ +! Distribution Statement A: +! Approved for Public Release, Distribution Unlimited. +! Cleared by DARPA on September 29, 2022 +!============================================================ +!...................................................................... +! Arguments +! ========= +! JOBS (input) CHARACTER*1 +! Determines whether the initial data snapshots are scaled +! by a diagonal matrix. The data snapshots are the columns +! of F. The leading N-1 columns of F are denoted X and the +! trailing N-1 columns are denoted Y. +! 'S' :: The data snapshots matrices X and Y are multiplied +! with a diagonal matrix D so that X*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'C' :: The snapshots are scaled as with the 'S' option. +! If it is found that an i-th column of X is zero +! vector and the corresponding i-th column of Y is +! non-zero, then the i-th column of Y is set to +! zero and a warning flag is raised. +! 'Y' :: The data snapshots matrices X and Y are multiplied +! by a diagonal matrix D so that Y*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'N' :: No data scaling. +!..... +! JOBZ (input) CHARACTER*1 +! Determines whether the eigenvectors (Koopman modes) will +! be computed. +! 'V' :: The eigenvectors (Koopman modes) will be computed +! and returned in the matrix Z. +! See the description of Z. +! 'F' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product Z*V, where Z +! is orthonormal and V contains the eigenvectors +! of the corresponding Rayleigh quotient. +! See the descriptions of F, V, Z. +! 'Q' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product Q*Z, where Z +! contains the eigenvectors of the compression of the +! underlying discretised operator onto the span of +! the data snapshots. See the descriptions of F, V, Z. +! Q is from the inital QR facorization. +! 'N' :: The eigenvectors are not computed. +!..... +! JOBR (input) CHARACTER*1 +! Determines whether to compute the residuals. +! 'R' :: The residuals for the computed eigenpairs will +! be computed and stored in the array RES. +! See the description of RES. +! For this option to be legal, JOBZ must be 'V'. +! 'N' :: The residuals are not computed. +!..... +! JOBQ (input) CHARACTER*1 +! Specifies whether to explicitly compute and return the +! unitary matrix from the QR factorization. +! 'Q' :: The matrix Q of the QR factorization of the data +! snapshot matrix is computed and stored in the +! array F. See the description of F. +! 'N' :: The matrix Q is not explicitly computed. +!..... +! JOBT (input) CHARACTER*1 +! Specifies whether to return the upper triangular factor +! from the QR factorization. +! 'R' :: The matrix R of the QR factorization of the data +! snapshot matrix F is returned in the array Y. +! See the description of Y and Further details. +! 'N' :: The matrix R is not returned. +!..... +! JOBF (input) CHARACTER*1 +! Specifies whether to store information needed for post- +! processing (e.g. computing refined Ritz vectors) +! 'R' :: The matrix needed for the refinement of the Ritz +! vectors is computed and stored in the array B. +! See the description of B. +! 'E' :: The unscaled eigenvectors of the Exact DMD are +! computed and returned in the array B. See the +! description of B. +! 'N' :: No eigenvector refinement data is computed. +! To be useful on exit, this option needs JOBQ='Q'. +!..... +! WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } +! Allows for a selection of the SVD algorithm from the +! LAPACK library. +! 1 :: CGESVD (the QR SVD algorithm) +! 2 :: CGESDD (the Divide and Conquer algorithm; if enough +! workspace available, this is the fastest option) +! 3 :: CGESVDQ (the preconditioned QR SVD ; this and 4 +! are the most accurate options) +! 4 :: CGEJSV (the preconditioned Jacobi SVD; this and 3 +! are the most accurate options) +! For the four methods above, a significant difference in +! the accuracy of small singular values is possible if +! the snapshots vary in norm so that X is severely +! ill-conditioned. If small (smaller than EPS*||X||) +! singular values are of interest and JOBS=='N', then +! the options (3, 4) give the most accurate results, where +! the option 4 is slightly better and with stronger +! theoretical background. +! If JOBS=='S', i.e. the columns of X will be normalized, +! then all methods give nearly equally accurate results. +!..... +! M (input) INTEGER, M >= 0 +! The state space dimension (the number of rows of F). +!..... +! N (input) INTEGER, 0 <= N <= M +! The number of data snapshots from a single trajectory, +! taken at equidistant discrete times. This is the +! number of columns of F. +!..... +! F (input/output) COMPLEX(KIND=WP) M-by-N array +! > On entry, +! the columns of F are the sequence of data snapshots +! from a single trajectory, taken at equidistant discrete +! times. It is assumed that the column norms of F are +! in the range of the normalized floating point numbers. +! < On exit, +! If JOBQ == 'Q', the array F contains the orthogonal +! matrix/factor of the QR factorization of the initial +! data snapshots matrix F. See the description of JOBQ. +! If JOBQ == 'N', the entries in F strictly below the main +! diagonal contain, column-wise, the information on the +! Householder vectors, as returned by CGEQRF. The +! remaining information to restore the orthogonal matrix +! of the initial QR factorization is stored in ZWORK(1:MIN(M,N)). +! See the description of ZWORK. +!..... +! LDF (input) INTEGER, LDF >= M +! The leading dimension of the array F. +!..... +! X (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array +! X is used as workspace to hold representations of the +! leading N-1 snapshots in the orthonormal basis computed +! in the QR factorization of F. +! On exit, the leading K columns of X contain the leading +! K left singular vectors of the above described content +! of X. To lift them to the space of the left singular +! vectors U(:,1:K) of the input data, pre-multiply with the +! Q factor from the initial QR factorization. +! See the descriptions of F, K, V and Z. +!..... +! LDX (input) INTEGER, LDX >= N +! The leading dimension of the array X. +!..... +! Y (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N) array +! Y is used as workspace to hold representations of the +! trailing N-1 snapshots in the orthonormal basis computed +! in the QR factorization of F. +! On exit, +! If JOBT == 'R', Y contains the MIN(M,N)-by-N upper +! triangular factor from the QR factorization of the data +! snapshot matrix F. +!..... +! LDY (input) INTEGER , LDY >= N +! The leading dimension of the array Y. +!..... +! NRNK (input) INTEGER +! Determines the mode how to compute the numerical rank, +! i.e. how to truncate small singular values of the input +! matrix X. On input, if +! NRNK = -1 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(1) +! This option is recommended. +! NRNK = -2 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(i-1) +! This option is included for R&D purposes. +! It requires highly accurate SVD, which +! may not be feasible. +! The numerical rank can be enforced by using positive +! value of NRNK as follows: +! 0 < NRNK <= N-1 :: at most NRNK largest singular values +! will be used. If the number of the computed nonzero +! singular values is less than NRNK, then only those +! nonzero values will be used and the actually used +! dimension is less than NRNK. The actual number of +! the nonzero singular values is returned in the variable +! K. See the description of K. +!..... +! TOL (input) REAL(KIND=WP), 0 <= TOL < 1 +! The tolerance for truncating small singular values. +! See the description of NRNK. +!..... +! K (output) INTEGER, 0 <= K <= N +! The dimension of the SVD/POD basis for the leading N-1 +! data snapshots (columns of F) and the number of the +! computed Ritz pairs. The value of K is determined +! according to the rule set by the parameters NRNK and +! TOL. See the descriptions of NRNK and TOL. +!..... +! EIGS (output) COMPLEX(KIND=WP) (N-1)-by-1 array +! The leading K (K<=N-1) entries of EIGS contain +! the computed eigenvalues (Ritz values). +! See the descriptions of K, and Z. +!..... +! Z (workspace/output) COMPLEX(KIND=WP) M-by-(N-1) array +! If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i) +! is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1. +! If JOBZ == 'F', then the Z(:,i)'s are given implicitly as +! Z*V, where Z contains orthonormal matrix (the product of +! Q from the initial QR factorization and the SVD/POD_basis +! returned by CGEDMD in X) and the second factor (the +! eigenvectors of the Rayleigh quotient) is in the array V, +! as returned by CGEDMD. That is, X(:,1:K)*V(:,i) +! is an eigenvector corresponding to EIGS(i). The columns +! of V(1:K,1:K) are the computed eigenvectors of the +! K-by-K Rayleigh quotient. +! See the descriptions of EIGS, X and V. +!..... +! LDZ (input) INTEGER , LDZ >= M +! The leading dimension of the array Z. +!..... +! RES (output) REAL(KIND=WP) (N-1)-by-1 array +! RES(1:K) contains the residuals for the K computed +! Ritz pairs, +! RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. +! See the description of EIGS and Z. +!..... +! B (output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array. +! IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can +! be used for computing the refined vectors; see further +! details in the provided references. +! If JOBF == 'E', B(1:N,1;K) contains +! A*U(:,1:K)*W(1:K,1:K), which are the vectors from the +! Exact DMD, up to scaling by the inverse eigenvalues. +! In both cases, the content of B can be lifted to the +! original dimension of the input data by pre-multiplying +! with the Q factor from the initial QR factorization. +! Here A denotes a compression of the underlying operator. +! See the descriptions of F and X. +! If JOBF =='N', then B is not referenced. +!..... +! LDB (input) INTEGER, LDB >= MIN(M,N) +! The leading dimension of the array B. +!..... +! V (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array +! On exit, V(1:K,1:K) V contains the K eigenvectors of +! the Rayleigh quotient. The Ritz vectors +! (returned in Z) are the product of Q from the initial QR +! factorization (see the description of F) X (see the +! description of X) and V. +!..... +! LDV (input) INTEGER, LDV >= N-1 +! The leading dimension of the array V. +!..... +! S (output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array +! The array S(1:K,1:K) is used for the matrix Rayleigh +! quotient. This content is overwritten during +! the eigenvalue decomposition by CGEEV. +! See the description of K. +!..... +! LDS (input) INTEGER, LDS >= N-1 +! The leading dimension of the array S. +!..... +! ZWORK (workspace/output) COMPLEX(KIND=WP) LWORK-by-1 array +! On exit, +! ZWORK(1:MIN(M,N)) contains the scalar factors of the +! elementary reflectors as returned by CGEQRF of the +! M-by-N input matrix F. +! If the call to CGEDMDQ is only workspace query, then +! ZWORK(1) contains the minimal complex workspace length and +! ZWORK(2) is the optimal complex workspace length. +! Hence, the length of work is at least 2. +! See the description of LZWORK. +!..... +! LZWORK (input) INTEGER +! The minimal length of the workspace vector ZWORK. +! LZWORK is calculated as follows: +! Let MLWQR = N (minimal workspace for CGEQRF[M,N]) +! MLWDMD = minimal workspace for CGEDMD (see the +! description of LWORK in CGEDMD) +! MLWMQR = N (minimal workspace for +! ZUNMQR['L','N',M,N,N]) +! MLWGQR = N (minimal workspace for ZUNGQR[M,N,N]) +! MINMN = MIN(M,N) +! Then +! LZWORK = MAX(2, MIN(M,N)+MLWQR, MINMN+MLWDMD) +! is further updated as follows: +! if JOBZ == 'V' or JOBZ == 'F' THEN +! LZWORK = MAX( LZWORK, MINMN+MLWMQR ) +! if JOBQ == 'Q' THEN +! LZWORK = MAX( ZLWORK, MINMN+MLWGQR) +! +!..... +! WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array +! On exit, +! WORK(1:N-1) contains the singular values of +! the input submatrix F(1:M,1:N-1). +! If the call to CGEDMDQ is only workspace query, then +! WORK(1) contains the minimal workspace length and +! WORK(2) is the optimal workspace length. hence, the +! length of work is at least 2. +! See the description of LWORK. +!..... +! LWORK (input) INTEGER +! The minimal length of the workspace vector WORK. +! LWORK is the same as in CGEDMD, because in CGEDMDQ +! only CGEDMD requires real workspace for snapshots +! of dimensions MIN(M,N)-by-(N-1). +! If on entry LWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! IWORK (workspace/output) INTEGER LIWORK-by-1 array +! Workspace that is required only if WHTSVD equals +! 2 , 3 or 4. (See the description of WHTSVD). +! If on entry LWORK =-1 or LIWORK=-1, then the +! minimal length of IWORK is computed and returned in +! IWORK(1). See the description of LIWORK. +!..... +! LIWORK (input) INTEGER +! The minimal length of the workspace vector IWORK. +! If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 +! Let M1=MIN(M,N), N1=N-1. Then +! If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)) +! If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1) +! If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N) +! If on entry LIWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! INFO (output) INTEGER +! -i < 0 :: On entry, the i-th argument had an +! illegal value +! = 0 :: Successful return. +! = 1 :: Void input. Quick exit (M=0 or N=0). +! = 2 :: The SVD computation of X did not converge. +! Suggestion: Check the input data and/or +! repeat with different WHTSVD. +! = 3 :: The computation of the eigenvalues did not +! converge. +! = 4 :: If data scaling was requested on input and +! the procedure found inconsistency in the data +! such that for some column index i, +! X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set +! to zero if JOBS=='C'. The computation proceeds +! with original or modified data and warning +! flag is set with INFO=4. +!............................................................. +!............................................................. +! Parameters +! ~~~~~~~~~~ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP +! COMPLEX(KIND=WP), PARAMETER :: ZONE = ( 1.0_WP, 0.0_WP ) + COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP ) +! +! Local scalars +! ~~~~~~~~~~~~~ + INTEGER :: IMINWR, INFO1, MINMN, MLRWRK, & + MLWDMD, MLWGQR, MLWMQR, MLWORK, & + MLWQR, OLWDMD, OLWGQR, OLWMQR, & + OLWORK, OLWQR + LOGICAL :: LQUERY, SCCOLX, SCCOLY, WANTQ, & + WNTTRF, WNTRES, WNTVEC, WNTVCF, & + WNTVCQ, WNTREF, WNTEX + CHARACTER(LEN=1) :: JOBVL +! +! External functions (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~ + LOGICAL LSAME + EXTERNAL LSAME +! +! External subroutines (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL CGEQRF, CLACPY, CLASET, CUNGQR, & + CUNMQR, XERBLA + +! External subroutines +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL CGEDMD + +! Intrinsic functions +! ~~~~~~~~~~~~~~~~~~~ + INTRINSIC MAX, MIN, INT + !.......................................................... + ! + ! Test the input arguments + WNTRES = LSAME(JOBR,'R') + SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' ) + SCCOLY = LSAME(JOBS,'Y') + WNTVEC = LSAME(JOBZ,'V') + WNTVCF = LSAME(JOBZ,'F') + WNTVCQ = LSAME(JOBZ,'Q') + WNTREF = LSAME(JOBF,'R') + WNTEX = LSAME(JOBF,'E') + WANTQ = LSAME(JOBQ,'Q') + WNTTRF = LSAME(JOBT,'R') + MINMN = MIN(M,N) + INFO = 0 + LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) ) +! + IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. & + LSAME(JOBS,'N')) ) THEN + INFO = -1 + ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ & + .OR. LSAME(JOBZ,'N')) ) THEN + INFO = -2 + ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR. & + ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN + INFO = -3 + ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN + INFO = -4 + ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) ) THEN + INFO = -5 + ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR. & + LSAME(JOBF,'N') ) ) THEN + INFO = -6 + ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR. & + (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN + INFO = -7 + ELSE IF ( M < 0 ) THEN + INFO = -8 + ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN + INFO = -9 + ELSE IF ( LDF < M ) THEN + INFO = -11 + ELSE IF ( LDX < MINMN ) THEN + INFO = -13 + ELSE IF ( LDY < MINMN ) THEN + INFO = -15 + ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. & + ((NRNK >= 1).AND.(NRNK <=N ))) ) THEN + INFO = -16 + ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN + INFO = -17 + ELSE IF ( LDZ < M ) THEN + INFO = -21 + ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN + INFO = -24 + ELSE IF ( LDV < N-1 ) THEN + INFO = -26 + ELSE IF ( LDS < N-1 ) THEN + INFO = -28 + END IF +! + IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN + JOBVL = 'V' + ELSE + JOBVL = 'N' + END IF + IF ( INFO == 0 ) THEN + ! Compute the minimal and the optimal workspace + ! requirements. Simulate running the code and + ! determine minimal and optimal sizes of the + ! workspace at any moment of the run. + IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN + ! All output except K is void. INFO=1 signals + ! the void input. In case of a workspace query, + ! the minimal workspace lengths are returned. + IF ( LQUERY ) THEN + IWORK(1) = 1 + WORK(1) = 2 + WORK(2) = 2 + ELSE + K = 0 + END IF + INFO = 1 + RETURN + END IF + + MLRWRK = 2 + MLWORK = 2 + OLWORK = 2 + IMINWR = 1 + MLWQR = MAX(1,N) ! Minimal workspace length for CGEQRF. + MLWORK = MAX(MLWORK,MINMN + MLWQR) + + IF ( LQUERY ) THEN + CALL CGEQRF( M, N, F, LDF, ZWORK, ZWORK, -1, & + INFO1 ) + OLWQR = INT(ZWORK(1)) + OLWORK = MAX(OLWORK,MINMN + OLWQR) + END IF + CALL CGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,& + N-1, X, LDX, Y, LDY, NRNK, TOL, K, & + EIGS, Z, LDZ, RES, B, LDB, V, LDV, & + S, LDS, ZWORK, LZWORK, WORK, -1, IWORK,& + LIWORK, INFO1 ) + MLWDMD = INT(ZWORK(1)) + MLWORK = MAX(MLWORK, MINMN + MLWDMD) + MLRWRK = MAX(MLRWRK, INT(WORK(1))) + IMINWR = MAX(IMINWR, IWORK(1)) + IF ( LQUERY ) THEN + OLWDMD = INT(ZWORK(2)) + OLWORK = MAX(OLWORK, MINMN+OLWDMD) + END IF + IF ( WNTVEC .OR. WNTVCF ) THEN + MLWMQR = MAX(1,N) + MLWORK = MAX(MLWORK, MINMN+MLWMQR) + IF ( LQUERY ) THEN + CALL CUNMQR( 'L','N', M, N, MINMN, F, LDF, & + ZWORK, Z, LDZ, ZWORK, -1, INFO1 ) + OLWMQR = INT(ZWORK(1)) + OLWORK = MAX(OLWORK, MINMN+OLWMQR) + END IF + END IF + IF ( WANTQ ) THEN + MLWGQR = MAX(1,N) + MLWORK = MAX(MLWORK, MINMN+MLWGQR) + IF ( LQUERY ) THEN + CALL CUNGQR( M, MINMN, MINMN, F, LDF, ZWORK, & + ZWORK, -1, INFO1 ) + OLWGQR = INT(ZWORK(1)) + OLWORK = MAX(OLWORK, MINMN+OLWGQR) + END IF + END IF + IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -34 + IF ( LWORK < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -32 + IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -30 + END IF + IF( INFO /= 0 ) THEN + CALL XERBLA( 'CGEDMDQ', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN +! Return minimal and optimal workspace sizes + IWORK(1) = IMINWR + ZWORK(1) = MLWORK + ZWORK(2) = OLWORK + WORK(1) = MLRWRK + WORK(2) = MLRWRK + RETURN + END IF +!..... +! Initial QR factorization that is used to represent the +! snapshots as elements of lower dimensional subspace. +! For large scale computation with M >>N , at this place +! one can use an out of core QRF. +! + CALL CGEQRF( M, N, F, LDF, ZWORK, & + ZWORK(MINMN+1), LZWORK-MINMN, INFO1 ) +! +! Define X and Y as the snapshots representations in the +! orthogonal basis computed in the QR factorization. +! X corresponds to the leading N-1 and Y to the trailing +! N-1 snapshots. + CALL CLASET( 'L', MINMN, N-1, ZZERO, ZZERO, X, LDX ) + CALL CLACPY( 'U', MINMN, N-1, F, LDF, X, LDX ) + CALL CLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY ) + IF ( M >= 3 ) THEN + CALL CLASET( 'L', MINMN-2, N-2, ZZERO, ZZERO, & + Y(3,1), LDY ) + END IF +! +! Compute the DMD of the projected snapshot pairs (X,Y) + CALL CGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN, & + N-1, X, LDX, Y, LDY, NRNK, TOL, K, & + EIGS, Z, LDZ, RES, B, LDB, V, LDV, & + S, LDS, ZWORK(MINMN+1), LZWORK-MINMN, & + WORK, LWORK, IWORK, LIWORK, INFO1 ) + IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN + ! Return with error code. See CGEDMD for details. + INFO = INFO1 + RETURN + ELSE + INFO = INFO1 + END IF +! +! The Ritz vectors (Koopman modes) can be explicitly +! formed or returned in factored form. + IF ( WNTVEC ) THEN + ! Compute the eigenvectors explicitly. + IF ( M > MINMN ) CALL CLASET( 'A', M-MINMN, K, ZZERO, & + ZZERO, Z(MINMN+1,1), LDZ ) + CALL CUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z, & + LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 ) + ELSE IF ( WNTVCF ) THEN + ! Return the Ritz vectors (eigenvectors) in factored + ! form Z*V, where Z contains orthonormal matrix (the + ! product of Q from the initial QR factorization and + ! the SVD/POD_basis returned by CGEDMD in X) and the + ! second factor (the eigenvectors of the Rayleigh + ! quotient) is in the array V, as returned by CGEDMD. + CALL CLACPY( 'A', N, K, X, LDX, Z, LDZ ) + IF ( M > N ) CALL CLASET( 'A', M-N, K, ZZERO, ZZERO, & + Z(N+1,1), LDZ ) + CALL CUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z, & + LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 ) + END IF +! +! Some optional output variables: +! +! The upper triangular factor R in the initial QR +! factorization is optionally returned in the array Y. +! This is useful if this call to CGEDMDQ is to be + +! followed by a streaming DMD that is implemented in a +! QR compressed form. + IF ( WNTTRF ) THEN ! Return the upper triangular R in Y + CALL CLASET( 'A', MINMN, N, ZZERO, ZZERO, Y, LDY ) + CALL CLACPY( 'U', MINMN, N, F, LDF, Y, LDY ) + END IF +! +! The orthonormal/unitary factor Q in the initial QR +! factorization is optionally returned in the array F. +! Same as with the triangular factor above, this is +! useful in a streaming DMD. + IF ( WANTQ ) THEN ! Q overwrites F + CALL CUNGQR( M, MINMN, MINMN, F, LDF, ZWORK, & + ZWORK(MINMN+1), LZWORK-MINMN, INFO1 ) + END IF +! + RETURN +! + END SUBROUTINE CGEDMDQ + \ No newline at end of file diff --git a/lapack-netlib/SRC/dgedmd.f90 b/lapack-netlib/SRC/dgedmd.f90 new file mode 100644 index 000000000..20424808f --- /dev/null +++ b/lapack-netlib/SRC/dgedmd.f90 @@ -0,0 +1,1054 @@ + SUBROUTINE DGEDMD( JOBS, JOBZ, JOBR, JOBF, WHTSVD, & + M, N, X, LDX, Y, LDY, NRNK, TOL, & + K, REIG, IMEIG, Z, LDZ, RES, & + B, LDB, W, LDW, S, LDS, & + WORK, LWORK, IWORK, LIWORK, INFO ) +! March 2023 +!..... + USE iso_fortran_env + IMPLICIT NONE + INTEGER, PARAMETER :: WP = real64 +!..... +! Scalar arguments + CHARACTER, INTENT(IN) :: JOBS, JOBZ, JOBR, JOBF + INTEGER, INTENT(IN) :: WHTSVD, M, N, LDX, LDY, & + NRNK, LDZ, LDB, LDW, LDS, & + LWORK, LIWORK + INTEGER, INTENT(OUT) :: K, INFO + REAL(KIND=WP), INTENT(IN) :: TOL +! Array arguments + REAL(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*) + REAL(KIND=WP), INTENT(OUT) :: Z(LDZ,*), B(LDB,*), & + W(LDW,*), S(LDS,*) + REAL(KIND=WP), INTENT(OUT) :: REIG(*), IMEIG(*), & + RES(*) + REAL(KIND=WP), INTENT(OUT) :: WORK(*) + INTEGER, INTENT(OUT) :: IWORK(*) +!............................................................ +! Purpose +! ======= +! DGEDMD computes the Dynamic Mode Decomposition (DMD) for +! a pair of data snapshot matrices. For the input matrices +! X and Y such that Y = A*X with an unaccessible matrix +! A, DGEDMD computes a certain number of Ritz pairs of A using +! the standard Rayleigh-Ritz extraction from a subspace of +! range(X) that is determined using the leading left singular +! vectors of X. Optionally, DGEDMD returns the residuals +! of the computed Ritz pairs, the information needed for +! a refinement of the Ritz vectors, or the eigenvectors of +! the Exact DMD. +! For further details see the references listed +! below. For more details of the implementation see [3]. +! +! References +! ========== +! [1] P. Schmid: Dynamic mode decomposition of numerical +! and experimental data, +! Journal of Fluid Mechanics 656, 5-28, 2010. +! [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal +! decompositions: analysis and enhancements, +! SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. +! [3] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition I. Technical report. AIMDyn Inc. +! and LAPACK Working Note 298. +! [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. +! Brunton, N. Kutz: On Dynamic Mode Decomposition: +! Theory and Applications, Journal of Computational +! Dynamics 1(2), 391 -421, 2014. +! +!...................................................................... +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! and supported by +! - DARPA SBIR project "Koopman Operator-Based Forecasting +! for Nonstationary Processes from Near-Term, Limited +! Observational Data" Contract No: W31P4Q-21-C-0007 +! - DARPA PAI project "Physics-Informed Machine Learning +! Methodologies" Contract No: HR0011-18-9-0033 +! - DARPA MoDyL project "A Data-Driven, Operator-Theoretic +! Framework for Space-Time Analysis of Process Dynamics" +! Contract No: HR0011-16-C-0116 +! Any opinions, findings and conclusions or recommendations +! expressed in this material are those of the author and +! do not necessarily reflect the views of the DARPA SBIR +! Program Office +!============================================================ +! Distribution Statement A: +! Approved for Public Release, Distribution Unlimited. +! Cleared by DARPA on September 29, 2022 +!============================================================ +!............................................................ +! Arguments +! ========= +! JOBS (input) CHARACTER*1 +! Determines whether the initial data snapshots are scaled +! by a diagonal matrix. +! 'S' :: The data snapshots matrices X and Y are multiplied +! with a diagonal matrix D so that X*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'C' :: The snapshots are scaled as with the 'S' option. +! If it is found that an i-th column of X is zero +! vector and the corresponding i-th column of Y is +! non-zero, then the i-th column of Y is set to +! zero and a warning flag is raised. +! 'Y' :: The data snapshots matrices X and Y are multiplied +! by a diagonal matrix D so that Y*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'N' :: No data scaling. +!..... +! JOBZ (input) CHARACTER*1 +! Determines whether the eigenvectors (Koopman modes) will +! be computed. +! 'V' :: The eigenvectors (Koopman modes) will be computed +! and returned in the matrix Z. +! See the description of Z. +! 'F' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product X(:,1:K)*W, where X +! contains a POD basis (leading left singular vectors +! of the data matrix X) and W contains the eigenvectors +! of the corresponding Rayleigh quotient. +! See the descriptions of K, X, W, Z. +! 'N' :: The eigenvectors are not computed. +!..... +! JOBR (input) CHARACTER*1 +! Determines whether to compute the residuals. +! 'R' :: The residuals for the computed eigenpairs will be +! computed and stored in the array RES. +! See the description of RES. +! For this option to be legal, JOBZ must be 'V'. +! 'N' :: The residuals are not computed. +!..... +! JOBF (input) CHARACTER*1 +! Specifies whether to store information needed for post- +! processing (e.g. computing refined Ritz vectors) +! 'R' :: The matrix needed for the refinement of the Ritz +! vectors is computed and stored in the array B. +! See the description of B. +! 'E' :: The unscaled eigenvectors of the Exact DMD are +! computed and returned in the array B. See the +! description of B. +! 'N' :: No eigenvector refinement data is computed. +!..... +! WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } +! Allows for a selection of the SVD algorithm from the +! LAPACK library. +! 1 :: DGESVD (the QR SVD algorithm) +! 2 :: DGESDD (the Divide and Conquer algorithm; if enough +! workspace available, this is the fastest option) +! 3 :: DGESVDQ (the preconditioned QR SVD ; this and 4 +! are the most accurate options) +! 4 :: DGEJSV (the preconditioned Jacobi SVD; this and 3 +! are the most accurate options) +! For the four methods above, a significant difference in +! the accuracy of small singular values is possible if +! the snapshots vary in norm so that X is severely +! ill-conditioned. If small (smaller than EPS*||X||) +! singular values are of interest and JOBS=='N', then +! the options (3, 4) give the most accurate results, where +! the option 4 is slightly better and with stronger +! theoretical background. +! If JOBS=='S', i.e. the columns of X will be normalized, +! then all methods give nearly equally accurate results. +!..... +! M (input) INTEGER, M>= 0 +! The state space dimension (the row dimension of X, Y). +!..... +! N (input) INTEGER, 0 <= N <= M +! The number of data snapshot pairs +! (the number of columns of X and Y). +!..... +! X (input/output) REAL(KIND=WP) M-by-N array +! > On entry, X contains the data snapshot matrix X. It is +! assumed that the column norms of X are in the range of +! the normalized floating point numbers. +! < On exit, the leading K columns of X contain a POD basis, +! i.e. the leading K left singular vectors of the input +! data matrix X, U(:,1:K). All N columns of X contain all +! left singular vectors of the input matrix X. +! See the descriptions of K, Z and W. +!..... +! LDX (input) INTEGER, LDX >= M +! The leading dimension of the array X. +!..... +! Y (input/workspace/output) REAL(KIND=WP) M-by-N array +! > On entry, Y contains the data snapshot matrix Y +! < On exit, +! If JOBR == 'R', the leading K columns of Y contain +! the residual vectors for the computed Ritz pairs. +! See the description of RES. +! If JOBR == 'N', Y contains the original input data, +! scaled according to the value of JOBS. +!..... +! LDY (input) INTEGER , LDY >= M +! The leading dimension of the array Y. +!..... +! NRNK (input) INTEGER +! Determines the mode how to compute the numerical rank, +! i.e. how to truncate small singular values of the input +! matrix X. On input, if +! NRNK = -1 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(1). +! This option is recommended. +! NRNK = -2 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(i-1) +! This option is included for R&D purposes. +! It requires highly accurate SVD, which +! may not be feasible. +! +! The numerical rank can be enforced by using positive +! value of NRNK as follows: +! 0 < NRNK <= N :: at most NRNK largest singular values +! will be used. If the number of the computed nonzero +! singular values is less than NRNK, then only those +! nonzero values will be used and the actually used +! dimension is less than NRNK. The actual number of +! the nonzero singular values is returned in the variable +! K. See the descriptions of TOL and K. +!..... +! TOL (input) REAL(KIND=WP), 0 <= TOL < 1 +! The tolerance for truncating small singular values. +! See the description of NRNK. +!..... +! K (output) INTEGER, 0 <= K <= N +! The dimension of the POD basis for the data snapshot +! matrix X and the number of the computed Ritz pairs. +! The value of K is determined according to the rule set +! by the parameters NRNK and TOL. +! See the descriptions of NRNK and TOL. +!..... +! REIG (output) REAL(KIND=WP) N-by-1 array +! The leading K (K<=N) entries of REIG contain +! the real parts of the computed eigenvalues +! REIG(1:K) + sqrt(-1)*IMEIG(1:K). +! See the descriptions of K, IMEIG, and Z. +!..... +! IMEIG (output) REAL(KIND=WP) N-by-1 array +! The leading K (K<=N) entries of IMEIG contain +! the imaginary parts of the computed eigenvalues +! REIG(1:K) + sqrt(-1)*IMEIG(1:K). +! The eigenvalues are determined as follows: +! If IMEIG(i) == 0, then the corresponding eigenvalue is +! real, LAMBDA(i) = REIG(i). +! If IMEIG(i)>0, then the corresponding complex +! conjugate pair of eigenvalues reads +! LAMBDA(i) = REIG(i) + sqrt(-1)*IMAG(i) +! LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i) +! That is, complex conjugate pairs have consecutive +! indices (i,i+1), with the positive imaginary part +! listed first. +! See the descriptions of K, REIG, and Z. +!..... +! Z (workspace/output) REAL(KIND=WP) M-by-N array +! If JOBZ =='V' then +! Z contains real Ritz vectors as follows: +! If IMEIG(i)=0, then Z(:,i) is an eigenvector of +! the i-th Ritz value; ||Z(:,i)||_2=1. +! If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then +! [Z(:,i) Z(:,i+1)] span an invariant subspace and +! the Ritz values extracted from this subspace are +! REIG(i) + sqrt(-1)*IMEIG(i) and +! REIG(i) - sqrt(-1)*IMEIG(i). +! The corresponding eigenvectors are +! Z(:,i) + sqrt(-1)*Z(:,i+1) and +! Z(:,i) - sqrt(-1)*Z(:,i+1), respectively. +! || Z(:,i:i+1)||_F = 1. +! If JOBZ == 'F', then the above descriptions hold for +! the columns of X(:,1:K)*W(1:K,1:K), where the columns +! of W(1:k,1:K) are the computed eigenvectors of the +! K-by-K Rayleigh quotient. The columns of W(1:K,1:K) +! are similarly structured: If IMEIG(i) == 0 then +! X(:,1:K)*W(:,i) is an eigenvector, and if IMEIG(i)>0 +! then X(:,1:K)*W(:,i)+sqrt(-1)*X(:,1:K)*W(:,i+1) and +! X(:,1:K)*W(:,i)-sqrt(-1)*X(:,1:K)*W(:,i+1) +! are the eigenvectors of LAMBDA(i), LAMBDA(i+1). +! See the descriptions of REIG, IMEIG, X and W. +!..... +! LDZ (input) INTEGER , LDZ >= M +! The leading dimension of the array Z. +!..... +! RES (output) REAL(KIND=WP) N-by-1 array +! RES(1:K) contains the residuals for the K computed +! Ritz pairs. +! If LAMBDA(i) is real, then +! RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2. +! If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair +! then +! RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F +! where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ] +! [-imag(LAMBDA(i)) real(LAMBDA(i)) ]. +! It holds that +! RES(i) = || A*ZC(:,i) - LAMBDA(i) *ZC(:,i) ||_2 +! RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2 +! where ZC(:,i) = Z(:,i) + sqrt(-1)*Z(:,i+1) +! ZC(:,i+1) = Z(:,i) - sqrt(-1)*Z(:,i+1) +! See the description of REIG, IMEIG and Z. +!..... +! B (output) REAL(KIND=WP) M-by-N array. +! IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can +! be used for computing the refined vectors; see further +! details in the provided references. +! If JOBF == 'E', B(1:M,1;K) contains +! A*U(:,1:K)*W(1:K,1:K), which are the vectors from the +! Exact DMD, up to scaling by the inverse eigenvalues. +! If JOBF =='N', then B is not referenced. +! See the descriptions of X, W, K. +!..... +! LDB (input) INTEGER, LDB >= M +! The leading dimension of the array B. +!..... +! W (workspace/output) REAL(KIND=WP) N-by-N array +! On exit, W(1:K,1:K) contains the K computed +! eigenvectors of the matrix Rayleigh quotient (real and +! imaginary parts for each complex conjugate pair of the +! eigenvalues). The Ritz vectors (returned in Z) are the +! product of X (containing a POD basis for the input +! matrix X) and W. See the descriptions of K, S, X and Z. +! W is also used as a workspace to temporarily store the +! right singular vectors of X. +!..... +! LDW (input) INTEGER, LDW >= N +! The leading dimension of the array W. +!..... +! S (workspace/output) REAL(KIND=WP) N-by-N array +! The array S(1:K,1:K) is used for the matrix Rayleigh +! quotient. This content is overwritten during +! the eigenvalue decomposition by DGEEV. +! See the description of K. +!..... +! LDS (input) INTEGER, LDS >= N +! The leading dimension of the array S. +!..... +! WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array +! On exit, WORK(1:N) contains the singular values of +! X (for JOBS=='N') or column scaled X (JOBS=='S', 'C'). +! If WHTSVD==4, then WORK(N+1) and WORK(N+2) contain +! scaling factor WORK(N+2)/WORK(N+1) used to scale X +! and Y to avoid overflow in the SVD of X. +! This may be of interest if the scaling option is off +! and as many as possible smallest eigenvalues are +! desired to the highest feasible accuracy. +! If the call to DGEDMD is only workspace query, then +! WORK(1) contains the minimal workspace length and +! WORK(2) is the optimal workspace length. Hence, the +! leng of work is at least 2. +! See the description of LWORK. +!..... +! LWORK (input) INTEGER +! The minimal length of the workspace vector WORK. +! LWORK is calculated as follows: +! If WHTSVD == 1 :: +! If JOBZ == 'V', then +! LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)). +! If JOBZ == 'N' then +! LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)). +! Here LWORK_SVD = MAX(1,3*N+M,5*N) is the minimal +! workspace length of DGESVD. +! If WHTSVD == 2 :: +! If JOBZ == 'V', then +! LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)) +! If JOBZ == 'N', then +! LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)) +! Here LWORK_SVD = MAX(M, 5*N*N+4*N)+3*N*N is the +! minimal workspace length of DGESDD. +! If WHTSVD == 3 :: +! If JOBZ == 'V', then +! LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N)) +! If JOBZ == 'N', then +! LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N)) +! Here LWORK_SVD = N+M+MAX(3*N+1, +! MAX(1,3*N+M,5*N),MAX(1,N)) +! is the minimal workspace length of DGESVDQ. +! If WHTSVD == 4 :: +! If JOBZ == 'V', then +! LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N)) +! If JOBZ == 'N', then +! LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N)) +! Here LWORK_SVD = MAX(7,2*M+N,6*N+2*N*N) is the +! minimal workspace length of DGEJSV. +! The above expressions are not simplified in order to +! make the usage of WORK more transparent, and for +! easier checking. In any case, LWORK >= 2. +! If on entry LWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! IWORK (workspace/output) INTEGER LIWORK-by-1 array +! Workspace that is required only if WHTSVD equals +! 2 , 3 or 4. (See the description of WHTSVD). +! If on entry LWORK =-1 or LIWORK=-1, then the +! minimal length of IWORK is computed and returned in +! IWORK(1). See the description of LIWORK. +!..... +! LIWORK (input) INTEGER +! The minimal length of the workspace vector IWORK. +! If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 +! If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)) +! If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1) +! If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N) +! If on entry LIWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! INFO (output) INTEGER +! -i < 0 :: On entry, the i-th argument had an +! illegal value +! = 0 :: Successful return. +! = 1 :: Void input. Quick exit (M=0 or N=0). +! = 2 :: The SVD computation of X did not converge. +! Suggestion: Check the input data and/or +! repeat with different WHTSVD. +! = 3 :: The computation of the eigenvalues did not +! converge. +! = 4 :: If data scaling was requested on input and +! the procedure found inconsistency in the data +! such that for some column index i, +! X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set +! to zero if JOBS=='C'. The computation proceeds +! with original or modified data and warning +! flag is set with INFO=4. +!............................................................. +!............................................................. +! Parameters +! ~~~~~~~~~~ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP + +! Local scalars +! ~~~~~~~~~~~~~ + REAL(KIND=WP) :: OFL, ROOTSC, SCALE, SMALL, & + SSUM, XSCL1, XSCL2 + INTEGER :: i, j, IMINWR, INFO1, INFO2, & + LWRKEV, LWRSDD, LWRSVD, & + LWRSVQ, MLWORK, MWRKEV, MWRSDD, & + MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, & + OLWORK + LOGICAL :: BADXY, LQUERY, SCCOLX, SCCOLY, & + WNTEX, WNTREF, WNTRES, WNTVEC + CHARACTER :: JOBZL, T_OR_N + CHARACTER :: JSVOPT + +! Local arrays +! ~~~~~~~~~~~~ + REAL(KIND=WP) :: AB(2,2), RDUMMY(2), RDUMMY2(2) +! External functions (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~ + REAL(KIND=WP) DLANGE, DLAMCH, DNRM2 + EXTERNAL DLANGE, DLAMCH, DNRM2, IDAMAX + INTEGER IDAMAX + LOGICAL DISNAN, LSAME + EXTERNAL DISNAN, LSAME + +! External subroutines (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL DAXPY, DGEMM, DSCAL + EXTERNAL DGEEV, DGEJSV, DGESDD, DGESVD, DGESVDQ, & + DLACPY, DLASCL, DLASSQ, XERBLA + +! Intrinsic functions +! ~~~~~~~~~~~~~~~~~~~ + INTRINSIC DBLE, INT, MAX, SQRT +!............................................................ +! +! Test the input arguments +! + WNTRES = LSAME(JOBR,'R') + SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C') + SCCOLY = LSAME(JOBS,'Y') + WNTVEC = LSAME(JOBZ,'V') + WNTREF = LSAME(JOBF,'R') + WNTEX = LSAME(JOBF,'E') + INFO = 0 + LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) ) +! + IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. & + LSAME(JOBS,'N')) ) THEN + INFO = -1 + ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N') & + .OR. LSAME(JOBZ,'F')) ) THEN + INFO = -2 + ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR. & + ( WNTRES .AND. (.NOT.WNTVEC) ) ) THEN + INFO = -3 + ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR. & + LSAME(JOBF,'N') ) ) THEN + INFO = -4 + ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR. & + (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN + INFO = -5 + ELSE IF ( M < 0 ) THEN + INFO = -6 + ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN + INFO = -7 + ELSE IF ( LDX < M ) THEN + INFO = -9 + ELSE IF ( LDY < M ) THEN + INFO = -11 + ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. & + ((NRNK >= 1).AND.(NRNK <=N ))) ) THEN + INFO = -12 + ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN + INFO = -13 + ELSE IF ( LDZ < M ) THEN + INFO = -18 + ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN + INFO = -21 + ELSE IF ( LDW < N ) THEN + INFO = -23 + ELSE IF ( LDS < N ) THEN + INFO = -25 + END IF +! + IF ( INFO == 0 ) THEN + ! Compute the minimal and the optimal workspace + ! requirements. Simulate running the code and + ! determine minimal and optimal sizes of the + ! workspace at any moment of the run. + IF ( N == 0 ) THEN + ! Quick return. All output except K is void. + ! INFO=1 signals the void input. + ! In case of a workspace query, the default + ! minimal workspace lengths are returned. + IF ( LQUERY ) THEN + IWORK(1) = 1 + WORK(1) = 2 + WORK(2) = 2 + ELSE + K = 0 + END IF + INFO = 1 + RETURN + END IF + MLWORK = MAX(2,N) + OLWORK = MAX(2,N) + IMINWR = 1 + SELECT CASE ( WHTSVD ) + CASE (1) + ! The following is specified as the minimal + ! length of WORK in the definition of DGESVD: + ! MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) + MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) + MLWORK = MAX(MLWORK,N + MWRSVD) + IF ( LQUERY ) THEN + CALL DGESVD( 'O', 'S', M, N, X, LDX, WORK, & + B, LDB, W, LDW, RDUMMY, -1, INFO1 ) + LWRSVD = MAX( MWRSVD, INT( RDUMMY(1) ) ) + OLWORK = MAX(OLWORK,N + LWRSVD) + END IF + CASE (2) + ! The following is specified as the minimal + ! length of WORK in the definition of DGESDD: + ! MWRSDD = 3*MIN(M,N)*MIN(M,N) + + ! MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) ) + ! IMINWR = 8*MIN(M,N) + MWRSDD = 3*MIN(M,N)*MIN(M,N) + & + MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) ) + MLWORK = MAX(MLWORK,N + MWRSDD) + IMINWR = 8*MIN(M,N) + IF ( LQUERY ) THEN + CALL DGESDD( 'O', M, N, X, LDX, WORK, B, & + LDB, W, LDW, RDUMMY, -1, IWORK, INFO1 ) + LWRSDD = MAX( MWRSDD, INT( RDUMMY(1) ) ) + OLWORK = MAX(OLWORK,N + LWRSDD) + END IF + CASE (3) + !LWQP3 = 3*N+1 + !LWORQ = MAX(N, 1) + !MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) + !MWRSVQ = N + MAX( LWQP3, MWRSVD, LWORQ ) + MAX(M,2) + !MLWORK = N + MWRSVQ + !IMINWR = M+N-1 + CALL DGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, & + X, LDX, WORK, Z, LDZ, W, LDW, & + NUMRNK, IWORK, LIWORK, RDUMMY, & + -1, RDUMMY2, -1, INFO1 ) + IMINWR = IWORK(1) + MWRSVQ = INT(RDUMMY(2)) + MLWORK = MAX(MLWORK,N+MWRSVQ+INT(RDUMMY2(1))) + IF ( LQUERY ) THEN + LWRSVQ = MAX( MWRSVQ, INT(RDUMMY(1)) ) + OLWORK = MAX(OLWORK,N+LWRSVQ+INT(RDUMMY2(1))) + END IF + CASE (4) + JSVOPT = 'J' + !MWRSVJ = MAX( 7, 2*M+N, 6*N+2*N*N ) ! for JSVOPT='V' + MWRSVJ = MAX( 7, 2*M+N, 4*N+N*N, 2*N+N*N+6 ) + MLWORK = MAX(MLWORK,N+MWRSVJ) + IMINWR = MAX( 3, M+3*N ) + IF ( LQUERY ) THEN + OLWORK = MAX(OLWORK,N+MWRSVJ) + END IF + END SELECT + IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN + JOBZL = 'V' + ELSE + JOBZL = 'N' + END IF + ! Workspace calculation to the DGEEV call + IF ( LSAME(JOBZL,'V') ) THEN + MWRKEV = MAX( 1, 4*N ) + ELSE + MWRKEV = MAX( 1, 3*N ) + END IF + MLWORK = MAX(MLWORK,N+MWRKEV) + IF ( LQUERY ) THEN + CALL DGEEV( 'N', JOBZL, N, S, LDS, REIG, & + IMEIG, W, LDW, W, LDW, RDUMMY, -1, INFO1 ) + LWRKEV = MAX( MWRKEV, INT(RDUMMY(1)) ) + OLWORK = MAX( OLWORK, N+LWRKEV ) + END IF +! + IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -29 + IF ( LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -27 + END IF +! + IF( INFO /= 0 ) THEN + CALL XERBLA( 'DGEDMD', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN +! Return minimal and optimal workspace sizes + IWORK(1) = IMINWR + WORK(1) = MLWORK + WORK(2) = OLWORK + RETURN + END IF +!............................................................ +! + OFL = DLAMCH('O') + SMALL = DLAMCH('S') + BADXY = .FALSE. +! +! <1> Optional scaling of the snapshots (columns of X, Y) +! ========================================================== + IF ( SCCOLX ) THEN + ! The columns of X will be normalized. + ! To prevent overflows, the column norms of X are + ! carefully computed using DLASSQ. + K = 0 + DO i = 1, N + !WORK(i) = DNRM2( M, X(1,i), 1 ) + SCALE = ZERO + CALL DLASSQ( M, X(1,i), 1, SCALE, SSUM ) + IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN + K = 0 + INFO = -8 + CALL XERBLA('DGEDMD',-INFO) + END IF + IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN + ROOTSC = SQRT(SSUM) + IF ( SCALE .GE. (OFL / ROOTSC) ) THEN +! Norm of X(:,i) overflows. First, X(:,i) +! is scaled by +! ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2. +! Next, the norm of X(:,i) is stored without +! overflow as WORK(i) = - SCALE * (ROOTSC/M), +! the minus sign indicating the 1/M factor. +! Scaling is performed without overflow, and +! underflow may occur in the smallest entries +! of X(:,i). The relative backward and forward +! errors are small in the ell_2 norm. + CALL DLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, & + M, 1, X(1,i), M, INFO2 ) + WORK(i) = - SCALE * ( ROOTSC / DBLE(M) ) + ELSE +! X(:,i) will be scaled to unit 2-norm + WORK(i) = SCALE * ROOTSC + CALL DLASCL( 'G',0, 0, WORK(i), ONE, M, 1, & + X(1,i), M, INFO2 ) ! LAPACK CALL +! X(1:M,i) = (ONE/WORK(i)) * X(1:M,i) ! INTRINSIC + END IF + ELSE + WORK(i) = ZERO + K = K + 1 + END IF + END DO + IF ( K == N ) THEN + ! All columns of X are zero. Return error code -8. + ! (the 8th input variable had an illegal value) + K = 0 + INFO = -8 + CALL XERBLA('DGEDMD',-INFO) + RETURN + END IF + DO i = 1, N +! Now, apply the same scaling to the columns of Y. + IF ( WORK(i) > ZERO ) THEN + CALL DSCAL( M, ONE/WORK(i), Y(1,i), 1 ) ! BLAS CALL +! Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i) ! INTRINSIC + ELSE IF ( WORK(i) < ZERO ) THEN + CALL DLASCL( 'G', 0, 0, -WORK(i), & + ONE/DBLE(M), M, 1, Y(1,i), M, INFO2 ) ! LAPACK CALL + ELSE IF ( Y(IDAMAX(M, Y(1,i),1),i ) & + /= ZERO ) THEN +! X(:,i) is zero vector. For consistency, +! Y(:,i) should also be zero. If Y(:,i) is not +! zero, then the data might be inconsistent or +! corrupted. If JOBS == 'C', Y(:,i) is set to +! zero and a warning flag is raised. +! The computation continues but the +! situation will be reported in the output. + BADXY = .TRUE. + IF ( LSAME(JOBS,'C')) & + CALL DSCAL( M, ZERO, Y(1,i), 1 ) ! BLAS CALL + END IF + END DO + END IF + ! + IF ( SCCOLY ) THEN + ! The columns of Y will be normalized. + ! To prevent overflows, the column norms of Y are + ! carefully computed using DLASSQ. + DO i = 1, N + !WORK(i) = DNRM2( M, Y(1,i), 1 ) + SCALE = ZERO + CALL DLASSQ( M, Y(1,i), 1, SCALE, SSUM ) + IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN + K = 0 + INFO = -10 + CALL XERBLA('DGEDMD',-INFO) + END IF + IF ( SCALE /= ZERO .AND. (SSUM /= ZERO) ) THEN + ROOTSC = SQRT(SSUM) + IF ( SCALE .GE. (OFL / ROOTSC) ) THEN +! Norm of Y(:,i) overflows. First, Y(:,i) +! is scaled by +! ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2. +! Next, the norm of Y(:,i) is stored without +! overflow as WORK(i) = - SCALE * (ROOTSC/M), +! the minus sign indicating the 1/M factor. +! Scaling is performed without overflow, and +! underflow may occur in the smallest entries +! of Y(:,i). The relative backward and forward +! errors are small in the ell_2 norm. + CALL DLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, & + M, 1, Y(1,i), M, INFO2 ) + WORK(i) = - SCALE * ( ROOTSC / DBLE(M) ) + ELSE +! X(:,i) will be scaled to unit 2-norm + WORK(i) = SCALE * ROOTSC + CALL DLASCL( 'G',0, 0, WORK(i), ONE, M, 1, & + Y(1,i), M, INFO2 ) ! LAPACK CALL +! Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i) ! INTRINSIC + END IF + ELSE + WORK(i) = ZERO + END IF + END DO + DO i = 1, N +! Now, apply the same scaling to the columns of X. + IF ( WORK(i) > ZERO ) THEN + CALL DSCAL( M, ONE/WORK(i), X(1,i), 1 ) ! BLAS CALL +! X(1:M,i) = (ONE/WORK(i)) * X(1:M,i) ! INTRINSIC + ELSE IF ( WORK(i) < ZERO ) THEN + CALL DLASCL( 'G', 0, 0, -WORK(i), & + ONE/DBLE(M), M, 1, X(1,i), M, INFO2 ) ! LAPACK CALL + ELSE IF ( X(IDAMAX(M, X(1,i),1),i ) & + /= ZERO ) THEN +! Y(:,i) is zero vector. If X(:,i) is not +! zero, then a warning flag is raised. +! The computation continues but the +! situation will be reported in the output. + BADXY = .TRUE. + END IF + END DO + END IF +! +! <2> SVD of the data snapshot matrix X. +! ===================================== +! The left singular vectors are stored in the array X. +! The right singular vectors are in the array W. +! The array W will later on contain the eigenvectors +! of a Rayleigh quotient. + NUMRNK = N + SELECT CASE ( WHTSVD ) + CASE (1) + CALL DGESVD( 'O', 'S', M, N, X, LDX, WORK, B, & + LDB, W, LDW, WORK(N+1), LWORK-N, INFO1 ) ! LAPACK CALL + T_OR_N = 'T' + CASE (2) + CALL DGESDD( 'O', M, N, X, LDX, WORK, B, LDB, W, & + LDW, WORK(N+1), LWORK-N, IWORK, INFO1 ) ! LAPACK CALL + T_OR_N = 'T' + CASE (3) + CALL DGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, & + X, LDX, WORK, Z, LDZ, W, LDW, & + NUMRNK, IWORK, LIWORK, WORK(N+MAX(2,M)+1),& + LWORK-N-MAX(2,M), WORK(N+1), MAX(2,M), INFO1) ! LAPACK CALL + CALL DLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX ) ! LAPACK CALL + T_OR_N = 'T' + CASE (4) + CALL DGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, & + N, X, LDX, WORK, Z, LDZ, W, LDW, & + WORK(N+1), LWORK-N, IWORK, INFO1 ) ! LAPACK CALL + CALL DLACPY( 'A', M, N, Z, LDZ, X, LDX ) ! LAPACK CALL + T_OR_N = 'N' + XSCL1 = WORK(N+1) + XSCL2 = WORK(N+2) + IF ( XSCL1 /= XSCL2 ) THEN + ! This is an exceptional situation. If the + ! data matrices are not scaled and the + ! largest singular value of X overflows. + ! In that case DGEJSV can return the SVD + ! in scaled form. The scaling factor can be used + ! to rescale the data (X and Y). + CALL DLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2 ) + END IF + END SELECT +! + IF ( INFO1 > 0 ) THEN + ! The SVD selected subroutine did not converge. + ! Return with an error code. + INFO = 2 + RETURN + END IF +! + IF ( WORK(1) == ZERO ) THEN + ! The largest computed singular value of (scaled) + ! X is zero. Return error code -8 + ! (the 8th input variable had an illegal value). + K = 0 + INFO = -8 + CALL XERBLA('DGEDMD',-INFO) + RETURN + END IF +! + !<3> Determine the numerical rank of the data + ! snapshots matrix X. This depends on the + ! parameters NRNK and TOL. + + SELECT CASE ( NRNK ) + CASE ( -1 ) + K = 1 + DO i = 2, NUMRNK + IF ( ( WORK(i) <= WORK(1)*TOL ) .OR. & + ( WORK(i) <= SMALL ) ) EXIT + K = K + 1 + END DO + CASE ( -2 ) + K = 1 + DO i = 1, NUMRNK-1 + IF ( ( WORK(i+1) <= WORK(i)*TOL ) .OR. & + ( WORK(i) <= SMALL ) ) EXIT + K = K + 1 + END DO + CASE DEFAULT + K = 1 + DO i = 2, NRNK + IF ( WORK(i) <= SMALL ) EXIT + K = K + 1 + END DO + END SELECT + ! Now, U = X(1:M,1:K) is the SVD/POD basis for the + ! snapshot data in the input matrix X. + + !<4> Compute the Rayleigh quotient S = U^T * A * U. + ! Depending on the requested outputs, the computation + ! is organized to compute additional auxiliary + ! matrices (for the residuals and refinements). + ! + ! In all formulas below, we need V_k*Sigma_k^(-1) + ! where either V_k is in W(1:N,1:K), or V_k^T is in + ! W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)). + IF ( LSAME(T_OR_N, 'N') ) THEN + DO i = 1, K + CALL DSCAL( N, ONE/WORK(i), W(1,i), 1 ) ! BLAS CALL + ! W(1:N,i) = (ONE/WORK(i)) * W(1:N,i) ! INTRINSIC + END DO + ELSE + ! This non-unit stride access is due to the fact + ! that DGESVD, DGESVDQ and DGESDD return the + ! transposed matrix of the right singular vectors. + !DO i = 1, K + ! CALL DSCAL( N, ONE/WORK(i), W(i,1), LDW ) ! BLAS CALL + ! ! W(i,1:N) = (ONE/WORK(i)) * W(i,1:N) ! INTRINSIC + !END DO + DO i = 1, K + WORK(N+i) = ONE/WORK(i) + END DO + DO j = 1, N + DO i = 1, K + W(i,j) = (WORK(N+i))*W(i,j) + END DO + END DO + END IF +! + IF ( WNTREF ) THEN + ! + ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K))) + ! for computing the refined Ritz vectors + ! (optionally, outside DGEDMD). + CALL DGEMM( 'N', T_OR_N, M, K, N, ONE, Y, LDY, W, & + LDW, ZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T' + ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K)) ! INTRINSIC, for T_OR_N=='N' + ! + ! At this point Z contains + ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and + ! this is needed for computing the residuals. + ! This matrix is returned in the array B and + ! it can be used to compute refined Ritz vectors. + CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB ) ! BLAS CALL + ! B(1:M,1:K) = Z(1:M,1:K) ! INTRINSIC + + CALL DGEMM( 'T', 'N', K, K, M, ONE, X, LDX, Z, & + LDZ, ZERO, S, LDS ) ! BLAS CALL + ! S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC + ! At this point S = U^T * A * U is the Rayleigh quotient. + ELSE + ! A * U(:,1:K) is not explicitly needed and the + ! computation is organized differently. The Rayleigh + ! quotient is computed more efficiently. + CALL DGEMM( 'T', 'N', K, N, M, ONE, X, LDX, Y, LDY, & + ZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) ) ! INTRINSIC + ! In the two DGEMM calls here, can use K for LDZ. + CALL DGEMM( 'N', T_OR_N, K, K, N, ONE, Z, LDZ, W, & + LDW, ZERO, S, LDS ) ! BLAS CALL + ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T' + ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K))) ! INTRINSIC, for T_OR_N=='N' + ! At this point S = U^T * A * U is the Rayleigh quotient. + ! If the residuals are requested, save scaled V_k into Z. + ! Recall that V_k or V_k^T is stored in W. + IF ( WNTRES .OR. WNTEX ) THEN + IF ( LSAME(T_OR_N, 'N') ) THEN + CALL DLACPY( 'A', N, K, W, LDW, Z, LDZ ) + ELSE + CALL DLACPY( 'A', K, N, W, LDW, Z, LDZ ) + END IF + END IF + END IF +! + !<5> Compute the Ritz values and (if requested) the + ! right eigenvectors of the Rayleigh quotient. + ! + CALL DGEEV( 'N', JOBZL, K, S, LDS, REIG, IMEIG, W, & + LDW, W, LDW, WORK(N+1), LWORK-N, INFO1 ) ! LAPACK CALL + ! + ! W(1:K,1:K) contains the eigenvectors of the Rayleigh + ! quotient. Even in the case of complex spectrum, all + ! computation is done in real arithmetic. REIG and + ! IMEIG are the real and the imaginary parts of the + ! eigenvalues, so that the spectrum is given as + ! REIG(:) + sqrt(-1)*IMEIG(:). Complex conjugate pairs + ! are listed at consecutive positions. For such a + ! complex conjugate pair of the eigenvalues, the + ! corresponding eigenvectors are also a complex + ! conjugate pair with the real and imaginary parts + ! stored column-wise in W at the corresponding + ! consecutive column indices. See the description of Z. + ! Also, see the description of DGEEV. + IF ( INFO1 > 0 ) THEN + ! DGEEV failed to compute the eigenvalues and + ! eigenvectors of the Rayleigh quotient. + INFO = 3 + RETURN + END IF +! + ! <6> Compute the eigenvectors (if requested) and, + ! the residuals (if requested). + ! + IF ( WNTVEC .OR. WNTEX ) THEN + IF ( WNTRES ) THEN + IF ( WNTREF ) THEN + ! Here, if the refinement is requested, we have + ! A*U(:,1:K) already computed and stored in Z. + ! For the residuals, need Y = A * U(:,1;K) * W. + CALL DGEMM( 'N', 'N', M, K, K, ONE, Z, LDZ, W, & + LDW, ZERO, Y, LDY ) ! BLAS CALL + ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K) ! INTRINSIC + ! This frees Z; Y contains A * U(:,1:K) * W. + ELSE + ! Compute S = V_k * Sigma_k^(-1) * W, where + ! V_k * Sigma_k^(-1) is stored in Z + CALL DGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, & + W, LDW, ZERO, S, LDS) + ! Then, compute Z = Y * S = + ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = + ! = A * U(:,1:K) * W(1:K,1:K) + CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, & + LDS, ZERO, Z, LDZ) + ! Save a copy of Z into Y and free Z for holding + ! the Ritz vectors. + CALL DLACPY( 'A', M, K, Z, LDZ, Y, LDY ) + IF ( WNTEX ) CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB ) + END IF + ELSE IF ( WNTEX ) THEN + ! Compute S = V_k * Sigma_k^(-1) * W, where + ! V_k * Sigma_k^(-1) is stored in Z + CALL DGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, & + W, LDW, ZERO, S, LDS ) + ! Then, compute Z = Y * S = + ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = + ! = A * U(:,1:K) * W(1:K,1:K) + CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, & + LDS, ZERO, B, LDB ) + ! The above call replaces the following two calls + ! that were used in the developing-testing phase. + ! CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, & + ! LDS, ZERO, Z, LDZ) + ! Save a copy of Z into B and free Z for holding + ! the Ritz vectors. + ! CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB ) + END IF +! + ! Compute the real form of the Ritz vectors + IF ( WNTVEC ) CALL DGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, & + ZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K)) ! INTRINSIC +! + IF ( WNTRES ) THEN + i = 1 + DO WHILE ( i <= K ) + IF ( IMEIG(i) == ZERO ) THEN + ! have a real eigenvalue with real eigenvector + CALL DAXPY( M, -REIG(i), Z(1,i), 1, Y(1,i), 1 ) ! BLAS CALL + ! Y(1:M,i) = Y(1:M,i) - REIG(i) * Z(1:M,i) ! INTRINSIC + RES(i) = DNRM2( M, Y(1,i), 1) ! BLAS CALL + i = i + 1 + ELSE + ! Have a complex conjugate pair + ! REIG(i) +- sqrt(-1)*IMEIG(i). + ! Since all computation is done in real + ! arithmetic, the formula for the residual + ! is recast for real representation of the + ! complex conjugate eigenpair. See the + ! description of RES. + AB(1,1) = REIG(i) + AB(2,1) = -IMEIG(i) + AB(1,2) = IMEIG(i) + AB(2,2) = REIG(i) + CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), & + LDZ, AB, 2, ONE, Y(1,i), LDY ) ! BLAS CALL + ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB ! INTRINSIC + RES(i) = DLANGE( 'F', M, 2, Y(1,i), LDY, & + WORK(N+1) ) ! LAPACK CALL + RES(i+1) = RES(i) + i = i + 2 + END IF + END DO + END IF + END IF +! + IF ( WHTSVD == 4 ) THEN + WORK(N+1) = XSCL1 + WORK(N+2) = XSCL2 + END IF +! +! Successful exit. + IF ( .NOT. BADXY ) THEN + INFO = 0 + ELSE + ! A warning on possible data inconsistency. + ! This should be a rare event. + INFO = 4 + END IF +!............................................................ + RETURN +! ...... + END SUBROUTINE DGEDMD + diff --git a/lapack-netlib/SRC/dgedmdq.f90 b/lapack-netlib/SRC/dgedmdq.f90 new file mode 100644 index 000000000..bedfba472 --- /dev/null +++ b/lapack-netlib/SRC/dgedmdq.f90 @@ -0,0 +1,704 @@ +SUBROUTINE DGEDMDQ( JOBS, JOBZ, JOBR, JOBQ, JOBT, JOBF, & + WHTSVD, M, N, F, LDF, X, LDX, Y, & + LDY, NRNK, TOL, K, REIG, IMEIG, & + Z, LDZ, RES, B, LDB, V, LDV, & + S, LDS, WORK, LWORK, IWORK, LIWORK, INFO ) +! March 2023 +!..... + USE iso_fortran_env + IMPLICIT NONE + INTEGER, PARAMETER :: WP = real64 +!..... +! Scalar arguments + CHARACTER, INTENT(IN) :: JOBS, JOBZ, JOBR, JOBQ, & + JOBT, JOBF + INTEGER, INTENT(IN) :: WHTSVD, M, N, LDF, LDX, & + LDY, NRNK, LDZ, LDB, LDV, & + LDS, LWORK, LIWORK + INTEGER, INTENT(OUT) :: INFO, K + REAL(KIND=WP), INTENT(IN) :: TOL +! Array arguments + REAL(KIND=WP), INTENT(INOUT) :: F(LDF,*) + REAL(KIND=WP), INTENT(OUT) :: X(LDX,*), Y(LDY,*), & + Z(LDZ,*), B(LDB,*), & + V(LDV,*), S(LDS,*) + REAL(KIND=WP), INTENT(OUT) :: REIG(*), IMEIG(*), & + RES(*) + REAL(KIND=WP), INTENT(OUT) :: WORK(*) + INTEGER, INTENT(OUT) :: IWORK(*) +!..... +! Purpose +! ======= +! DGEDMDQ computes the Dynamic Mode Decomposition (DMD) for +! a pair of data snapshot matrices, using a QR factorization +! based compression of the data. For the input matrices +! X and Y such that Y = A*X with an unaccessible matrix +! A, DGEDMDQ computes a certain number of Ritz pairs of A using +! the standard Rayleigh-Ritz extraction from a subspace of +! range(X) that is determined using the leading left singular +! vectors of X. Optionally, DGEDMDQ returns the residuals +! of the computed Ritz pairs, the information needed for +! a refinement of the Ritz vectors, or the eigenvectors of +! the Exact DMD. +! For further details see the references listed +! below. For more details of the implementation see [3]. +! +! References +! ========== +! [1] P. Schmid: Dynamic mode decomposition of numerical +! and experimental data, +! Journal of Fluid Mechanics 656, 5-28, 2010. +! [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal +! decompositions: analysis and enhancements, +! SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. +! [3] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition I. Technical report. AIMDyn Inc. +! and LAPACK Working Note 298. +! [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. +! Brunton, N. Kutz: On Dynamic Mode Decomposition: +! Theory and Applications, Journal of Computational +! Dynamics 1(2), 391 -421, 2014. +! +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! and supported by +! - DARPA SBIR project "Koopman Operator-Based Forecasting +! for Nonstationary Processes from Near-Term, Limited +! Observational Data" Contract No: W31P4Q-21-C-0007 +! - DARPA PAI project "Physics-Informed Machine Learning +! Methodologies" Contract No: HR0011-18-9-0033 +! - DARPA MoDyL project "A Data-Driven, Operator-Theoretic +! Framework for Space-Time Analysis of Process Dynamics" +! Contract No: HR0011-16-C-0116 +! Any opinions, findings and conclusions or recommendations +! expressed in this material are those of the author and +! do not necessarily reflect the views of the DARPA SBIR +! Program Office. +!============================================================ +! Distribution Statement A: +! Approved for Public Release, Distribution Unlimited. +! Cleared by DARPA on September 29, 2022 +!============================================================ +!...................................................................... +! Arguments +! ========= +! JOBS (input) CHARACTER*1 +! Determines whether the initial data snapshots are scaled +! by a diagonal matrix. The data snapshots are the columns +! of F. The leading N-1 columns of F are denoted X and the +! trailing N-1 columns are denoted Y. +! 'S' :: The data snapshots matrices X and Y are multiplied +! with a diagonal matrix D so that X*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'C' :: The snapshots are scaled as with the 'S' option. +! If it is found that an i-th column of X is zero +! vector and the corresponding i-th column of Y is +! non-zero, then the i-th column of Y is set to +! zero and a warning flag is raised. +! 'Y' :: The data snapshots matrices X and Y are multiplied +! by a diagonal matrix D so that Y*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'N' :: No data scaling. +!..... +! JOBZ (input) CHARACTER*1 +! Determines whether the eigenvectors (Koopman modes) will +! be computed. +! 'V' :: The eigenvectors (Koopman modes) will be computed +! and returned in the matrix Z. +! See the description of Z. +! 'F' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product Z*V, where Z +! is orthonormal and V contains the eigenvectors +! of the corresponding Rayleigh quotient. +! See the descriptions of F, V, Z. +! 'Q' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product Q*Z, where Z +! contains the eigenvectors of the compression of the +! underlying discretized operator onto the span of +! the data snapshots. See the descriptions of F, V, Z. +! Q is from the initial QR factorization. +! 'N' :: The eigenvectors are not computed. +!..... +! JOBR (input) CHARACTER*1 +! Determines whether to compute the residuals. +! 'R' :: The residuals for the computed eigenpairs will +! be computed and stored in the array RES. +! See the description of RES. +! For this option to be legal, JOBZ must be 'V'. +! 'N' :: The residuals are not computed. +!..... +! JOBQ (input) CHARACTER*1 +! Specifies whether to explicitly compute and return the +! orthogonal matrix from the QR factorization. +! 'Q' :: The matrix Q of the QR factorization of the data +! snapshot matrix is computed and stored in the +! array F. See the description of F. +! 'N' :: The matrix Q is not explicitly computed. +!..... +! JOBT (input) CHARACTER*1 +! Specifies whether to return the upper triangular factor +! from the QR factorization. +! 'R' :: The matrix R of the QR factorization of the data +! snapshot matrix F is returned in the array Y. +! See the description of Y and Further details. +! 'N' :: The matrix R is not returned. +!..... +! JOBF (input) CHARACTER*1 +! Specifies whether to store information needed for post- +! processing (e.g. computing refined Ritz vectors) +! 'R' :: The matrix needed for the refinement of the Ritz +! vectors is computed and stored in the array B. +! See the description of B. +! 'E' :: The unscaled eigenvectors of the Exact DMD are +! computed and returned in the array B. See the +! description of B. +! 'N' :: No eigenvector refinement data is computed. +! To be useful on exit, this option needs JOBQ='Q'. +!..... +! WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } +! Allows for a selection of the SVD algorithm from the +! LAPACK library. +! 1 :: DGESVD (the QR SVD algorithm) +! 2 :: DGESDD (the Divide and Conquer algorithm; if enough +! workspace available, this is the fastest option) +! 3 :: DGESVDQ (the preconditioned QR SVD ; this and 4 +! are the most accurate options) +! 4 :: DGEJSV (the preconditioned Jacobi SVD; this and 3 +! are the most accurate options) +! For the four methods above, a significant difference in +! the accuracy of small singular values is possible if +! the snapshots vary in norm so that X is severely +! ill-conditioned. If small (smaller than EPS*||X||) +! singular values are of interest and JOBS=='N', then +! the options (3, 4) give the most accurate results, where +! the option 4 is slightly better and with stronger +! theoretical background. +! If JOBS=='S', i.e. the columns of X will be normalized, +! then all methods give nearly equally accurate results. +!..... +! M (input) INTEGER, M >= 0 +! The state space dimension (the number of rows of F). +!..... +! N (input) INTEGER, 0 <= N <= M +! The number of data snapshots from a single trajectory, +! taken at equidistant discrete times. This is the +! number of columns of F. +!..... +! F (input/output) REAL(KIND=WP) M-by-N array +! > On entry, +! the columns of F are the sequence of data snapshots +! from a single trajectory, taken at equidistant discrete +! times. It is assumed that the column norms of F are +! in the range of the normalized floating point numbers. +! < On exit, +! If JOBQ == 'Q', the array F contains the orthogonal +! matrix/factor of the QR factorization of the initial +! data snapshots matrix F. See the description of JOBQ. +! If JOBQ == 'N', the entries in F strictly below the main +! diagonal contain, column-wise, the information on the +! Householder vectors, as returned by DGEQRF. The +! remaining information to restore the orthogonal matrix +! of the initial QR factorization is stored in WORK(1:N). +! See the description of WORK. +!..... +! LDF (input) INTEGER, LDF >= M +! The leading dimension of the array F. +!..... +! X (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array +! X is used as workspace to hold representations of the +! leading N-1 snapshots in the orthonormal basis computed +! in the QR factorization of F. +! On exit, the leading K columns of X contain the leading +! K left singular vectors of the above described content +! of X. To lift them to the space of the left singular +! vectors U(:,1:K)of the input data, pre-multiply with the +! Q factor from the initial QR factorization. +! See the descriptions of F, K, V and Z. +!..... +! LDX (input) INTEGER, LDX >= N +! The leading dimension of the array X. +!..... +! Y (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array +! Y is used as workspace to hold representations of the +! trailing N-1 snapshots in the orthonormal basis computed +! in the QR factorization of F. +! On exit, +! If JOBT == 'R', Y contains the MIN(M,N)-by-N upper +! triangular factor from the QR factorization of the data +! snapshot matrix F. +!..... +! LDY (input) INTEGER , LDY >= N +! The leading dimension of the array Y. +!..... +! NRNK (input) INTEGER +! Determines the mode how to compute the numerical rank, +! i.e. how to truncate small singular values of the input +! matrix X. On input, if +! NRNK = -1 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(1) +! This option is recommended. +! NRNK = -2 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(i-1) +! This option is included for R&D purposes. +! It requires highly accurate SVD, which +! may not be feasible. +! The numerical rank can be enforced by using positive +! value of NRNK as follows: +! 0 < NRNK <= N-1 :: at most NRNK largest singular values +! will be used. If the number of the computed nonzero +! singular values is less than NRNK, then only those +! nonzero values will be used and the actually used +! dimension is less than NRNK. The actual number of +! the nonzero singular values is returned in the variable +! K. See the description of K. +!..... +! TOL (input) REAL(KIND=WP), 0 <= TOL < 1 +! The tolerance for truncating small singular values. +! See the description of NRNK. +!..... +! K (output) INTEGER, 0 <= K <= N +! The dimension of the SVD/POD basis for the leading N-1 +! data snapshots (columns of F) and the number of the +! computed Ritz pairs. The value of K is determined +! according to the rule set by the parameters NRNK and +! TOL. See the descriptions of NRNK and TOL. +!..... +! REIG (output) REAL(KIND=WP) (N-1)-by-1 array +! The leading K (K<=N) entries of REIG contain +! the real parts of the computed eigenvalues +! REIG(1:K) + sqrt(-1)*IMEIG(1:K). +! See the descriptions of K, IMEIG, Z. +!..... +! IMEIG (output) REAL(KIND=WP) (N-1)-by-1 array +! The leading K (K0, then the corresponding complex +! conjugate pair of eigenvalues reads +! LAMBDA(i) = REIG(i) + sqrt(-1)*IMAG(i) +! LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i) +! That is, complex conjugate pairs have consequtive +! indices (i,i+1), with the positive imaginary part +! listed first. +! See the descriptions of K, REIG, Z. +!..... +! Z (workspace/output) REAL(KIND=WP) M-by-(N-1) array +! If JOBZ =='V' then +! Z contains real Ritz vectors as follows: +! If IMEIG(i)=0, then Z(:,i) is an eigenvector of +! the i-th Ritz value. +! If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then +! [Z(:,i) Z(:,i+1)] span an invariant subspace and +! the Ritz values extracted from this subspace are +! REIG(i) + sqrt(-1)*IMEIG(i) and +! REIG(i) - sqrt(-1)*IMEIG(i). +! The corresponding eigenvectors are +! Z(:,i) + sqrt(-1)*Z(:,i+1) and +! Z(:,i) - sqrt(-1)*Z(:,i+1), respectively. +! If JOBZ == 'F', then the above descriptions hold for +! the columns of Z*V, where the columns of V are the +! eigenvectors of the K-by-K Rayleigh quotient, and Z is +! orthonormal. The columns of V are similarly structured: +! If IMEIG(i) == 0 then Z*V(:,i) is an eigenvector, and if +! IMEIG(i) > 0 then Z*V(:,i)+sqrt(-1)*Z*V(:,i+1) and +! Z*V(:,i)-sqrt(-1)*Z*V(:,i+1) +! are the eigenvectors of LAMBDA(i), LAMBDA(i+1). +! See the descriptions of REIG, IMEIG, X and V. +!..... +! LDZ (input) INTEGER , LDZ >= M +! The leading dimension of the array Z. +!..... +! RES (output) REAL(KIND=WP) (N-1)-by-1 array +! RES(1:K) contains the residuals for the K computed +! Ritz pairs. +! If LAMBDA(i) is real, then +! RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2. +! If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair +! then +! RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F +! where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ] +! [-imag(LAMBDA(i)) real(LAMBDA(i)) ]. +! It holds that +! RES(i) = || A*ZC(:,i) - LAMBDA(i) *ZC(:,i) ||_2 +! RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2 +! where ZC(:,i) = Z(:,i) + sqrt(-1)*Z(:,i+1) +! ZC(:,i+1) = Z(:,i) - sqrt(-1)*Z(:,i+1) +! See the description of Z. +!..... +! B (output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array. +! IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can +! be used for computing the refined vectors; see further +! details in the provided references. +! If JOBF == 'E', B(1:N,1;K) contains +! A*U(:,1:K)*W(1:K,1:K), which are the vectors from the +! Exact DMD, up to scaling by the inverse eigenvalues. +! In both cases, the content of B can be lifted to the +! original dimension of the input data by pre-multiplying +! with the Q factor from the initial QR factorization. +! Here A denotes a compression of the underlying operator. +! See the descriptions of F and X. +! If JOBF =='N', then B is not referenced. +!..... +! LDB (input) INTEGER, LDB >= MIN(M,N) +! The leading dimension of the array B. +!..... +! V (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array +! On exit, V(1:K,1:K) contains the K eigenvectors of +! the Rayleigh quotient. The eigenvectors of a complex +! conjugate pair of eigenvalues are returned in real form +! as explained in the description of Z. The Ritz vectors +! (returned in Z) are the product of X and V; see +! the descriptions of X and Z. +!..... +! LDV (input) INTEGER, LDV >= N-1 +! The leading dimension of the array V. +!..... +! S (output) REAL(KIND=WP) (N-1)-by-(N-1) array +! The array S(1:K,1:K) is used for the matrix Rayleigh +! quotient. This content is overwritten during +! the eigenvalue decomposition by DGEEV. +! See the description of K. +!..... +! LDS (input) INTEGER, LDS >= N-1 +! The leading dimension of the array S. +!..... +! WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array +! On exit, +! WORK(1:MIN(M,N)) contains the scalar factors of the +! elementary reflectors as returned by DGEQRF of the +! M-by-N input matrix F. +! WORK(MIN(M,N)+1:MIN(M,N)+N-1) contains the singular values of +! the input submatrix F(1:M,1:N-1). +! If the call to DGEDMDQ is only workspace query, then +! WORK(1) contains the minimal workspace length and +! WORK(2) is the optimal workspace length. Hence, the +! length of work is at least 2. +! See the description of LWORK. +!..... +! LWORK (input) INTEGER +! The minimal length of the workspace vector WORK. +! LWORK is calculated as follows: +! Let MLWQR = N (minimal workspace for DGEQRF[M,N]) +! MLWDMD = minimal workspace for DGEDMD (see the +! description of LWORK in DGEDMD) for +! snapshots of dimensions MIN(M,N)-by-(N-1) +! MLWMQR = N (minimal workspace for +! DORMQR['L','N',M,N,N]) +! MLWGQR = N (minimal workspace for DORGQR[M,N,N]) +! Then +! LWORK = MAX(N+MLWQR, N+MLWDMD) +! is updated as follows: +! if JOBZ == 'V' or JOBZ == 'F' THEN +! LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWMQR ) +! if JOBQ == 'Q' THEN +! LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWGQR) +! If on entry LWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! IWORK (workspace/output) INTEGER LIWORK-by-1 array +! Workspace that is required only if WHTSVD equals +! 2 , 3 or 4. (See the description of WHTSVD). +! If on entry LWORK =-1 or LIWORK=-1, then the +! minimal length of IWORK is computed and returned in +! IWORK(1). See the description of LIWORK. +!..... +! LIWORK (input) INTEGER +! The minimal length of the workspace vector IWORK. +! If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 +! Let M1=MIN(M,N), N1=N-1. Then +! If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1)) +! If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1) +! If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1) +! If on entry LIWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! INFO (output) INTEGER +! -i < 0 :: On entry, the i-th argument had an +! illegal value +! = 0 :: Successful return. +! = 1 :: Void input. Quick exit (M=0 or N=0). +! = 2 :: The SVD computation of X did not converge. +! Suggestion: Check the input data and/or +! repeat with different WHTSVD. +! = 3 :: The computation of the eigenvalues did not +! converge. +! = 4 :: If data scaling was requested on input and +! the procedure found inconsistency in the data +! such that for some column index i, +! X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set +! to zero if JOBS=='C'. The computation proceeds +! with original or modified data and warning +! flag is set with INFO=4. +!............................................................. +!............................................................. +! Parameters +! ~~~~~~~~~~ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP +! +! Local scalars +! ~~~~~~~~~~~~~ + INTEGER :: IMINWR, INFO1, MLWDMD, MLWGQR, & + MLWMQR, MLWORK, MLWQR, MINMN, & + OLWDMD, OLWGQR, OLWMQR, OLWORK, & + OLWQR + LOGICAL :: LQUERY, SCCOLX, SCCOLY, WANTQ, & + WNTTRF, WNTRES, WNTVEC, WNTVCF, & + WNTVCQ, WNTREF, WNTEX + CHARACTER(LEN=1) :: JOBVL +! +! Local array +! ~~~~~~~~~~~ + REAL(KIND=WP) :: RDUMMY(2) +! +! External functions (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~ + LOGICAL LSAME + EXTERNAL LSAME +! +! External subroutines (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL DGEMM + EXTERNAL DGEQRF, DLACPY, DLASET, DORGQR, & + DORMQR, XERBLA + +! External subroutines +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL DGEDMD + +! Intrinsic functions +! ~~~~~~~~~~~~~~~~~~~ + INTRINSIC MAX, MIN, INT + !.......................................................... + ! + ! Test the input arguments + WNTRES = LSAME(JOBR,'R') + SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' ) + SCCOLY = LSAME(JOBS,'Y') + WNTVEC = LSAME(JOBZ,'V') + WNTVCF = LSAME(JOBZ,'F') + WNTVCQ = LSAME(JOBZ,'Q') + WNTREF = LSAME(JOBF,'R') + WNTEX = LSAME(JOBF,'E') + WANTQ = LSAME(JOBQ,'Q') + WNTTRF = LSAME(JOBT,'R') + MINMN = MIN(M,N) + INFO = 0 + LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) ) +! + IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. & + LSAME(JOBS,'N')) ) THEN + INFO = -1 + ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ & + .OR. LSAME(JOBZ,'N')) ) THEN + INFO = -2 + ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR. & + ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN + INFO = -3 + ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN + INFO = -4 + ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) ) THEN + INFO = -5 + ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR. & + LSAME(JOBF,'N') ) ) THEN + INFO = -6 + ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR. & + (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN + INFO = -7 + ELSE IF ( M < 0 ) THEN + INFO = -8 + ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN + INFO = -9 + ELSE IF ( LDF < M ) THEN + INFO = -11 + ELSE IF ( LDX < MINMN ) THEN + INFO = -13 + ELSE IF ( LDY < MINMN ) THEN + INFO = -15 + ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. & + ((NRNK >= 1).AND.(NRNK <=N ))) ) THEN + INFO = -16 + ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN + INFO = -17 + ELSE IF ( LDZ < M ) THEN + INFO = -22 + ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN + INFO = -25 + ELSE IF ( LDV < N-1 ) THEN + INFO = -27 + ELSE IF ( LDS < N-1 ) THEN + INFO = -29 + END IF +! + IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN + JOBVL = 'V' + ELSE + JOBVL = 'N' + END IF + IF ( INFO == 0 ) THEN + ! Compute the minimal and the optimal workspace + ! requirements. Simulate running the code and + ! determine minimal and optimal sizes of the + ! workspace at any moment of the run. + IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN + ! All output except K is void. INFO=1 signals + ! the void input. In case of a workspace query, + ! the minimal workspace lengths are returned. + IF ( LQUERY ) THEN + IWORK(1) = 1 + WORK(1) = 2 + WORK(2) = 2 + ELSE + K = 0 + END IF + INFO = 1 + RETURN + END IF + MLWQR = MAX(1,N) ! Minimal workspace length for DGEQRF. + MLWORK = MINMN + MLWQR + IF ( LQUERY ) THEN + CALL DGEQRF( M, N, F, LDF, WORK, RDUMMY, -1, & + INFO1 ) + OLWQR = INT(RDUMMY(1)) + OLWORK = MIN(M,N) + OLWQR + END IF + CALL DGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,& + N-1, X, LDX, Y, LDY, NRNK, TOL, K, & + REIG, IMEIG, Z, LDZ, RES, B, LDB, & + V, LDV, S, LDS, WORK, -1, IWORK, & + LIWORK, INFO1 ) + MLWDMD = INT(WORK(1)) + MLWORK = MAX(MLWORK, MINMN + MLWDMD) + IMINWR = IWORK(1) + IF ( LQUERY ) THEN + OLWDMD = INT(WORK(2)) + OLWORK = MAX(OLWORK, MINMN+OLWDMD) + END IF + IF ( WNTVEC .OR. WNTVCF ) THEN + MLWMQR = MAX(1,N) + MLWORK = MAX(MLWORK,MINMN+N-1+MLWMQR) + IF ( LQUERY ) THEN + CALL DORMQR( 'L','N', M, N, MINMN, F, LDF, & + WORK, Z, LDZ, WORK, -1, INFO1 ) + OLWMQR = INT(WORK(1)) + OLWORK = MAX(OLWORK,MINMN+N-1+OLWMQR) + END IF + END IF + IF ( WANTQ ) THEN + MLWGQR = N + MLWORK = MAX(MLWORK,MINMN+N-1+MLWGQR) + IF ( LQUERY ) THEN + CALL DORGQR( M, MINMN, MINMN, F, LDF, WORK, & + WORK, -1, INFO1 ) + OLWGQR = INT(WORK(1)) + OLWORK = MAX(OLWORK,MINMN+N-1+OLWGQR) + END IF + END IF + IMINWR = MAX( 1, IMINWR ) + MLWORK = MAX( 2, MLWORK ) + IF ( LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -31 + IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -33 + END IF + IF( INFO /= 0 ) THEN + CALL XERBLA( 'DGEDMDQ', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN +! Return minimal and optimal workspace sizes + IWORK(1) = IMINWR + WORK(1) = MLWORK + WORK(2) = OLWORK + RETURN + END IF +!..... +! Initial QR factorization that is used to represent the +! snapshots as elements of lower dimensional subspace. +! For large scale computation with M >>N , at this place +! one can use an out of core QRF. +! + CALL DGEQRF( M, N, F, LDF, WORK, & + WORK(MINMN+1), LWORK-MINMN, INFO1 ) +! +! Define X and Y as the snapshots representations in the +! orthogonal basis computed in the QR factorization. +! X corresponds to the leading N-1 and Y to the trailing +! N-1 snapshots. + CALL DLASET( 'L', MINMN, N-1, ZERO, ZERO, X, LDX ) + CALL DLACPY( 'U', MINMN, N-1, F, LDF, X, LDX ) + CALL DLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY ) + IF ( M >= 3 ) THEN + CALL DLASET( 'L', MINMN-2, N-2, ZERO, ZERO, & + Y(3,1), LDY ) + END IF +! +! Compute the DMD of the projected snapshot pairs (X,Y) + CALL DGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN, & + N-1, X, LDX, Y, LDY, NRNK, TOL, K, & + REIG, IMEIG, Z, LDZ, RES, B, LDB, V, & + LDV, S, LDS, WORK(MINMN+1), LWORK-MINMN, & + IWORK, LIWORK, INFO1 ) + IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN + ! Return with error code. See DGEDMD for details. + INFO = INFO1 + RETURN + ELSE + INFO = INFO1 + END IF +! +! The Ritz vectors (Koopman modes) can be explicitly +! formed or returned in factored form. + IF ( WNTVEC ) THEN + ! Compute the eigenvectors explicitly. + IF ( M > MINMN ) CALL DLASET( 'A', M-MINMN, K, ZERO, & + ZERO, Z(MINMN+1,1), LDZ ) + CALL DORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z, & + LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 ) + ELSE IF ( WNTVCF ) THEN + ! Return the Ritz vectors (eigenvectors) in factored + ! form Z*V, where Z contains orthonormal matrix (the + ! product of Q from the initial QR factorization and + ! the SVD/POD_basis returned by DGEDMD in X) and the + ! second factor (the eigenvectors of the Rayleigh + ! quotient) is in the array V, as returned by DGEDMD. + CALL DLACPY( 'A', N, K, X, LDX, Z, LDZ ) + IF ( M > N ) CALL DLASET( 'A', M-N, K, ZERO, ZERO, & + Z(N+1,1), LDZ ) + CALL DORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z, & + LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 ) + END IF +! +! Some optional output variables: +! +! The upper triangular factor R in the initial QR +! factorization is optionally returned in the array Y. +! This is useful if this call to DGEDMDQ is to be +! followed by a streaming DMD that is implemented in a +! QR compressed form. + IF ( WNTTRF ) THEN ! Return the upper triangular R in Y + CALL DLASET( 'A', MINMN, N, ZERO, ZERO, Y, LDY ) + CALL DLACPY( 'U', MINMN, N, F, LDF, Y, LDY ) + END IF +! +! The orthonormal/orthogonal factor Q in the initial QR +! factorization is optionally returned in the array F. +! Same as with the triangular factor above, this is +! useful in a streaming DMD. + IF ( WANTQ ) THEN ! Q overwrites F + CALL DORGQR( M, MINMN, MINMN, F, LDF, WORK, & + WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 ) + END IF +! + RETURN +! + END SUBROUTINE DGEDMDQ + \ No newline at end of file diff --git a/lapack-netlib/SRC/sgedmd.f90 b/lapack-netlib/SRC/sgedmd.f90 new file mode 100644 index 000000000..49cb11527 --- /dev/null +++ b/lapack-netlib/SRC/sgedmd.f90 @@ -0,0 +1,1054 @@ + SUBROUTINE SGEDMD( JOBS, JOBZ, JOBR, JOBF, WHTSVD, & + M, N, X, LDX, Y, LDY, NRNK, TOL, & + K, REIG, IMEIG, Z, LDZ, RES, & + B, LDB, W, LDW, S, LDS, & + WORK, LWORK, IWORK, LIWORK, INFO ) +! March 2023 +!..... + USE iso_fortran_env + IMPLICIT NONE + INTEGER, PARAMETER :: WP = real32 +!..... +! Scalar arguments + CHARACTER, INTENT(IN) :: JOBS, JOBZ, JOBR, JOBF + INTEGER, INTENT(IN) :: WHTSVD, M, N, LDX, LDY, & + NRNK, LDZ, LDB, LDW, LDS, & + LWORK, LIWORK + INTEGER, INTENT(OUT) :: K, INFO + REAL(KIND=WP), INTENT(IN) :: TOL +! Array arguments + REAL(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*) + REAL(KIND=WP), INTENT(OUT) :: Z(LDZ,*), B(LDB,*), & + W(LDW,*), S(LDS,*) + REAL(KIND=WP), INTENT(OUT) :: REIG(*), IMEIG(*), & + RES(*) + REAL(KIND=WP), INTENT(OUT) :: WORK(*) + INTEGER, INTENT(OUT) :: IWORK(*) +!............................................................ +! Purpose +! ======= +! SGEDMD computes the Dynamic Mode Decomposition (DMD) for +! a pair of data snapshot matrices. For the input matrices +! X and Y such that Y = A*X with an unaccessible matrix +! A, SGEDMD computes a certain number of Ritz pairs of A using +! the standard Rayleigh-Ritz extraction from a subspace of +! range(X) that is determined using the leading left singular +! vectors of X. Optionally, SGEDMD returns the residuals +! of the computed Ritz pairs, the information needed for +! a refinement of the Ritz vectors, or the eigenvectors of +! the Exact DMD. +! For further details see the references listed +! below. For more details of the implementation see [3]. +! +! References +! ========== +! [1] P. Schmid: Dynamic mode decomposition of numerical +! and experimental data, +! Journal of Fluid Mechanics 656, 5-28, 2010. +! [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal +! decompositions: analysis and enhancements, +! SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. +! [3] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition I. Technical report. AIMDyn Inc. +! and LAPACK Working Note 298. +! [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. +! Brunton, N. Kutz: On Dynamic Mode Decomposition: +! Theory and Applications, Journal of Computational +! Dynamics 1(2), 391 -421, 2014. +! +!...................................................................... +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! and supported by +! - DARPA SBIR project "Koopman Operator-Based Forecasting +! for Nonstationary Processes from Near-Term, Limited +! Observational Data" Contract No: W31P4Q-21-C-0007 +! - DARPA PAI project "Physics-Informed Machine Learning +! Methodologies" Contract No: HR0011-18-9-0033 +! - DARPA MoDyL project "A Data-Driven, Operator-Theoretic +! Framework for Space-Time Analysis of Process Dynamics" +! Contract No: HR0011-16-C-0116 +! Any opinions, findings and conclusions or recommendations +! expressed in this material are those of the author and +! do not necessarily reflect the views of the DARPA SBIR +! Program Office +!============================================================ +! Distribution Statement A: +! Approved for Public Release, Distribution Unlimited. +! Cleared by DARPA on September 29, 2022 +!============================================================ +!...................................................................... +! Arguments +! ========= +! JOBS (input) CHARACTER*1 +! Determines whether the initial data snapshots are scaled +! by a diagonal matrix. +! 'S' :: The data snapshots matrices X and Y are multiplied +! with a diagonal matrix D so that X*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'C' :: The snapshots are scaled as with the 'S' option. +! If it is found that an i-th column of X is zero +! vector and the corresponding i-th column of Y is +! non-zero, then the i-th column of Y is set to +! zero and a warning flag is raised. +! 'Y' :: The data snapshots matrices X and Y are multiplied +! by a diagonal matrix D so that Y*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'N' :: No data scaling. +!..... +! JOBZ (input) CHARACTER*1 +! Determines whether the eigenvectors (Koopman modes) will +! be computed. +! 'V' :: The eigenvectors (Koopman modes) will be computed +! and returned in the matrix Z. +! See the description of Z. +! 'F' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product X(:,1:K)*W, where X +! contains a POD basis (leading left singular vectors +! of the data matrix X) and W contains the eigenvectors +! of the corresponding Rayleigh quotient. +! See the descriptions of K, X, W, Z. +! 'N' :: The eigenvectors are not computed. +!..... +! JOBR (input) CHARACTER*1 +! Determines whether to compute the residuals. +! 'R' :: The residuals for the computed eigenpairs will be +! computed and stored in the array RES. +! See the description of RES. +! For this option to be legal, JOBZ must be 'V'. +! 'N' :: The residuals are not computed. +!..... +! JOBF (input) CHARACTER*1 +! Specifies whether to store information needed for post- +! processing (e.g. computing refined Ritz vectors) +! 'R' :: The matrix needed for the refinement of the Ritz +! vectors is computed and stored in the array B. +! See the description of B. +! 'E' :: The unscaled eigenvectors of the Exact DMD are +! computed and returned in the array B. See the +! description of B. +! 'N' :: No eigenvector refinement data is computed. +!..... +! WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } +! Allows for a selection of the SVD algorithm from the +! LAPACK library. +! 1 :: SGESVD (the QR SVD algorithm) +! 2 :: SGESDD (the Divide and Conquer algorithm; if enough +! workspace available, this is the fastest option) +! 3 :: SGESVDQ (the preconditioned QR SVD ; this and 4 +! are the most accurate options) +! 4 :: SGEJSV (the preconditioned Jacobi SVD; this and 3 +! are the most accurate options) +! For the four methods above, a significant difference in +! the accuracy of small singular values is possible if +! the snapshots vary in norm so that X is severely +! ill-conditioned. If small (smaller than EPS*||X||) +! singular values are of interest and JOBS=='N', then +! the options (3, 4) give the most accurate results, where +! the option 4 is slightly better and with stronger +! theoretical background. +! If JOBS=='S', i.e. the columns of X will be normalized, +! then all methods give nearly equally accurate results. +!..... +! M (input) INTEGER, M>= 0 +! The state space dimension (the row dimension of X, Y). +!..... +! N (input) INTEGER, 0 <= N <= M +! The number of data snapshot pairs +! (the number of columns of X and Y). +!..... +! X (input/output) REAL(KIND=WP) M-by-N array +! > On entry, X contains the data snapshot matrix X. It is +! assumed that the column norms of X are in the range of +! the normalized floating point numbers. +! < On exit, the leading K columns of X contain a POD basis, +! i.e. the leading K left singular vectors of the input +! data matrix X, U(:,1:K). All N columns of X contain all +! left singular vectors of the input matrix X. +! See the descriptions of K, Z and W. +!..... +! LDX (input) INTEGER, LDX >= M +! The leading dimension of the array X. +!..... +! Y (input/workspace/output) REAL(KIND=WP) M-by-N array +! > On entry, Y contains the data snapshot matrix Y +! < On exit, +! If JOBR == 'R', the leading K columns of Y contain +! the residual vectors for the computed Ritz pairs. +! See the description of RES. +! If JOBR == 'N', Y contains the original input data, +! scaled according to the value of JOBS. +!..... +! LDY (input) INTEGER , LDY >= M +! The leading dimension of the array Y. +!..... +! NRNK (input) INTEGER +! Determines the mode how to compute the numerical rank, +! i.e. how to truncate small singular values of the input +! matrix X. On input, if +! NRNK = -1 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(1) +! This option is recommended. +! NRNK = -2 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(i-1) +! This option is included for R&D purposes. +! It requires highly accurate SVD, which +! may not be feasible. +! The numerical rank can be enforced by using positive +! value of NRNK as follows: +! 0 < NRNK <= N :: at most NRNK largest singular values +! will be used. If the number of the computed nonzero +! singular values is less than NRNK, then only those +! nonzero values will be used and the actually used +! dimension is less than NRNK. The actual number of +! the nonzero singular values is returned in the variable +! K. See the descriptions of TOL and K. +!..... +! TOL (input) REAL(KIND=WP), 0 <= TOL < 1 +! The tolerance for truncating small singular values. +! See the description of NRNK. +!..... +! K (output) INTEGER, 0 <= K <= N +! The dimension of the POD basis for the data snapshot +! matrix X and the number of the computed Ritz pairs. +! The value of K is determined according to the rule set +! by the parameters NRNK and TOL. +! See the descriptions of NRNK and TOL. +!..... +! REIG (output) REAL(KIND=WP) N-by-1 array +! The leading K (K<=N) entries of REIG contain +! the real parts of the computed eigenvalues +! REIG(1:K) + sqrt(-1)*IMEIG(1:K). +! See the descriptions of K, IMEIG, and Z. +!..... +! IMEIG (output) REAL(KIND=WP) N-by-1 array +! The leading K (K<=N) entries of IMEIG contain +! the imaginary parts of the computed eigenvalues +! REIG(1:K) + sqrt(-1)*IMEIG(1:K). +! The eigenvalues are determined as follows: +! If IMEIG(i) == 0, then the corresponding eigenvalue is +! real, LAMBDA(i) = REIG(i). +! If IMEIG(i)>0, then the corresponding complex +! conjugate pair of eigenvalues reads +! LAMBDA(i) = REIG(i) + sqrt(-1)*IMAG(i) +! LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i) +! That is, complex conjugate pairs have consecutive +! indices (i,i+1), with the positive imaginary part +! listed first. +! See the descriptions of K, REIG, and Z. +!..... +! Z (workspace/output) REAL(KIND=WP) M-by-N array +! If JOBZ =='V' then +! Z contains real Ritz vectors as follows: +! If IMEIG(i)=0, then Z(:,i) is an eigenvector of +! the i-th Ritz value; ||Z(:,i)||_2=1. +! If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then +! [Z(:,i) Z(:,i+1)] span an invariant subspace and +! the Ritz values extracted from this subspace are +! REIG(i) + sqrt(-1)*IMEIG(i) and +! REIG(i) - sqrt(-1)*IMEIG(i). +! The corresponding eigenvectors are +! Z(:,i) + sqrt(-1)*Z(:,i+1) and +! Z(:,i) - sqrt(-1)*Z(:,i+1), respectively. +! || Z(:,i:i+1)||_F = 1. +! If JOBZ == 'F', then the above descriptions hold for +! the columns of X(:,1:K)*W(1:K,1:K), where the columns +! of W(1:k,1:K) are the computed eigenvectors of the +! K-by-K Rayleigh quotient. The columns of W(1:K,1:K) +! are similarly structured: If IMEIG(i) == 0 then +! X(:,1:K)*W(:,i) is an eigenvector, and if IMEIG(i)>0 +! then X(:,1:K)*W(:,i)+sqrt(-1)*X(:,1:K)*W(:,i+1) and +! X(:,1:K)*W(:,i)-sqrt(-1)*X(:,1:K)*W(:,i+1) +! are the eigenvectors of LAMBDA(i), LAMBDA(i+1). +! See the descriptions of REIG, IMEIG, X and W. +!..... +! LDZ (input) INTEGER , LDZ >= M +! The leading dimension of the array Z. +!..... +! RES (output) REAL(KIND=WP) N-by-1 array +! RES(1:K) contains the residuals for the K computed +! Ritz pairs. +! If LAMBDA(i) is real, then +! RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2. +! If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair +! then +! RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F +! where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ] +! [-imag(LAMBDA(i)) real(LAMBDA(i)) ]. +! It holds that +! RES(i) = || A*ZC(:,i) - LAMBDA(i) *ZC(:,i) ||_2 +! RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2 +! where ZC(:,i) = Z(:,i) + sqrt(-1)*Z(:,i+1) +! ZC(:,i+1) = Z(:,i) - sqrt(-1)*Z(:,i+1) +! See the description of REIG, IMEIG and Z. +!..... +! B (output) REAL(KIND=WP) M-by-N array. +! IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can +! be used for computing the refined vectors; see further +! details in the provided references. +! If JOBF == 'E', B(1:M,1;K) contains +! A*U(:,1:K)*W(1:K,1:K), which are the vectors from the +! Exact DMD, up to scaling by the inverse eigenvalues. +! If JOBF =='N', then B is not referenced. +! See the descriptions of X, W, K. +!..... +! LDB (input) INTEGER, LDB >= M +! The leading dimension of the array B. +!..... +! W (workspace/output) REAL(KIND=WP) N-by-N array +! On exit, W(1:K,1:K) contains the K computed +! eigenvectors of the matrix Rayleigh quotient (real and +! imaginary parts for each complex conjugate pair of the +! eigenvalues). The Ritz vectors (returned in Z) are the +! product of X (containing a POD basis for the input +! matrix X) and W. See the descriptions of K, S, X and Z. +! W is also used as a workspace to temporarily store the +! left singular vectors of X. +!..... +! LDW (input) INTEGER, LDW >= N +! The leading dimension of the array W. +!..... +! S (workspace/output) REAL(KIND=WP) N-by-N array +! The array S(1:K,1:K) is used for the matrix Rayleigh +! quotient. This content is overwritten during +! the eigenvalue decomposition by SGEEV. +! See the description of K. +!..... +! LDS (input) INTEGER, LDS >= N +! The leading dimension of the array S. +!..... +! WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array +! On exit, WORK(1:N) contains the singular values of +! X (for JOBS=='N') or column scaled X (JOBS=='S', 'C'). +! If WHTSVD==4, then WORK(N+1) and WORK(N+2) contain +! scaling factor WORK(N+2)/WORK(N+1) used to scale X +! and Y to avoid overflow in the SVD of X. +! This may be of interest if the scaling option is off +! and as many as possible smallest eigenvalues are +! desired to the highest feasible accuracy. +! If the call to SGEDMD is only workspace query, then +! WORK(1) contains the minimal workspace length and +! WORK(2) is the optimal workspace length. Hence, the +! length of work is at least 2. +! See the description of LWORK. +!..... +! LWORK (input) INTEGER +! The minimal length of the workspace vector WORK. +! LWORK is calculated as follows: +! If WHTSVD == 1 :: +! If JOBZ == 'V', then +! LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)). +! If JOBZ == 'N' then +! LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)). +! Here LWORK_SVD = MAX(1,3*N+M,5*N) is the minimal +! workspace length of SGESVD. +! If WHTSVD == 2 :: +! If JOBZ == 'V', then +! LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)) +! If JOBZ == 'N', then +! LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)) +! Here LWORK_SVD = MAX(M, 5*N*N+4*N)+3*N*N is the +! minimal workspace length of SGESDD. +! If WHTSVD == 3 :: +! If JOBZ == 'V', then +! LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N)) +! If JOBZ == 'N', then +! LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N)) +! Here LWORK_SVD = N+M+MAX(3*N+1, +! MAX(1,3*N+M,5*N),MAX(1,N)) +! is the minimal workspace length of SGESVDQ. +! If WHTSVD == 4 :: +! If JOBZ == 'V', then +! LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N)) +! If JOBZ == 'N', then +! LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N)) +! Here LWORK_SVD = MAX(7,2*M+N,6*N+2*N*N) is the +! minimal workspace length of SGEJSV. +! The above expressions are not simplified in order to +! make the usage of WORK more transparent, and for +! easier checking. In any case, LWORK >= 2. +! If on entry LWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! IWORK (workspace/output) INTEGER LIWORK-by-1 array +! Workspace that is required only if WHTSVD equals +! 2 , 3 or 4. (See the description of WHTSVD). +! If on entry LWORK =-1 or LIWORK=-1, then the +! minimal length of IWORK is computed and returned in +! IWORK(1). See the description of LIWORK. +!..... +! LIWORK (input) INTEGER +! The minimal length of the workspace vector IWORK. +! If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 +! If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)) +! If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1) +! If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N) +! If on entry LIWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! INFO (output) INTEGER +! -i < 0 :: On entry, the i-th argument had an +! illegal value +! = 0 :: Successful return. +! = 1 :: Void input. Quick exit (M=0 or N=0). +! = 2 :: The SVD computation of X did not converge. +! Suggestion: Check the input data and/or +! repeat with different WHTSVD. +! = 3 :: The computation of the eigenvalues did not +! converge. +! = 4 :: If data scaling was requested on input and +! the procedure found inconsistency in the data +! such that for some column index i, +! X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set +! to zero if JOBS=='C'. The computation proceeds +! with original or modified data and warning +! flag is set with INFO=4. +!............................................................. +!............................................................. +! Parameters +! ~~~~~~~~~~ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP + +! Local scalars +! ~~~~~~~~~~~~~ + REAL(KIND=WP) :: OFL, ROOTSC, SCALE, SMALL, & + SSUM, XSCL1, XSCL2 + INTEGER :: i, j, IMINWR, INFO1, INFO2, & + LWRKEV, LWRSDD, LWRSVD, & + LWRSVQ, MLWORK, MWRKEV, MWRSDD, & + MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, & + OLWORK + LOGICAL :: BADXY, LQUERY, SCCOLX, SCCOLY, & + WNTEX, WNTREF, WNTRES, WNTVEC + CHARACTER :: JOBZL, T_OR_N + CHARACTER :: JSVOPT + +! Local arrays +! ~~~~~~~~~~~~ + REAL(KIND=WP) :: AB(2,2), RDUMMY(2), RDUMMY2(2) + +! External functions (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~ + REAL(KIND=WP) SLANGE, SLAMCH, SNRM2 + EXTERNAL SLANGE, SLAMCH, SNRM2, ISAMAX + INTEGER ISAMAX + LOGICAL SISNAN, LSAME + EXTERNAL SISNAN, LSAME + +! External subroutines (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL SAXPY, SGEMM, SSCAL + EXTERNAL SGEEV, SGEJSV, SGESDD, SGESVD, SGESVDQ, & + SLACPY, SLASCL, SLASSQ, XERBLA + +! Intrinsic functions +! ~~~~~~~~~~~~~~~~~~~ + INTRINSIC INT, FLOAT, MAX, SQRT +!............................................................ +! +! Test the input arguments +! + WNTRES = LSAME(JOBR,'R') + SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C') + SCCOLY = LSAME(JOBS,'Y') + WNTVEC = LSAME(JOBZ,'V') + WNTREF = LSAME(JOBF,'R') + WNTEX = LSAME(JOBF,'E') + INFO = 0 + LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) ) +! + IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. & + LSAME(JOBS,'N')) ) THEN + INFO = -1 + ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N') & + .OR. LSAME(JOBZ,'F')) ) THEN + INFO = -2 + ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR. & + ( WNTRES .AND. (.NOT.WNTVEC) ) ) THEN + INFO = -3 + ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR. & + LSAME(JOBF,'N') ) ) THEN + INFO = -4 + ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR. & + (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN + INFO = -5 + ELSE IF ( M < 0 ) THEN + INFO = -6 + ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN + INFO = -7 + ELSE IF ( LDX < M ) THEN + INFO = -9 + ELSE IF ( LDY < M ) THEN + INFO = -11 + ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. & + ((NRNK >= 1).AND.(NRNK <=N ))) ) THEN + INFO = -12 + ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN + INFO = -13 + ELSE IF ( LDZ < M ) THEN + INFO = -18 + ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN + INFO = -21 + ELSE IF ( LDW < N ) THEN + INFO = -23 + ELSE IF ( LDS < N ) THEN + INFO = -25 + END IF +! + IF ( INFO == 0 ) THEN + ! Compute the minimal and the optimal workspace + ! requirements. Simulate running the code and + ! determine minimal and optimal sizes of the + ! workspace at any moment of the run. + IF ( N == 0 ) THEN + ! Quick return. All output except K is void. + ! INFO=1 signals the void input. + ! In case of a workspace query, the default + ! minimal workspace lengths are returned. + IF ( LQUERY ) THEN + IWORK(1) = 1 + WORK(1) = 2 + WORK(2) = 2 + ELSE + K = 0 + END IF + INFO = 1 + RETURN + END IF + MLWORK = MAX(2,N) + OLWORK = MAX(2,N) + IMINWR = 1 + SELECT CASE ( WHTSVD ) + CASE (1) + ! The following is specified as the minimal + ! length of WORK in the definition of SGESVD: + ! MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) + MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) + MLWORK = MAX(MLWORK,N + MWRSVD) + IF ( LQUERY ) THEN + CALL SGESVD( 'O', 'S', M, N, X, LDX, WORK, & + B, LDB, W, LDW, RDUMMY, -1, INFO1 ) + LWRSVD = MAX( MWRSVD, INT( RDUMMY(1) ) ) + OLWORK = MAX(OLWORK,N + LWRSVD) + END IF + CASE (2) + ! The following is specified as the minimal + ! length of WORK in the definition of SGESDD: + ! MWRSDD = 3*MIN(M,N)*MIN(M,N) + + ! MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) ) + ! IMINWR = 8*MIN(M,N) + MWRSDD = 3*MIN(M,N)*MIN(M,N) + & + MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) ) + MLWORK = MAX(MLWORK,N + MWRSDD) + IMINWR = 8*MIN(M,N) + IF ( LQUERY ) THEN + CALL SGESDD( 'O', M, N, X, LDX, WORK, B, & + LDB, W, LDW, RDUMMY, -1, IWORK, INFO1 ) + LWRSDD = MAX( MWRSDD, INT( RDUMMY(1) ) ) + OLWORK = MAX(OLWORK,N + LWRSDD) + END IF + CASE (3) + !LWQP3 = 3*N+1 + !LWORQ = MAX(N, 1) + !MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) + !MWRSVQ = N + MAX( LWQP3, MWRSVD, LWORQ )+ MAX(M,2) + !MLWORK = N + MWRSVQ + !IMINWR = M+N-1 + CALL SGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, & + X, LDX, WORK, Z, LDZ, W, LDW, & + NUMRNK, IWORK, -1, RDUMMY, & + -1, RDUMMY2, -1, INFO1 ) + IMINWR = IWORK(1) + MWRSVQ = INT(RDUMMY(2)) + MLWORK = MAX(MLWORK,N+MWRSVQ+INT(RDUMMY2(1))) + IF ( LQUERY ) THEN + LWRSVQ = INT(RDUMMY(1)) + OLWORK = MAX(OLWORK,N+LWRSVQ+INT(RDUMMY2(1))) + END IF + CASE (4) + JSVOPT = 'J' + !MWRSVJ = MAX( 7, 2*M+N, 6*N+2*N*N )! for JSVOPT='V' + MWRSVJ = MAX( 7, 2*M+N, 4*N+N*N, 2*N+N*N+6 ) + MLWORK = MAX(MLWORK,N+MWRSVJ) + IMINWR = MAX( 3, M+3*N ) + IF ( LQUERY ) THEN + OLWORK = MAX(OLWORK,N+MWRSVJ) + END IF + END SELECT + IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN + JOBZL = 'V' + ELSE + JOBZL = 'N' + END IF + ! Workspace calculation to the SGEEV call + IF ( LSAME(JOBZL,'V') ) THEN + MWRKEV = MAX( 1, 4*N ) + ELSE + MWRKEV = MAX( 1, 3*N ) + END IF + MLWORK = MAX(MLWORK,N+MWRKEV) + IF ( LQUERY ) THEN + CALL SGEEV( 'N', JOBZL, N, S, LDS, REIG, & + IMEIG, W, LDW, W, LDW, RDUMMY, -1, INFO1 ) + LWRKEV = MAX( MWRKEV, INT(RDUMMY(1)) ) + OLWORK = MAX( OLWORK, N+LWRKEV ) + END IF +! + IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -29 + IF ( LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -27 + END IF +! + IF( INFO /= 0 ) THEN + CALL XERBLA( 'SGEDMD', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN +! Return minimal and optimal workspace sizes + IWORK(1) = IMINWR + WORK(1) = MLWORK + WORK(2) = OLWORK + RETURN + END IF +!............................................................ +! + OFL = SLAMCH('O') + SMALL = SLAMCH('S') + BADXY = .FALSE. +! +! <1> Optional scaling of the snapshots (columns of X, Y) +! ========================================================== + IF ( SCCOLX ) THEN + ! The columns of X will be normalized. + ! To prevent overflows, the column norms of X are + ! carefully computed using SLASSQ. + K = 0 + DO i = 1, N + !WORK(i) = DNRM2( M, X(1,i), 1 ) + SCALE = ZERO + CALL SLASSQ( M, X(1,i), 1, SCALE, SSUM ) + IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN + K = 0 + INFO = -8 + CALL XERBLA('SGEDMD',-INFO) + END IF + IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN + ROOTSC = SQRT(SSUM) + IF ( SCALE .GE. (OFL / ROOTSC) ) THEN +! Norm of X(:,i) overflows. First, X(:,i) +! is scaled by +! ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2. +! Next, the norm of X(:,i) is stored without +! overflow as WORK(i) = - SCALE * (ROOTSC/M), +! the minus sign indicating the 1/M factor. +! Scaling is performed without overflow, and +! underflow may occur in the smallest entries +! of X(:,i). The relative backward and forward +! errors are small in the ell_2 norm. + CALL SLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, & + M, 1, X(1,i), M, INFO2 ) + WORK(i) = - SCALE * ( ROOTSC / FLOAT(M) ) + ELSE +! X(:,i) will be scaled to unit 2-norm + WORK(i) = SCALE * ROOTSC + CALL SLASCL( 'G',0, 0, WORK(i), ONE, M, 1, & + X(1,i), M, INFO2 ) ! LAPACK CALL +! X(1:M,i) = (ONE/WORK(i)) * X(1:M,i) ! INTRINSIC + END IF + ELSE + WORK(i) = ZERO + K = K + 1 + END IF + END DO + IF ( K == N ) THEN + ! All columns of X are zero. Return error code -8. + ! (the 8th input variable had an illegal value) + K = 0 + INFO = -8 + CALL XERBLA('SGEDMD',-INFO) + RETURN + END IF + DO i = 1, N +! Now, apply the same scaling to the columns of Y. + IF ( WORK(i) > ZERO ) THEN + CALL SSCAL( M, ONE/WORK(i), Y(1,i), 1 ) ! BLAS CALL +! Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i) ! INTRINSIC + ELSE IF ( WORK(i) < ZERO ) THEN + CALL SLASCL( 'G', 0, 0, -WORK(i), & + ONE/FLOAT(M), M, 1, Y(1,i), M, INFO2 ) ! LAPACK CALL + ELSE IF ( Y(ISAMAX(M, Y(1,i),1),i ) & + /= ZERO ) THEN +! X(:,i) is zero vector. For consistency, +! Y(:,i) should also be zero. If Y(:,i) is not +! zero, then the data might be inconsistent or +! corrupted. If JOBS == 'C', Y(:,i) is set to +! zero and a warning flag is raised. +! The computation continues but the +! situation will be reported in the output. + BADXY = .TRUE. + IF ( LSAME(JOBS,'C')) & + CALL SSCAL( M, ZERO, Y(1,i), 1 ) ! BLAS CALL + END IF + END DO + END IF + ! + IF ( SCCOLY ) THEN + ! The columns of Y will be normalized. + ! To prevent overflows, the column norms of Y are + ! carefully computed using SLASSQ. + DO i = 1, N + !WORK(i) = DNRM2( M, Y(1,i), 1 ) + SCALE = ZERO + CALL SLASSQ( M, Y(1,i), 1, SCALE, SSUM ) + IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN + K = 0 + INFO = -10 + CALL XERBLA('SGEDMD',-INFO) + END IF + IF ( SCALE /= ZERO .AND. (SSUM /= ZERO) ) THEN + ROOTSC = SQRT(SSUM) + IF ( SCALE .GE. (OFL / ROOTSC) ) THEN +! Norm of Y(:,i) overflows. First, Y(:,i) +! is scaled by +! ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2. +! Next, the norm of Y(:,i) is stored without +! overflow as WORK(i) = - SCALE * (ROOTSC/M), +! the minus sign indicating the 1/M factor. +! Scaling is performed without overflow, and +! underflow may occur in the smallest entries +! of Y(:,i). The relative backward and forward +! errors are small in the ell_2 norm. + CALL SLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, & + M, 1, Y(1,i), M, INFO2 ) + WORK(i) = - SCALE * ( ROOTSC / FLOAT(M) ) + ELSE +! X(:,i) will be scaled to unit 2-norm + WORK(i) = SCALE * ROOTSC + CALL SLASCL( 'G',0, 0, WORK(i), ONE, M, 1, & + Y(1,i), M, INFO2 ) ! LAPACK CALL +! Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i) ! INTRINSIC + END IF + ELSE + WORK(i) = ZERO + END IF + END DO + DO i = 1, N +! Now, apply the same scaling to the columns of X. + IF ( WORK(i) > ZERO ) THEN + CALL SSCAL( M, ONE/WORK(i), X(1,i), 1 ) ! BLAS CALL +! X(1:M,i) = (ONE/WORK(i)) * X(1:M,i) ! INTRINSIC + ELSE IF ( WORK(i) < ZERO ) THEN + CALL SLASCL( 'G', 0, 0, -WORK(i), & + ONE/FLOAT(M), M, 1, X(1,i), M, INFO2 ) ! LAPACK CALL + ELSE IF ( X(ISAMAX(M, X(1,i),1),i ) & + /= ZERO ) THEN +! Y(:,i) is zero vector. If X(:,i) is not +! zero, then a warning flag is raised. +! The computation continues but the +! situation will be reported in the output. + BADXY = .TRUE. + END IF + END DO + END IF +! +! <2> SVD of the data snapshot matrix X. +! ===================================== +! The left singular vectors are stored in the array X. +! The right singular vectors are in the array W. +! The array W will later on contain the eigenvectors +! of a Rayleigh quotient. + NUMRNK = N + SELECT CASE ( WHTSVD ) + CASE (1) + CALL SGESVD( 'O', 'S', M, N, X, LDX, WORK, B, & + LDB, W, LDW, WORK(N+1), LWORK-N, INFO1 ) ! LAPACK CALL + T_OR_N = 'T' + CASE (2) + CALL SGESDD( 'O', M, N, X, LDX, WORK, B, LDB, W, & + LDW, WORK(N+1), LWORK-N, IWORK, INFO1 ) ! LAPACK CALL + T_OR_N = 'T' + CASE (3) + CALL SGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, & + X, LDX, WORK, Z, LDZ, W, LDW, & + NUMRNK, IWORK, LIWORK, WORK(N+MAX(2,M)+1),& + LWORK-N-MAX(2,M), WORK(N+1), MAX(2,M), INFO1) ! LAPACK CALL + CALL SLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX ) ! LAPACK CALL + T_OR_N = 'T' + CASE (4) + CALL SGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, & + N, X, LDX, WORK, Z, LDZ, W, LDW, & + WORK(N+1), LWORK-N, IWORK, INFO1 ) ! LAPACK CALL + CALL SLACPY( 'A', M, N, Z, LDZ, X, LDX ) ! LAPACK CALL + T_OR_N = 'N' + XSCL1 = WORK(N+1) + XSCL2 = WORK(N+2) + IF ( XSCL1 /= XSCL2 ) THEN + ! This is an exceptional situation. If the + ! data matrices are not scaled and the + ! largest singular value of X overflows. + ! In that case SGEJSV can return the SVD + ! in scaled form. The scaling factor can be used + ! to rescale the data (X and Y). + CALL SLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2 ) + END IF + END SELECT +! + IF ( INFO1 > 0 ) THEN + ! The SVD selected subroutine did not converge. + ! Return with an error code. + INFO = 2 + RETURN + END IF +! + IF ( WORK(1) == ZERO ) THEN + ! The largest computed singular value of (scaled) + ! X is zero. Return error code -8 + ! (the 8th input variable had an illegal value). + K = 0 + INFO = -8 + CALL XERBLA('SGEDMD',-INFO) + RETURN + END IF +! + !<3> Determine the numerical rank of the data + ! snapshots matrix X. This depends on the + ! parameters NRNK and TOL. + + SELECT CASE ( NRNK ) + CASE ( -1 ) + K = 1 + DO i = 2, NUMRNK + IF ( ( WORK(i) <= WORK(1)*TOL ) .OR. & + ( WORK(i) <= SMALL ) ) EXIT + K = K + 1 + END DO + CASE ( -2 ) + K = 1 + DO i = 1, NUMRNK-1 + IF ( ( WORK(i+1) <= WORK(i)*TOL ) .OR. & + ( WORK(i) <= SMALL ) ) EXIT + K = K + 1 + END DO + CASE DEFAULT + K = 1 + DO i = 2, NRNK + IF ( WORK(i) <= SMALL ) EXIT + K = K + 1 + END DO + END SELECT + ! Now, U = X(1:M,1:K) is the SVD/POD basis for the + ! snapshot data in the input matrix X. + + !<4> Compute the Rayleigh quotient S = U^T * A * U. + ! Depending on the requested outputs, the computation + ! is organized to compute additional auxiliary + ! matrices (for the residuals and refinements). + ! + ! In all formulas below, we need V_k*Sigma_k^(-1) + ! where either V_k is in W(1:N,1:K), or V_k^T is in + ! W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)). + IF ( LSAME(T_OR_N, 'N') ) THEN + DO i = 1, K + CALL SSCAL( N, ONE/WORK(i), W(1,i), 1 ) ! BLAS CALL + ! W(1:N,i) = (ONE/WORK(i)) * W(1:N,i) ! INTRINSIC + END DO + ELSE + ! This non-unit stride access is due to the fact + ! that SGESVD, SGESVDQ and SGESDD return the + ! transposed matrix of the right singular vectors. + !DO i = 1, K + ! CALL SSCAL( N, ONE/WORK(i), W(i,1), LDW ) ! BLAS CALL + ! ! W(i,1:N) = (ONE/WORK(i)) * W(i,1:N) ! INTRINSIC + !END DO + DO i = 1, K + WORK(N+i) = ONE/WORK(i) + END DO + DO j = 1, N + DO i = 1, K + W(i,j) = (WORK(N+i))*W(i,j) + END DO + END DO + END IF +! + IF ( WNTREF ) THEN + ! + ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K))) + ! for computing the refined Ritz vectors + ! (optionally, outside SGEDMD). + CALL SGEMM( 'N', T_OR_N, M, K, N, ONE, Y, LDY, W, & + LDW, ZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T' + ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K)) ! INTRINSIC, for T_OR_N=='N' + ! + ! At this point Z contains + ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and + ! this is needed for computing the residuals. + ! This matrix is returned in the array B and + ! it can be used to compute refined Ritz vectors. + CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB ) ! BLAS CALL + ! B(1:M,1:K) = Z(1:M,1:K) ! INTRINSIC + + CALL SGEMM( 'T', 'N', K, K, M, ONE, X, LDX, Z, & + LDZ, ZERO, S, LDS ) ! BLAS CALL + ! S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC + ! At this point S = U^T * A * U is the Rayleigh quotient. + ELSE + ! A * U(:,1:K) is not explicitly needed and the + ! computation is organized differently. The Rayleigh + ! quotient is computed more efficiently. + CALL SGEMM( 'T', 'N', K, N, M, ONE, X, LDX, Y, LDY, & + ZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) ) ! INTRINSIC + ! In the two SGEMM calls here, can use K for LDZ + CALL SGEMM( 'N', T_OR_N, K, K, N, ONE, Z, LDZ, W, & + LDW, ZERO, S, LDS ) ! BLAS CALL + ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T' + ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K))) ! INTRINSIC, for T_OR_N=='N' + ! At this point S = U^T * A * U is the Rayleigh quotient. + ! If the residuals are requested, save scaled V_k into Z. + ! Recall that V_k or V_k^T is stored in W. + IF ( WNTRES .OR. WNTEX ) THEN + IF ( LSAME(T_OR_N, 'N') ) THEN + CALL SLACPY( 'A', N, K, W, LDW, Z, LDZ ) + ELSE + CALL SLACPY( 'A', K, N, W, LDW, Z, LDZ ) + END IF + END IF + END IF +! + !<5> Compute the Ritz values and (if requested) the + ! right eigenvectors of the Rayleigh quotient. + ! + CALL SGEEV( 'N', JOBZL, K, S, LDS, REIG, IMEIG, W, & + LDW, W, LDW, WORK(N+1), LWORK-N, INFO1 ) ! LAPACK CALL + ! + ! W(1:K,1:K) contains the eigenvectors of the Rayleigh + ! quotient. Even in the case of complex spectrum, all + ! computation is done in real arithmetic. REIG and + ! IMEIG are the real and the imaginary parts of the + ! eigenvalues, so that the spectrum is given as + ! REIG(:) + sqrt(-1)*IMEIG(:). Complex conjugate pairs + ! are listed at consecutive positions. For such a + ! complex conjugate pair of the eigenvalues, the + ! corresponding eigenvectors are also a complex + ! conjugate pair with the real and imaginary parts + ! stored column-wise in W at the corresponding + ! consecutive column indices. See the description of Z. + ! Also, see the description of SGEEV. + IF ( INFO1 > 0 ) THEN + ! SGEEV failed to compute the eigenvalues and + ! eigenvectors of the Rayleigh quotient. + INFO = 3 + RETURN + END IF +! + ! <6> Compute the eigenvectors (if requested) and, + ! the residuals (if requested). + ! + IF ( WNTVEC .OR. WNTEX ) THEN + IF ( WNTRES ) THEN + IF ( WNTREF ) THEN + ! Here, if the refinement is requested, we have + ! A*U(:,1:K) already computed and stored in Z. + ! For the residuals, need Y = A * U(:,1;K) * W. + CALL SGEMM( 'N', 'N', M, K, K, ONE, Z, LDZ, W, & + LDW, ZERO, Y, LDY ) ! BLAS CALL + ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K) ! INTRINSIC + ! This frees Z; Y contains A * U(:,1:K) * W. + ELSE + ! Compute S = V_k * Sigma_k^(-1) * W, where + ! V_k * Sigma_k^(-1) is stored in Z + CALL SGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, & + W, LDW, ZERO, S, LDS ) + ! Then, compute Z = Y * S = + ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = + ! = A * U(:,1:K) * W(1:K,1:K) + CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, & + LDS, ZERO, Z, LDZ ) + ! Save a copy of Z into Y and free Z for holding + ! the Ritz vectors. + CALL SLACPY( 'A', M, K, Z, LDZ, Y, LDY ) + IF ( WNTEX ) CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB ) + END IF + ELSE IF ( WNTEX ) THEN + ! Compute S = V_k * Sigma_k^(-1) * W, where + ! V_k * Sigma_k^(-1) is stored in Z + CALL SGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, & + W, LDW, ZERO, S, LDS ) + ! Then, compute Z = Y * S = + ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = + ! = A * U(:,1:K) * W(1:K,1:K) + CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, & + LDS, ZERO, B, LDB ) + ! The above call replaces the following two calls + ! that were used in the developing-testing phase. + ! CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, & + ! LDS, ZERO, Z, LDZ) + ! Save a copy of Z into B and free Z for holding + ! the Ritz vectors. + ! CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB ) + END IF +! + ! Compute the real form of the Ritz vectors + IF ( WNTVEC ) CALL SGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, & + ZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K)) ! INTRINSIC +! + IF ( WNTRES ) THEN + i = 1 + DO WHILE ( i <= K ) + IF ( IMEIG(i) == ZERO ) THEN + ! have a real eigenvalue with real eigenvector + CALL SAXPY( M, -REIG(i), Z(1,i), 1, Y(1,i), 1 ) ! BLAS CALL + ! Y(1:M,i) = Y(1:M,i) - REIG(i) * Z(1:M,i) ! INTRINSIC + RES(i) = SNRM2( M, Y(1,i), 1 ) ! BLAS CALL + i = i + 1 + ELSE + ! Have a complex conjugate pair + ! REIG(i) +- sqrt(-1)*IMEIG(i). + ! Since all computation is done in real + ! arithmetic, the formula for the residual + ! is recast for real representation of the + ! complex conjugate eigenpair. See the + ! description of RES. + AB(1,1) = REIG(i) + AB(2,1) = -IMEIG(i) + AB(1,2) = IMEIG(i) + AB(2,2) = REIG(i) + CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), & + LDZ, AB, 2, ONE, Y(1,i), LDY ) ! BLAS CALL + ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB ! INTRINSIC + RES(i) = SLANGE( 'F', M, 2, Y(1,i), LDY, & + WORK(N+1) ) ! LAPACK CALL + RES(i+1) = RES(i) + i = i + 2 + END IF + END DO + END IF + END IF +! + IF ( WHTSVD == 4 ) THEN + WORK(N+1) = XSCL1 + WORK(N+2) = XSCL2 + END IF +! +! Successful exit. + IF ( .NOT. BADXY ) THEN + INFO = 0 + ELSE + ! A warning on possible data inconsistency. + ! This should be a rare event. + INFO = 4 + END IF +!............................................................ + RETURN +! ...... + END SUBROUTINE SGEDMD + diff --git a/lapack-netlib/SRC/sgedmdq.f90 b/lapack-netlib/SRC/sgedmdq.f90 new file mode 100644 index 000000000..acd5d56c6 --- /dev/null +++ b/lapack-netlib/SRC/sgedmdq.f90 @@ -0,0 +1,703 @@ +SUBROUTINE SGEDMDQ( JOBS, JOBZ, JOBR, JOBQ, JOBT, JOBF, & + WHTSVD, M, N, F, LDF, X, LDX, Y, & + LDY, NRNK, TOL, K, REIG, IMEIG, & + Z, LDZ, RES, B, LDB, V, LDV, & + S, LDS, WORK, LWORK, IWORK, LIWORK, INFO ) +! March 2023 +!..... + USE iso_fortran_env + IMPLICIT NONE + INTEGER, PARAMETER :: WP = real32 +!..... +! Scalar arguments + CHARACTER, INTENT(IN) :: JOBS, JOBZ, JOBR, JOBQ, & + JOBT, JOBF + INTEGER, INTENT(IN) :: WHTSVD, M, N, LDF, LDX, & + LDY, NRNK, LDZ, LDB, LDV, & + LDS, LWORK, LIWORK + INTEGER, INTENT(OUT) :: INFO, K + REAL(KIND=WP), INTENT(IN) :: TOL +! Array arguments + REAL(KIND=WP), INTENT(INOUT) :: F(LDF,*) + REAL(KIND=WP), INTENT(OUT) :: X(LDX,*), Y(LDY,*), & + Z(LDZ,*), B(LDB,*), & + V(LDV,*), S(LDS,*) + REAL(KIND=WP), INTENT(OUT) :: REIG(*), IMEIG(*), & + RES(*) + REAL(KIND=WP), INTENT(OUT) :: WORK(*) + INTEGER, INTENT(OUT) :: IWORK(*) +!..... +! Purpose +! ======= +! SGEDMDQ computes the Dynamic Mode Decomposition (DMD) for +! a pair of data snapshot matrices, using a QR factorization +! based compression of the data. For the input matrices +! X and Y such that Y = A*X with an unaccessible matrix +! A, SGEDMDQ computes a certain number of Ritz pairs of A using +! the standard Rayleigh-Ritz extraction from a subspace of +! range(X) that is determined using the leading left singular +! vectors of X. Optionally, SGEDMDQ returns the residuals +! of the computed Ritz pairs, the information needed for +! a refinement of the Ritz vectors, or the eigenvectors of +! the Exact DMD. +! For further details see the references listed +! below. For more details of the implementation see [3]. +! +! References +! ========== +! [1] P. Schmid: Dynamic mode decomposition of numerical +! and experimental data, +! Journal of Fluid Mechanics 656, 5-28, 2010. +! [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal +! decompositions: analysis and enhancements, +! SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. +! [3] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition I. Technical report. AIMDyn Inc. +! and LAPACK Working Note 298. +! [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. +! Brunton, N. Kutz: On Dynamic Mode Decomposition: +! Theory and Applications, Journal of Computational +! Dynamics 1(2), 391 -421, 2014. +! +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! and supported by +! - DARPA SBIR project "Koopman Operator-Based Forecasting +! for Nonstationary Processes from Near-Term, Limited +! Observational Data" Contract No: W31P4Q-21-C-0007 +! - DARPA PAI project "Physics-Informed Machine Learning +! Methodologies" Contract No: HR0011-18-9-0033 +! - DARPA MoDyL project "A Data-Driven, Operator-Theoretic +! Framework for Space-Time Analysis of Process Dynamics" +! Contract No: HR0011-16-C-0116 +! Any opinions, findings and conclusions or recommendations +! expressed in this material are those of the author and +! do not necessarily reflect the views of the DARPA SBIR +! Program Office. +!============================================================ +! Distribution Statement A: +! Approved for Public Release, Distribution Unlimited. +! Cleared by DARPA on September 29, 2022 +!============================================================ +!...................................................................... +! Arguments +! ========= +! JOBS (input) CHARACTER*1 +! Determines whether the initial data snapshots are scaled +! by a diagonal matrix. The data snapshots are the columns +! of F. The leading N-1 columns of F are denoted X and the +! trailing N-1 columns are denoted Y. +! 'S' :: The data snapshots matrices X and Y are multiplied +! with a diagonal matrix D so that X*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'C' :: The snapshots are scaled as with the 'S' option. +! If it is found that an i-th column of X is zero +! vector and the corresponding i-th column of Y is +! non-zero, then the i-th column of Y is set to +! zero and a warning flag is raised. +! 'Y' :: The data snapshots matrices X and Y are multiplied +! by a diagonal matrix D so that Y*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'N' :: No data scaling. +!..... +! JOBZ (input) CHARACTER*1 +! Determines whether the eigenvectors (Koopman modes) will +! be computed. +! 'V' :: The eigenvectors (Koopman modes) will be computed +! and returned in the matrix Z. +! See the description of Z. +! 'F' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product Z*V, where Z +! is orthonormal and V contains the eigenvectors +! of the corresponding Rayleigh quotient. +! See the descriptions of F, V, Z. +! 'Q' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product Q*Z, where Z +! contains the eigenvectors of the compression of the +! underlying discretized operator onto the span of +! the data snapshots. See the descriptions of F, V, Z. +! Q is from the initial QR factorization. +! 'N' :: The eigenvectors are not computed. +!..... +! JOBR (input) CHARACTER*1 +! Determines whether to compute the residuals. +! 'R' :: The residuals for the computed eigenpairs will +! be computed and stored in the array RES. +! See the description of RES. +! For this option to be legal, JOBZ must be 'V'. +! 'N' :: The residuals are not computed. +!..... +! JOBQ (input) CHARACTER*1 +! Specifies whether to explicitly compute and return the +! orthogonal matrix from the QR factorization. +! 'Q' :: The matrix Q of the QR factorization of the data +! snapshot matrix is computed and stored in the +! array F. See the description of F. +! 'N' :: The matrix Q is not explicitly computed. +!..... +! JOBT (input) CHARACTER*1 +! Specifies whether to return the upper triangular factor +! from the QR factorization. +! 'R' :: The matrix R of the QR factorization of the data +! snapshot matrix F is returned in the array Y. +! See the description of Y and Further details. +! 'N' :: The matrix R is not returned. +!..... +! JOBF (input) CHARACTER*1 +! Specifies whether to store information needed for post- +! processing (e.g. computing refined Ritz vectors) +! 'R' :: The matrix needed for the refinement of the Ritz +! vectors is computed and stored in the array B. +! See the description of B. +! 'E' :: The unscaled eigenvectors of the Exact DMD are +! computed and returned in the array B. See the +! description of B. +! 'N' :: No eigenvector refinement data is computed. +! To be useful on exit, this option needs JOBQ='Q'. +!..... +! WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } +! Allows for a selection of the SVD algorithm from the +! LAPACK library. +! 1 :: SGESVD (the QR SVD algorithm) +! 2 :: SGESDD (the Divide and Conquer algorithm; if enough +! workspace available, this is the fastest option) +! 3 :: SGESVDQ (the preconditioned QR SVD ; this and 4 +! are the most accurate options) +! 4 :: SGEJSV (the preconditioned Jacobi SVD; this and 3 +! are the most accurate options) +! For the four methods above, a significant difference in +! the accuracy of small singular values is possible if +! the snapshots vary in norm so that X is severely +! ill-conditioned. If small (smaller than EPS*||X||) +! singular values are of interest and JOBS=='N', then +! the options (3, 4) give the most accurate results, where +! the option 4 is slightly better and with stronger +! theoretical background. +! If JOBS=='S', i.e. the columns of X will be normalized, +! then all methods give nearly equally accurate results. +!..... +! M (input) INTEGER, M >= 0 +! The state space dimension (the number of rows of F) +!..... +! N (input) INTEGER, 0 <= N <= M +! The number of data snapshots from a single trajectory, +! taken at equidistant discrete times. This is the +! number of columns of F. +!..... +! F (input/output) REAL(KIND=WP) M-by-N array +! > On entry, +! the columns of F are the sequence of data snapshots +! from a single trajectory, taken at equidistant discrete +! times. It is assumed that the column norms of F are +! in the range of the normalized floating point numbers. +! < On exit, +! If JOBQ == 'Q', the array F contains the orthogonal +! matrix/factor of the QR factorization of the initial +! data snapshots matrix F. See the description of JOBQ. +! If JOBQ == 'N', the entries in F strictly below the main +! diagonal contain, column-wise, the information on the +! Householder vectors, as returned by SGEQRF. The +! remaining information to restore the orthogonal matrix +! of the initial QR factorization is stored in WORK(1:N). +! See the description of WORK. +!..... +! LDF (input) INTEGER, LDF >= M +! The leading dimension of the array F. +!..... +! X (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array +! X is used as workspace to hold representations of the +! leading N-1 snapshots in the orthonormal basis computed +! in the QR factorization of F. +! On exit, the leading K columns of X contain the leading +! K left singular vectors of the above described content +! of X. To lift them to the space of the left singular +! vectors U(:,1:K)of the input data, pre-multiply with the +! Q factor from the initial QR factorization. +! See the descriptions of F, K, V and Z. +!..... +! LDX (input) INTEGER, LDX >= N +! The leading dimension of the array X +!..... +! Y (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array +! Y is used as workspace to hold representations of the +! trailing N-1 snapshots in the orthonormal basis computed +! in the QR factorization of F. +! On exit, +! If JOBT == 'R', Y contains the MIN(M,N)-by-N upper +! triangular factor from the QR factorization of the data +! snapshot matrix F. +!..... +! LDY (input) INTEGER , LDY >= N +! The leading dimension of the array Y +!..... +! NRNK (input) INTEGER +! Determines the mode how to compute the numerical rank, +! i.e. how to truncate small singular values of the input +! matrix X. On input, if +! NRNK = -1 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(1) +! This option is recommended. +! NRNK = -2 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(i-1) +! This option is included for R&D purposes. +! It requires highly accurate SVD, which +! may not be feasible. +! The numerical rank can be enforced by using positive +! value of NRNK as follows: +! 0 < NRNK <= N-1 :: at most NRNK largest singular values +! will be used. If the number of the computed nonzero +! singular values is less than NRNK, then only those +! nonzero values will be used and the actually used +! dimension is less than NRNK. The actual number of +! the nonzero singular values is returned in the variable +! K. See the description of K. +!..... +! TOL (input) REAL(KIND=WP), 0 <= TOL < 1 +! The tolerance for truncating small singular values. +! See the description of NRNK. +!..... +! K (output) INTEGER, 0 <= K <= N +! The dimension of the SVD/POD basis for the leading N-1 +! data snapshots (columns of F) and the number of the +! computed Ritz pairs. The value of K is determined +! according to the rule set by the parameters NRNK and +! TOL. See the descriptions of NRNK and TOL. +!..... +! REIG (output) REAL(KIND=WP) (N-1)-by-1 array +! The leading K (K<=N) entries of REIG contain +! the real parts of the computed eigenvalues +! REIG(1:K) + sqrt(-1)*IMEIG(1:K). +! See the descriptions of K, IMEIG, Z. +!..... +! IMEIG (output) REAL(KIND=WP) (N-1)-by-1 array +! The leading K (K0, then the corresponding complex +! conjugate pair of eigenvalues reads +! LAMBDA(i) = REIG(i) + sqrt(-1)*IMAG(i) +! LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i) +! That is, complex conjugate pairs have consecutive +! indices (i,i+1), with the positive imaginary part +! listed first. +! See the descriptions of K, REIG, Z. +!..... +! Z (workspace/output) REAL(KIND=WP) M-by-(N-1) array +! If JOBZ =='V' then +! Z contains real Ritz vectors as follows: +! If IMEIG(i)=0, then Z(:,i) is an eigenvector of +! the i-th Ritz value. +! If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then +! [Z(:,i) Z(:,i+1)] span an invariant subspace and +! the Ritz values extracted from this subspace are +! REIG(i) + sqrt(-1)*IMEIG(i) and +! REIG(i) - sqrt(-1)*IMEIG(i). +! The corresponding eigenvectors are +! Z(:,i) + sqrt(-1)*Z(:,i+1) and +! Z(:,i) - sqrt(-1)*Z(:,i+1), respectively. +! If JOBZ == 'F', then the above descriptions hold for +! the columns of Z*V, where the columns of V are the +! eigenvectors of the K-by-K Rayleigh quotient, and Z is +! orthonormal. The columns of V are similarly structured: +! If IMEIG(i) == 0 then Z*V(:,i) is an eigenvector, and if +! IMEIG(i) > 0 then Z*V(:,i)+sqrt(-1)*Z*V(:,i+1) and +! Z*V(:,i)-sqrt(-1)*Z*V(:,i+1) +! are the eigenvectors of LAMBDA(i), LAMBDA(i+1). +! See the descriptions of REIG, IMEIG, X and V. +!..... +! LDZ (input) INTEGER , LDZ >= M +! The leading dimension of the array Z. +!..... +! RES (output) REAL(KIND=WP) (N-1)-by-1 array +! RES(1:K) contains the residuals for the K computed +! Ritz pairs. +! If LAMBDA(i) is real, then +! RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2. +! If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair +! then +! RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F +! where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ] +! [-imag(LAMBDA(i)) real(LAMBDA(i)) ]. +! It holds that +! RES(i) = || A*ZC(:,i) - LAMBDA(i) *ZC(:,i) ||_2 +! RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2 +! where ZC(:,i) = Z(:,i) + sqrt(-1)*Z(:,i+1) +! ZC(:,i+1) = Z(:,i) - sqrt(-1)*Z(:,i+1) +! See the description of Z. +!..... +! B (output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array. +! IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can +! be used for computing the refined vectors; see further +! details in the provided references. +! If JOBF == 'E', B(1:N,1;K) contains +! A*U(:,1:K)*W(1:K,1:K), which are the vectors from the +! Exact DMD, up to scaling by the inverse eigenvalues. +! In both cases, the content of B can be lifted to the +! original dimension of the input data by pre-multiplying +! with the Q factor from the initial QR factorization. +! Here A denotes a compression of the underlying operator. +! See the descriptions of F and X. +! If JOBF =='N', then B is not referenced. +!..... +! LDB (input) INTEGER, LDB >= MIN(M,N) +! The leading dimension of the array B. +!..... +! V (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array +! On exit, V(1:K,1:K) contains the K eigenvectors of +! the Rayleigh quotient. The eigenvectors of a complex +! conjugate pair of eigenvalues are returned in real form +! as explained in the description of Z. The Ritz vectors +! (returned in Z) are the product of X and V; see +! the descriptions of X and Z. +!..... +! LDV (input) INTEGER, LDV >= N-1 +! The leading dimension of the array V. +!..... +! S (output) REAL(KIND=WP) (N-1)-by-(N-1) array +! The array S(1:K,1:K) is used for the matrix Rayleigh +! quotient. This content is overwritten during +! the eigenvalue decomposition by SGEEV. +! See the description of K. +!..... +! LDS (input) INTEGER, LDS >= N-1 +! The leading dimension of the array S. +!..... +! WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array +! On exit, +! WORK(1:MIN(M,N)) contains the scalar factors of the +! elementary reflectors as returned by SGEQRF of the +! M-by-N input matrix F. +! WORK(MIN(M,N)+1:MIN(M,N)+N-1) contains the singular values of +! the input submatrix F(1:M,1:N-1). +! If the call to SGEDMDQ is only workspace query, then +! WORK(1) contains the minimal workspace length and +! WORK(2) is the optimal workspace length. Hence, the +! length of work is at least 2. +! See the description of LWORK. +!..... +! LWORK (input) INTEGER +! The minimal length of the workspace vector WORK. +! LWORK is calculated as follows: +! Let MLWQR = N (minimal workspace for SGEQRF[M,N]) +! MLWDMD = minimal workspace for SGEDMD (see the +! description of LWORK in SGEDMD) for +! snapshots of dimensions MIN(M,N)-by-(N-1) +! MLWMQR = N (minimal workspace for +! SORMQR['L','N',M,N,N]) +! MLWGQR = N (minimal workspace for SORGQR[M,N,N]) +! Then +! LWORK = MAX(N+MLWQR, N+MLWDMD) +! is updated as follows: +! if JOBZ == 'V' or JOBZ == 'F' THEN +! LWORK = MAX( LWORK,MIN(M,N)+N-1 +MLWMQR ) +! if JOBQ == 'Q' THEN +! LWORK = MAX( LWORK,MIN(M,N)+N-1+MLWGQR) +! If on entry LWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! IWORK (workspace/output) INTEGER LIWORK-by-1 array +! Workspace that is required only if WHTSVD equals +! 2 , 3 or 4. (See the description of WHTSVD). +! If on entry LWORK =-1 or LIWORK=-1, then the +! minimal length of IWORK is computed and returned in +! IWORK(1). See the description of LIWORK. +!..... +! LIWORK (input) INTEGER +! The minimal length of the workspace vector IWORK. +! If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 +! Let M1=MIN(M,N), N1=N-1. Then +! If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1)) +! If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1) +! If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1) +! If on entry LIWORK = -1, then a worskpace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! INFO (output) INTEGER +! -i < 0 :: On entry, the i-th argument had an +! illegal value +! = 0 :: Successful return. +! = 1 :: Void input. Quick exit (M=0 or N=0). +! = 2 :: The SVD computation of X did not converge. +! Suggestion: Check the input data and/or +! repeat with different WHTSVD. +! = 3 :: The computation of the eigenvalues did not +! converge. +! = 4 :: If data scaling was requested on input and +! the procedure found inconsistency in the data +! such that for some column index i, +! X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set +! to zero if JOBS=='C'. The computation proceeds +! with original or modified data and warning +! flag is set with INFO=4. +!............................................................. +!............................................................. +! Parameters +! ~~~~~~~~~~ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP +! +! Local scalars +! ~~~~~~~~~~~~~ + INTEGER :: IMINWR, INFO1, MLWDMD, MLWGQR, & + MLWMQR, MLWORK, MLWQR, MINMN, & + OLWDMD, OLWGQR, OLWMQR, OLWORK, & + OLWQR + LOGICAL :: LQUERY, SCCOLX, SCCOLY, WANTQ, & + WNTTRF, WNTRES, WNTVEC, WNTVCF, & + WNTVCQ, WNTREF, WNTEX + CHARACTER(LEN=1) :: JOBVL +! +! Local array +! ~~~~~~~~~~~ + REAL(KIND=WP) :: RDUMMY(2) +! +! External functions (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~ + LOGICAL LSAME + EXTERNAL LSAME +! +! External subroutines (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL SGEMM + EXTERNAL SGEQRF, SLACPY, SLASET, SORGQR, & + SORMQR, XERBLA + +! External subroutines +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL SGEDMD + +! Intrinsic functions +! ~~~~~~~~~~~~~~~~~~~ + INTRINSIC MAX, MIN, INT + !.......................................................... + ! + ! Test the input arguments + WNTRES = LSAME(JOBR,'R') + SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' ) + SCCOLY = LSAME(JOBS,'Y') + WNTVEC = LSAME(JOBZ,'V') + WNTVCF = LSAME(JOBZ,'F') + WNTVCQ = LSAME(JOBZ,'Q') + WNTREF = LSAME(JOBF,'R') + WNTEX = LSAME(JOBF,'E') + WANTQ = LSAME(JOBQ,'Q') + WNTTRF = LSAME(JOBT,'R') + MINMN = MIN(M,N) + INFO = 0 + LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) ) +! + IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. LSAME(JOBS,'N')) ) THEN + INFO = -1 + ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ & + .OR. LSAME(JOBZ,'N')) ) THEN + INFO = -2 + ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR. & + ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN + INFO = -3 + ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN + INFO = -4 + ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) ) THEN + INFO = -5 + ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR. & + LSAME(JOBF,'N') ) ) THEN + INFO = -6 + ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR. & + (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN + INFO = -7 + ELSE IF ( M < 0 ) THEN + INFO = -8 + ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN + INFO = -9 + ELSE IF ( LDF < M ) THEN + INFO = -11 + ELSE IF ( LDX < MINMN ) THEN + INFO = -13 + ELSE IF ( LDY < MINMN ) THEN + INFO = -15 + ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. & + ((NRNK >= 1).AND.(NRNK <=N ))) ) THEN + INFO = -16 + ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN + INFO = -17 + ELSE IF ( LDZ < M ) THEN + INFO = -22 + ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN + INFO = -25 + ELSE IF ( LDV < N-1 ) THEN + INFO = -27 + ELSE IF ( LDS < N-1 ) THEN + INFO = -29 + END IF +! + IF ( WNTVEC .OR. WNTVCF ) THEN + JOBVL = 'V' + ELSE + JOBVL = 'N' + END IF + IF ( INFO == 0 ) THEN + ! Compute the minimal and the optimal workspace + ! requirements. Simulate running the code and + ! determine minimal and optimal sizes of the + ! workspace at any moment of the run. + IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN + ! All output except K is void. INFO=1 signals + ! the void input. In case of a workspace query, + ! the minimal workspace lengths are returned. + IF ( LQUERY ) THEN + IWORK(1) = 1 + WORK(1) = 2 + WORK(2) = 2 + ELSE + K = 0 + END IF + INFO = 1 + RETURN + END IF + MLWQR = MAX(1,N) ! Minimal workspace length for SGEQRF. + MLWORK = MIN(M,N) + MLWQR + IF ( LQUERY ) THEN + CALL SGEQRF( M, N, F, LDF, WORK, RDUMMY, -1, & + INFO1 ) + OLWQR = INT(RDUMMY(1)) + OLWORK = MIN(M,N) + OLWQR + END IF + CALL SGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,& + N-1, X, LDX, Y, LDY, NRNK, TOL, K, & + REIG, IMEIG, Z, LDZ, RES, B, LDB, & + V, LDV, S, LDS, WORK, -1, IWORK, & + LIWORK, INFO1 ) + MLWDMD = INT(WORK(1)) + MLWORK = MAX(MLWORK, MINMN + MLWDMD) + IMINWR = IWORK(1) + IF ( LQUERY ) THEN + OLWDMD = INT(WORK(2)) + OLWORK = MAX(OLWORK, MINMN+OLWDMD) + END IF + IF ( WNTVEC .OR. WNTVCF ) THEN + MLWMQR = MAX(1,N) + MLWORK = MAX(MLWORK,MINMN+N-1+MLWMQR) + IF ( LQUERY ) THEN + CALL SORMQR( 'L','N', M, N, MINMN, F, LDF, & + WORK, Z, LDZ, WORK, -1, INFO1 ) + OLWMQR = INT(WORK(1)) + OLWORK = MAX(OLWORK,MINMN+N-1+OLWMQR) + END IF + END IF + IF ( WANTQ ) THEN + MLWGQR = N + MLWORK = MAX(MLWORK,MINMN+N-1+MLWGQR) + IF ( LQUERY ) THEN + CALL SORGQR( M, MINMN, MINMN, F, LDF, WORK, & + WORK, -1, INFO1 ) + OLWGQR = INT(WORK(1)) + OLWORK = MAX(OLWORK,MINMN+N-1+OLWGQR) + END IF + END IF + IMINWR = MAX( 1, IMINWR ) + MLWORK = MAX( 2, MLWORK ) + IF ( LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -31 + IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -33 + END IF + IF( INFO /= 0 ) THEN + CALL XERBLA( 'SGEDMDQ', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN +! Return minimal and optimal workspace sizes + IWORK(1) = IMINWR + WORK(1) = MLWORK + WORK(2) = OLWORK + RETURN + END IF +!..... +! Initial QR factorization that is used to represent the +! snapshots as elements of lower dimensional subspace. +! For large scale computation with M >>N , at this place +! one can use an out of core QRF. +! + CALL SGEQRF( M, N, F, LDF, WORK, & + WORK(MINMN+1), LWORK-MINMN, INFO1 ) +! +! Define X and Y as the snapshots representations in the +! orthogonal basis computed in the QR factorization. +! X corresponds to the leading N-1 and Y to the trailing +! N-1 snapshots. + CALL SLASET( 'L', MINMN, N-1, ZERO, ZERO, X, LDX ) + CALL SLACPY( 'U', MINMN, N-1, F, LDF, X, LDX ) + CALL SLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY ) + IF ( M >= 3 ) THEN + CALL SLASET( 'L', MINMN-2, N-2, ZERO, ZERO, & + Y(3,1), LDY ) + END IF +! +! Compute the DMD of the projected snapshot pairs (X,Y) + CALL SGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN, & + N-1, X, LDX, Y, LDY, NRNK, TOL, K, & + REIG, IMEIG, Z, LDZ, RES, B, LDB, V, & + LDV, S, LDS, WORK(MINMN+1), LWORK-MINMN, IWORK, & + LIWORK, INFO1 ) + IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN + ! Return with error code. + INFO = INFO1 + RETURN + ELSE + INFO = INFO1 + END IF +! +! The Ritz vectors (Koopman modes) can be explicitly +! formed or returned in factored form. + IF ( WNTVEC ) THEN + ! Compute the eigenvectors explicitly. + IF ( M > MINMN ) CALL SLASET( 'A', M-MINMN, K, ZERO, & + ZERO, Z(MINMN+1,1), LDZ ) + CALL SORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z, & + LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 ) + ELSE IF ( WNTVCF ) THEN + ! Return the Ritz vectors (eigenvectors) in factored + ! form Z*V, where Z contains orthonormal matrix (the + ! product of Q from the initial QR factorization and + ! the SVD/POD_basis returned by SGEDMD in X) and the + ! second factor (the eigenvectors of the Rayleigh + ! quotient) is in the array V, as returned by SGEDMD. + CALL SLACPY( 'A', N, K, X, LDX, Z, LDZ ) + IF ( M > N ) CALL SLASET( 'A', M-N, K, ZERO, ZERO, & + Z(N+1,1), LDZ ) + CALL SORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z, & + LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 ) + END IF +! +! Some optional output variables: +! +! The upper triangular factor in the initial QR +! factorization is optionally returned in the array Y. +! This is useful if this call to SGEDMDQ is to be +! followed by a streaming DMD that is implemented in a +! QR compressed form. + IF ( WNTTRF ) THEN ! Return the upper triangular R in Y + CALL SLASET( 'A', MINMN, N, ZERO, ZERO, Y, LDY ) + CALL SLACPY( 'U', MINMN, N, F, LDF, Y, LDY ) + END IF +! +! The orthonormal/orthogonal factor in the initial QR +! factorization is optionally returned in the array F. +! Same as with the triangular factor above, this is +! useful in a streaming DMD. + IF ( WANTQ ) THEN ! Q overwrites F + CALL SORGQR( M, MINMN, MINMN, F, LDF, WORK, & + WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 ) + END IF +! + RETURN +! + END SUBROUTINE SGEDMDQ + \ No newline at end of file diff --git a/lapack-netlib/SRC/zgedmd.f90 b/lapack-netlib/SRC/zgedmd.f90 new file mode 100644 index 000000000..090641ad8 --- /dev/null +++ b/lapack-netlib/SRC/zgedmd.f90 @@ -0,0 +1,996 @@ + SUBROUTINE ZGEDMD( JOBS, JOBZ, JOBR, JOBF, WHTSVD, & + M, N, X, LDX, Y, LDY, NRNK, TOL, & + K, EIGS, Z, LDZ, RES, B, LDB, & + W, LDW, S, LDS, ZWORK, LZWORK, & + RWORK, LRWORK, IWORK, LIWORK, INFO ) +! March 2023 +!..... + USE iso_fortran_env + IMPLICIT NONE + INTEGER, PARAMETER :: WP = real64 + +!..... +! Scalar arguments + CHARACTER, INTENT(IN) :: JOBS, JOBZ, JOBR, JOBF + INTEGER, INTENT(IN) :: WHTSVD, M, N, LDX, LDY, & + NRNK, LDZ, LDB, LDW, LDS, & + LIWORK, LRWORK, LZWORK + INTEGER, INTENT(OUT) :: K, INFO + REAL(KIND=WP), INTENT(IN) :: TOL +! Array arguments + COMPLEX(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*) + COMPLEX(KIND=WP), INTENT(OUT) :: Z(LDZ,*), B(LDB,*), & + W(LDW,*), S(LDS,*) + COMPLEX(KIND=WP), INTENT(OUT) :: EIGS(*) + COMPLEX(KIND=WP), INTENT(OUT) :: ZWORK(*) + REAL(KIND=WP), INTENT(OUT) :: RES(*) + REAL(KIND=WP), INTENT(OUT) :: RWORK(*) + INTEGER, INTENT(OUT) :: IWORK(*) +!............................................................ +! Purpose +! ======= +! ZGEDMD computes the Dynamic Mode Decomposition (DMD) for +! a pair of data snapshot matrices. For the input matrices +! X and Y such that Y = A*X with an unaccessible matrix +! A, ZGEDMD computes a certain number of Ritz pairs of A using +! the standard Rayleigh-Ritz extraction from a subspace of +! range(X) that is determined using the leading left singular +! vectors of X. Optionally, ZGEDMD returns the residuals +! of the computed Ritz pairs, the information needed for +! a refinement of the Ritz vectors, or the eigenvectors of +! the Exact DMD. +! For further details see the references listed +! below. For more details of the implementation see [3]. +! +! References +! ========== +! [1] P. Schmid: Dynamic mode decomposition of numerical +! and experimental data, +! Journal of Fluid Mechanics 656, 5-28, 2010. +! [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal +! decompositions: analysis and enhancements, +! SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. +! [3] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition I. Technical report. AIMDyn Inc. +! and LAPACK Working Note 298. +! [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. +! Brunton, N. Kutz: On Dynamic Mode Decomposition: +! Theory and Applications, Journal of Computational +! Dynamics 1(2), 391 -421, 2014. +! +!...................................................................... +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! and supported by +! - DARPA SBIR project "Koopman Operator-Based Forecasting +! for Nonstationary Processes from Near-Term, Limited +! Observational Data" Contract No: W31P4Q-21-C-0007 +! - DARPA PAI project "Physics-Informed Machine Learning +! Methodologies" Contract No: HR0011-18-9-0033 +! - DARPA MoDyL project "A Data-Driven, Operator-Theoretic +! Framework for Space-Time Analysis of Process Dynamics" +! Contract No: HR0011-16-C-0116 +! Any opinions, findings and conclusions or recommendations +! expressed in this material are those of the author and +! do not necessarily reflect the views of the DARPA SBIR +! Program Office +!============================================================ +! Distribution Statement A: +! Approved for Public Release, Distribution Unlimited. +! Cleared by DARPA on September 29, 2022 +!============================================================ +!............................................................ +! Arguments +! ========= +! JOBS (input) CHARACTER*1 +! Determines whether the initial data snapshots are scaled +! by a diagonal matrix. +! 'S' :: The data snapshots matrices X and Y are multiplied +! with a diagonal matrix D so that X*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'C' :: The snapshots are scaled as with the 'S' option. +! If it is found that an i-th column of X is zero +! vector and the corresponding i-th column of Y is +! non-zero, then the i-th column of Y is set to +! zero and a warning flag is raised. +! 'Y' :: The data snapshots matrices X and Y are multiplied +! by a diagonal matrix D so that Y*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'N' :: No data scaling. +!..... +! JOBZ (input) CHARACTER*1 +! Determines whether the eigenvectors (Koopman modes) will +! be computed. +! 'V' :: The eigenvectors (Koopman modes) will be computed +! and returned in the matrix Z. +! See the description of Z. +! 'F' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product X(:,1:K)*W, where X +! contains a POD basis (leading left singular vectors +! of the data matrix X) and W contains the eigenvectors +! of the corresponding Rayleigh quotient. +! See the descriptions of K, X, W, Z. +! 'N' :: The eigenvectors are not computed. +!..... +! JOBR (input) CHARACTER*1 +! Determines whether to compute the residuals. +! 'R' :: The residuals for the computed eigenpairs will be +! computed and stored in the array RES. +! See the description of RES. +! For this option to be legal, JOBZ must be 'V'. +! 'N' :: The residuals are not computed. +!..... +! JOBF (input) CHARACTER*1 +! Specifies whether to store information needed for post- +! processing (e.g. computing refined Ritz vectors) +! 'R' :: The matrix needed for the refinement of the Ritz +! vectors is computed and stored in the array B. +! See the description of B. +! 'E' :: The unscaled eigenvectors of the Exact DMD are +! computed and returned in the array B. See the +! description of B. +! 'N' :: No eigenvector refinement data is computed. +!..... +! WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } +! Allows for a selection of the SVD algorithm from the +! LAPACK library. +! 1 :: ZGESVD (the QR SVD algorithm) +! 2 :: ZGESDD (the Divide and Conquer algorithm; if enough +! workspace available, this is the fastest option) +! 3 :: ZGESVDQ (the preconditioned QR SVD ; this and 4 +! are the most accurate options) +! 4 :: ZGEJSV (the preconditioned Jacobi SVD; this and 3 +! are the most accurate options) +! For the four methods above, a significant difference in +! the accuracy of small singular values is possible if +! the snapshots vary in norm so that X is severely +! ill-conditioned. If small (smaller than EPS*||X||) +! singular values are of interest and JOBS=='N', then +! the options (3, 4) give the most accurate results, where +! the option 4 is slightly better and with stronger +! theoretical background. +! If JOBS=='S', i.e. the columns of X will be normalized, +! then all methods give nearly equally accurate results. +!..... +! M (input) INTEGER, M>= 0 +! The state space dimension (the row dimension of X, Y). +!..... +! N (input) INTEGER, 0 <= N <= M +! The number of data snapshot pairs +! (the number of columns of X and Y). +!..... +! X (input/output) COMPLEX(KIND=WP) M-by-N array +! > On entry, X contains the data snapshot matrix X. It is +! assumed that the column norms of X are in the range of +! the normalized floating point numbers. +! < On exit, the leading K columns of X contain a POD basis, +! i.e. the leading K left singular vectors of the input +! data matrix X, U(:,1:K). All N columns of X contain all +! left singular vectors of the input matrix X. +! See the descriptions of K, Z and W. +!..... +! LDX (input) INTEGER, LDX >= M +! The leading dimension of the array X. +!..... +! Y (input/workspace/output) COMPLEX(KIND=WP) M-by-N array +! > On entry, Y contains the data snapshot matrix Y +! < On exit, +! If JOBR == 'R', the leading K columns of Y contain +! the residual vectors for the computed Ritz pairs. +! See the description of RES. +! If JOBR == 'N', Y contains the original input data, +! scaled according to the value of JOBS. +!..... +! LDY (input) INTEGER , LDY >= M +! The leading dimension of the array Y. +!..... +! NRNK (input) INTEGER +! Determines the mode how to compute the numerical rank, +! i.e. how to truncate small singular values of the input +! matrix X. On input, if +! NRNK = -1 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(1) +! This option is recommended. +! NRNK = -2 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(i-1) +! This option is included for R&D purposes. +! It requires highly accurate SVD, which +! may not be feasible. +! The numerical rank can be enforced by using positive +! value of NRNK as follows: +! 0 < NRNK <= N :: at most NRNK largest singular values +! will be used. If the number of the computed nonzero +! singular values is less than NRNK, then only those +! nonzero values will be used and the actually used +! dimension is less than NRNK. The actual number of +! the nonzero singular values is returned in the variable +! K. See the descriptions of TOL and K. +!..... +! TOL (input) REAL(KIND=WP), 0 <= TOL < 1 +! The tolerance for truncating small singular values. +! See the description of NRNK. +!..... +! K (output) INTEGER, 0 <= K <= N +! The dimension of the POD basis for the data snapshot +! matrix X and the number of the computed Ritz pairs. +! The value of K is determined according to the rule set +! by the parameters NRNK and TOL. +! See the descriptions of NRNK and TOL. +!..... +! EIGS (output) COMPLEX(KIND=WP) N-by-1 array +! The leading K (K<=N) entries of EIGS contain +! the computed eigenvalues (Ritz values). +! See the descriptions of K, and Z. +!..... +! Z (workspace/output) COMPLEX(KIND=WP) M-by-N array +! If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i) +! is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1. +! If JOBZ == 'F', then the Z(:,i)'s are given implicitly as +! the columns of X(:,1:K)*W(1:K,1:K), i.e. X(:,1:K)*W(:,i) +! is an eigenvector corresponding to EIGS(i). The columns +! of W(1:k,1:K) are the computed eigenvectors of the +! K-by-K Rayleigh quotient. +! See the descriptions of EIGS, X and W. +!..... +! LDZ (input) INTEGER , LDZ >= M +! The leading dimension of the array Z. +!..... +! RES (output) REAL(KIND=WP) N-by-1 array +! RES(1:K) contains the residuals for the K computed +! Ritz pairs, +! RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. +! See the description of EIGS and Z. +!..... +! B (output) COMPLEX(KIND=WP) M-by-N array. +! IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can +! be used for computing the refined vectors; see further +! details in the provided references. +! If JOBF == 'E', B(1:M,1:K) contains +! A*U(:,1:K)*W(1:K,1:K), which are the vectors from the +! Exact DMD, up to scaling by the inverse eigenvalues. +! If JOBF =='N', then B is not referenced. +! See the descriptions of X, W, K. +!..... +! LDB (input) INTEGER, LDB >= M +! The leading dimension of the array B. +!..... +! W (workspace/output) COMPLEX(KIND=WP) N-by-N array +! On exit, W(1:K,1:K) contains the K computed +! eigenvectors of the matrix Rayleigh quotient. +! The Ritz vectors (returned in Z) are the +! product of X (containing a POD basis for the input +! matrix X) and W. See the descriptions of K, S, X and Z. +! W is also used as a workspace to temporarily store the +! right singular vectors of X. +!..... +! LDW (input) INTEGER, LDW >= N +! The leading dimension of the array W. +!..... +! S (workspace/output) COMPLEX(KIND=WP) N-by-N array +! The array S(1:K,1:K) is used for the matrix Rayleigh +! quotient. This content is overwritten during +! the eigenvalue decomposition by ZGEEV. +! See the description of K. +!..... +! LDS (input) INTEGER, LDS >= N +! The leading dimension of the array S. +!..... +! ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array +! ZWORK is used as complex workspace in the complex SVD, as +! specified by WHTSVD (1,2, 3 or 4) and for ZGEEV for computing +! the eigenvalues of a Rayleigh quotient. +! If the call to ZGEDMD is only workspace query, then +! ZWORK(1) contains the minimal complex workspace length and +! ZWORK(2) is the optimal complex workspace length. +! Hence, the length of work is at least 2. +! See the description of LZWORK. +!..... +! LZWORK (input) INTEGER +! The minimal length of the workspace vector ZWORK. +! LZWORK is calculated as MAX(LZWORK_SVD, LZWORK_ZGEEV), +! where LZWORK_ZGEEV = MAX( 1, 2*N ) and the minimal +! LZWORK_SVD is calculated as follows +! If WHTSVD == 1 :: ZGESVD :: +! LZWORK_SVD = MAX(1,2*MIN(M,N)+MAX(M,N)) +! If WHTSVD == 2 :: ZGESDD :: +! LZWORK_SVD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N) +! If WHTSVD == 3 :: ZGESVDQ :: +! LZWORK_SVD = obtainable by a query +! If WHTSVD == 4 :: ZGEJSV :: +! LZWORK_SVD = obtainable by a query +! If on entry LZWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths and returns them in +! LZWORK(1) and LZWORK(2), respectively. +!..... +! RWORK (workspace/output) REAL(KIND=WP) LRWORK-by-1 array +! On exit, RWORK(1:N) contains the singular values of +! X (for JOBS=='N') or column scaled X (JOBS=='S', 'C'). +! If WHTSVD==4, then RWORK(N+1) and RWORK(N+2) contain +! scaling factor RWORK(N+2)/RWORK(N+1) used to scale X +! and Y to avoid overflow in the SVD of X. +! This may be of interest if the scaling option is off +! and as many as possible smallest eigenvalues are +! desired to the highest feasible accuracy. +! If the call to ZGEDMD is only workspace query, then +! RWORK(1) contains the minimal workspace length. +! See the description of LRWORK. +!..... +! LRWORK (input) INTEGER +! The minimal length of the workspace vector RWORK. +! LRWORK is calculated as follows: +! LRWORK = MAX(1, N+LRWORK_SVD,N+LRWORK_ZGEEV), where +! LRWORK_ZGEEV = MAX(1,2*N) and RWORK_SVD is the real workspace +! for the SVD subroutine determined by the input parameter +! WHTSVD. +! If WHTSVD == 1 :: ZGESVD :: +! LRWORK_SVD = 5*MIN(M,N) +! If WHTSVD == 2 :: ZGESDD :: +! LRWORK_SVD = MAX(5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), +! 2*MAX(M,N)*MIN(M,N)+2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) ) +! If WHTSVD == 3 :: ZGESVDQ :: +! LRWORK_SVD = obtainable by a query +! If WHTSVD == 4 :: ZGEJSV :: +! LRWORK_SVD = obtainable by a query +! If on entry LRWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! real workspace length and returns it in RWORK(1). +!..... +! IWORK (workspace/output) INTEGER LIWORK-by-1 array +! Workspace that is required only if WHTSVD equals +! 2 , 3 or 4. (See the description of WHTSVD). +! If on entry LWORK =-1 or LIWORK=-1, then the +! minimal length of IWORK is computed and returned in +! IWORK(1). See the description of LIWORK. +!..... +! LIWORK (input) INTEGER +! The minimal length of the workspace vector IWORK. +! If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 +! If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)) +! If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1) +! If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N) +! If on entry LIWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for ZWORK, RWORK and +! IWORK. See the descriptions of ZWORK, RWORK and IWORK. +!..... +! INFO (output) INTEGER +! -i < 0 :: On entry, the i-th argument had an +! illegal value +! = 0 :: Successful return. +! = 1 :: Void input. Quick exit (M=0 or N=0). +! = 2 :: The SVD computation of X did not converge. +! Suggestion: Check the input data and/or +! repeat with different WHTSVD. +! = 3 :: The computation of the eigenvalues did not +! converge. +! = 4 :: If data scaling was requested on input and +! the procedure found inconsistency in the data +! such that for some column index i, +! X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set +! to zero if JOBS=='C'. The computation proceeds +! with original or modified data and warning +! flag is set with INFO=4. +!............................................................. +!............................................................. +! Parameters +! ~~~~~~~~~~ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP + COMPLEX(KIND=WP), PARAMETER :: ZONE = ( 1.0_WP, 0.0_WP ) + COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP ) + +! Local scalars +! ~~~~~~~~~~~~~ + REAL(KIND=WP) :: OFL, ROOTSC, SCALE, SMALL, & + SSUM, XSCL1, XSCL2 + INTEGER :: i, j, IMINWR, INFO1, INFO2, & + LWRKEV, LWRSDD, LWRSVD, LWRSVJ, & + LWRSVQ, MLWORK, MWRKEV, MWRSDD, & + MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, & + OLWORK, MLRWRK + LOGICAL :: BADXY, LQUERY, SCCOLX, SCCOLY, & + WNTEX, WNTREF, WNTRES, WNTVEC + CHARACTER :: JOBZL, T_OR_N + CHARACTER :: JSVOPT +! +! Local arrays +! ~~~~~~~~~~~~ + REAL(KIND=WP) :: RDUMMY(2) + +! External functions (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~ + REAL(KIND=WP) ZLANGE, DLAMCH, DZNRM2 + EXTERNAL ZLANGE, DLAMCH, DZNRM2, IZAMAX + INTEGER IZAMAX + LOGICAL DISNAN, LSAME + EXTERNAL DISNAN, LSAME + +! External subroutines (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL ZAXPY, ZGEMM, ZDSCAL + EXTERNAL ZGEEV, ZGEJSV, ZGESDD, ZGESVD, ZGESVDQ, & + ZLACPY, ZLASCL, ZLASSQ, XERBLA + +! Intrinsic functions +! ~~~~~~~~~~~~~~~~~~~ + INTRINSIC DBLE, INT, MAX, SQRT +!............................................................ +! +! Test the input arguments +! + WNTRES = LSAME(JOBR,'R') + SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C') + SCCOLY = LSAME(JOBS,'Y') + WNTVEC = LSAME(JOBZ,'V') + WNTREF = LSAME(JOBF,'R') + WNTEX = LSAME(JOBF,'E') + INFO = 0 + LQUERY = ( ( LZWORK == -1 ) .OR. ( LIWORK == -1 ) & + .OR. ( LRWORK == -1 ) ) +! + IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. & + LSAME(JOBS,'N')) ) THEN + INFO = -1 + ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N') & + .OR. LSAME(JOBZ,'F')) ) THEN + INFO = -2 + ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR. & + ( WNTRES .AND. (.NOT.WNTVEC) ) ) THEN + INFO = -3 + ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR. & + LSAME(JOBF,'N') ) ) THEN + INFO = -4 + ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR. & + (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN + INFO = -5 + ELSE IF ( M < 0 ) THEN + INFO = -6 + ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN + INFO = -7 + ELSE IF ( LDX < M ) THEN + INFO = -9 + ELSE IF ( LDY < M ) THEN + INFO = -11 + ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. & + ((NRNK >= 1).AND.(NRNK <=N ))) ) THEN + INFO = -12 + ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN + INFO = -13 + ELSE IF ( LDZ < M ) THEN + INFO = -17 + ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN + INFO = -20 + ELSE IF ( LDW < N ) THEN + INFO = -22 + ELSE IF ( LDS < N ) THEN + INFO = -24 + END IF +! + IF ( INFO == 0 ) THEN + ! Compute the minimal and the optimal workspace + ! requirements. Simulate running the code and + ! determine minimal and optimal sizes of the + ! workspace at any moment of the run. + IF ( N == 0 ) THEN + ! Quick return. All output except K is void. + ! INFO=1 signals the void input. + ! In case of a workspace query, the default + ! minimal workspace lengths are returned. + IF ( LQUERY ) THEN + IWORK(1) = 1 + RWORK(1) = 1 + ZWORK(1) = 2 + ZWORK(2) = 2 + ELSE + K = 0 + END IF + INFO = 1 + RETURN + END IF + + IMINWR = 1 + MLRWRK = MAX(1,N) + MLWORK = 2 + OLWORK = 2 + SELECT CASE ( WHTSVD ) + CASE (1) + ! The following is specified as the minimal + ! length of WORK in the definition of ZGESVD: + ! MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N)) + MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N)) + MLWORK = MAX(MLWORK,MWRSVD) + MLRWRK = MAX(MLRWRK,N + 5*MIN(M,N)) + IF ( LQUERY ) THEN + CALL ZGESVD( 'O', 'S', M, N, X, LDX, RWORK, & + B, LDB, W, LDW, ZWORK, -1, RDUMMY, INFO1 ) + LWRSVD = INT( ZWORK(1) ) + OLWORK = MAX(OLWORK,LWRSVD) + END IF + CASE (2) + ! The following is specified as the minimal + ! length of WORK in the definition of ZGESDD: + ! MWRSDD = 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N). + ! RWORK length: 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N) + ! In LAPACK 3.10.1 RWORK is defined differently. + ! Below we take max over the two versions. + ! IMINWR = 8*MIN(M,N) + MWRSDD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N) + MLWORK = MAX(MLWORK,MWRSDD) + IMINWR = 8*MIN(M,N) + MLRWRK = MAX( MLRWRK, N + & + MAX( 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), & + 5*MIN(M,N)*MIN(M,N)+5*MIN(M,N), & + 2*MAX(M,N)*MIN(M,N)+ & + 2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) ) + IF ( LQUERY ) THEN + CALL ZGESDD( 'O', M, N, X, LDX, RWORK, B,LDB,& + W, LDW, ZWORK, -1, RDUMMY, IWORK, INFO1 ) + LWRSDD = MAX( MWRSDD,INT( ZWORK(1) )) + ! Possible bug in ZGESDD optimal workspace size. + OLWORK = MAX(OLWORK,LWRSDD) + END IF + CASE (3) + CALL ZGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, & + X, LDX, RWORK, Z, LDZ, W, LDW, NUMRNK, & + IWORK, -1, ZWORK, -1, RDUMMY, -1, INFO1 ) + IMINWR = IWORK(1) + MWRSVQ = INT(ZWORK(2)) + MLWORK = MAX(MLWORK,MWRSVQ) + MLRWRK = MAX(MLRWRK,N + INT(RDUMMY(1))) + IF ( LQUERY ) THEN + LWRSVQ = INT(ZWORK(1)) + OLWORK = MAX(OLWORK,LWRSVQ) + END IF + CASE (4) + JSVOPT = 'J' + CALL ZGEJSV( 'F', 'U', JSVOPT, 'R', 'N', 'P', M, & + N, X, LDX, RWORK, Z, LDZ, W, LDW, & + ZWORK, -1, RDUMMY, -1, IWORK, INFO1 ) + IMINWR = IWORK(1) + MWRSVJ = INT(ZWORK(2)) + MLWORK = MAX(MLWORK,MWRSVJ) + MLRWRK = MAX(MLRWRK,N + MAX(7,INT(RDUMMY(1)))) + IF ( LQUERY ) THEN + LWRSVJ = INT(ZWORK(1)) + OLWORK = MAX(OLWORK,LWRSVJ) + END IF + END SELECT + IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN + JOBZL = 'V' + ELSE + JOBZL = 'N' + END IF + ! Workspace calculation to the ZGEEV call + MWRKEV = MAX( 1, 2*N ) + MLWORK = MAX(MLWORK,MWRKEV) + MLRWRK = MAX(MLRWRK,N+2*N) + IF ( LQUERY ) THEN + CALL ZGEEV( 'N', JOBZL, N, S, LDS, EIGS, & + W, LDW, W, LDW, ZWORK, -1, RWORK, INFO1 ) + LWRKEV = INT(ZWORK(1)) + OLWORK = MAX( OLWORK, LWRKEV ) + END IF +! + IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -30 + IF ( LRWORK < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -28 + IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -26 + + END IF +! + IF( INFO /= 0 ) THEN + CALL XERBLA( 'ZGEDMD', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN +! Return minimal and optimal workspace sizes + IWORK(1) = IMINWR + RWORK(1) = MLRWRK + ZWORK(1) = MLWORK + ZWORK(2) = OLWORK + RETURN + END IF +!............................................................ +! + OFL = DLAMCH('O') + SMALL = DLAMCH('S') + BADXY = .FALSE. +! +! <1> Optional scaling of the snapshots (columns of X, Y) +! ========================================================== + IF ( SCCOLX ) THEN + ! The columns of X will be normalized. + ! To prevent overflows, the column norms of X are + ! carefully computed using ZLASSQ. + K = 0 + DO i = 1, N + !WORK(i) = DZNRM2( M, X(1,i), 1 ) + SCALE = ZERO + CALL ZLASSQ( M, X(1,i), 1, SCALE, SSUM ) + IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN + K = 0 + INFO = -8 + CALL XERBLA('ZGEDMD',-INFO) + END IF + IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN + ROOTSC = SQRT(SSUM) + IF ( SCALE .GE. (OFL / ROOTSC) ) THEN +! Norm of X(:,i) overflows. First, X(:,i) +! is scaled by +! ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2. +! Next, the norm of X(:,i) is stored without +! overflow as RWORK(i) = - SCALE * (ROOTSC/M), +! the minus sign indicating the 1/M factor. +! Scaling is performed without overflow, and +! underflow may occur in the smallest entries +! of X(:,i). The relative backward and forward +! errors are small in the ell_2 norm. + CALL ZLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, & + M, 1, X(1,i), LDX, INFO2 ) + RWORK(i) = - SCALE * ( ROOTSC / DBLE(M) ) + ELSE +! X(:,i) will be scaled to unit 2-norm + RWORK(i) = SCALE * ROOTSC + CALL ZLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, & + X(1,i), LDX, INFO2 ) ! LAPACK CALL +! X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i) ! INTRINSIC + END IF + ELSE + RWORK(i) = ZERO + K = K + 1 + END IF + END DO + IF ( K == N ) THEN + ! All columns of X are zero. Return error code -8. + ! (the 8th input variable had an illegal value) + K = 0 + INFO = -8 + CALL XERBLA('ZGEDMD',-INFO) + RETURN + END IF + DO i = 1, N +! Now, apply the same scaling to the columns of Y. + IF ( RWORK(i) > ZERO ) THEN + CALL ZDSCAL( M, ONE/RWORK(i), Y(1,i), 1 ) ! BLAS CALL +! Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i) ! INTRINSIC + ELSE IF ( RWORK(i) < ZERO ) THEN + CALL ZLASCL( 'G', 0, 0, -RWORK(i), & + ONE/DBLE(M), M, 1, Y(1,i), LDY, INFO2 ) ! LAPACK CALL + ELSE IF ( ABS(Y(IZAMAX(M, Y(1,i),1),i )) & + /= ZERO ) THEN +! X(:,i) is zero vector. For consistency, +! Y(:,i) should also be zero. If Y(:,i) is not +! zero, then the data might be inconsistent or +! corrupted. If JOBS == 'C', Y(:,i) is set to +! zero and a warning flag is raised. +! The computation continues but the +! situation will be reported in the output. + BADXY = .TRUE. + IF ( LSAME(JOBS,'C')) & + CALL ZDSCAL( M, ZERO, Y(1,i), 1 ) ! BLAS CALL + END IF + END DO + END IF + ! + IF ( SCCOLY ) THEN + ! The columns of Y will be normalized. + ! To prevent overflows, the column norms of Y are + ! carefully computed using ZLASSQ. + DO i = 1, N + !RWORK(i) = DZNRM2( M, Y(1,i), 1 ) + SCALE = ZERO + CALL ZLASSQ( M, Y(1,i), 1, SCALE, SSUM ) + IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN + K = 0 + INFO = -10 + CALL XERBLA('ZGEDMD',-INFO) + END IF + IF ( SCALE /= ZERO .AND. (SSUM /= ZERO) ) THEN + ROOTSC = SQRT(SSUM) + IF ( SCALE .GE. (OFL / ROOTSC) ) THEN +! Norm of Y(:,i) overflows. First, Y(:,i) +! is scaled by +! ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2. +! Next, the norm of Y(:,i) is stored without +! overflow as RWORK(i) = - SCALE * (ROOTSC/M), +! the minus sign indicating the 1/M factor. +! Scaling is performed without overflow, and +! underflow may occur in the smallest entries +! of Y(:,i). The relative backward and forward +! errors are small in the ell_2 norm. + CALL ZLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, & + M, 1, Y(1,i), LDY, INFO2 ) + RWORK(i) = - SCALE * ( ROOTSC / DBLE(M) ) + ELSE +! Y(:,i) will be scaled to unit 2-norm + RWORK(i) = SCALE * ROOTSC + CALL ZLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, & + Y(1,i), LDY, INFO2 ) ! LAPACK CALL +! Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i) ! INTRINSIC + END IF + ELSE + RWORK(i) = ZERO + END IF + END DO + DO i = 1, N +! Now, apply the same scaling to the columns of X. + IF ( RWORK(i) > ZERO ) THEN + CALL ZDSCAL( M, ONE/RWORK(i), X(1,i), 1 ) ! BLAS CALL +! X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i) ! INTRINSIC + ELSE IF ( RWORK(i) < ZERO ) THEN + CALL ZLASCL( 'G', 0, 0, -RWORK(i), & + ONE/DBLE(M), M, 1, X(1,i), LDX, INFO2 ) ! LAPACK CALL + ELSE IF ( ABS(X(IZAMAX(M, X(1,i),1),i )) & + /= ZERO ) THEN +! Y(:,i) is zero vector. If X(:,i) is not +! zero, then a warning flag is raised. +! The computation continues but the +! situation will be reported in the output. + BADXY = .TRUE. + END IF + END DO + END IF +! +! <2> SVD of the data snapshot matrix X. +! ===================================== +! The left singular vectors are stored in the array X. +! The right singular vectors are in the array W. +! The array W will later on contain the eigenvectors +! of a Rayleigh quotient. + NUMRNK = N + SELECT CASE ( WHTSVD ) + CASE (1) + CALL ZGESVD( 'O', 'S', M, N, X, LDX, RWORK, B, & + LDB, W, LDW, ZWORK, LZWORK, RWORK(N+1), INFO1 ) ! LAPACK CALL + T_OR_N = 'C' + CASE (2) + CALL ZGESDD( 'O', M, N, X, LDX, RWORK, B, LDB, W, & + LDW, ZWORK, LZWORK, RWORK(N+1), IWORK, INFO1 ) ! LAPACK CALL + T_OR_N = 'C' + CASE (3) + CALL ZGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, & + X, LDX, RWORK, Z, LDZ, W, LDW, & + NUMRNK, IWORK, LIWORK, ZWORK, & + LZWORK, RWORK(N+1), LRWORK-N, INFO1) ! LAPACK CALL + CALL ZLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX ) ! LAPACK CALL + T_OR_N = 'C' + CASE (4) + CALL ZGEJSV( 'F', 'U', JSVOPT, 'R', 'N', 'P', M, & + N, X, LDX, RWORK, Z, LDZ, W, LDW, & + ZWORK, LZWORK, RWORK(N+1), LRWORK-N, IWORK, INFO1 ) ! LAPACK CALL + CALL ZLACPY( 'A', M, N, Z, LDZ, X, LDX ) ! LAPACK CALL + T_OR_N = 'N' + XSCL1 = RWORK(N+1) + XSCL2 = RWORK(N+2) + IF ( XSCL1 /= XSCL2 ) THEN + ! This is an exceptional situation. If the + ! data matrices are not scaled and the + ! largest singular value of X overflows. + ! In that case ZGEJSV can return the SVD + ! in scaled form. The scaling factor can be used + ! to rescale the data (X and Y). + CALL ZLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2 ) + END IF + END SELECT +! + IF ( INFO1 > 0 ) THEN + ! The SVD selected subroutine did not converge. + ! Return with an error code. + INFO = 2 + RETURN + END IF +! + IF ( RWORK(1) == ZERO ) THEN + ! The largest computed singular value of (scaled) + ! X is zero. Return error code -8 + ! (the 8th input variable had an illegal value). + K = 0 + INFO = -8 + CALL XERBLA('ZGEDMD',-INFO) + RETURN + END IF +! + !<3> Determine the numerical rank of the data + ! snapshots matrix X. This depends on the + ! parameters NRNK and TOL. + + SELECT CASE ( NRNK ) + CASE ( -1 ) + K = 1 + DO i = 2, NUMRNK + IF ( ( RWORK(i) <= RWORK(1)*TOL ) .OR. & + ( RWORK(i) <= SMALL ) ) EXIT + K = K + 1 + END DO + CASE ( -2 ) + K = 1 + DO i = 1, NUMRNK-1 + IF ( ( RWORK(i+1) <= RWORK(i)*TOL ) .OR. & + ( RWORK(i) <= SMALL ) ) EXIT + K = K + 1 + END DO + CASE DEFAULT + K = 1 + DO i = 2, NRNK + IF ( RWORK(i) <= SMALL ) EXIT + K = K + 1 + END DO + END SELECT + ! Now, U = X(1:M,1:K) is the SVD/POD basis for the + ! snapshot data in the input matrix X. + + !<4> Compute the Rayleigh quotient S = U^H * A * U. + ! Depending on the requested outputs, the computation + ! is organized to compute additional auxiliary + ! matrices (for the residuals and refinements). + ! + ! In all formulas below, we need V_k*Sigma_k^(-1) + ! where either V_k is in W(1:N,1:K), or V_k^H is in + ! W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)). + IF ( LSAME(T_OR_N, 'N') ) THEN + DO i = 1, K + CALL ZDSCAL( N, ONE/RWORK(i), W(1,i), 1 ) ! BLAS CALL + ! W(1:N,i) = (ONE/RWORK(i)) * W(1:N,i) ! INTRINSIC + END DO + ELSE + ! This non-unit stride access is due to the fact + ! that ZGESVD, ZGESVDQ and ZGESDD return the + ! adjoint matrix of the right singular vectors. + !DO i = 1, K + ! CALL ZDSCAL( N, ONE/RWORK(i), W(i,1), LDW ) ! BLAS CALL + ! ! W(i,1:N) = (ONE/RWORK(i)) * W(i,1:N) ! INTRINSIC + !END DO + DO i = 1, K + RWORK(N+i) = ONE/RWORK(i) + END DO + DO j = 1, N + DO i = 1, K + W(i,j) = CMPLX(RWORK(N+i),ZERO,KIND=WP)*W(i,j) + END DO + END DO + END IF +! + IF ( WNTREF ) THEN + ! + ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K))) + ! for computing the refined Ritz vectors + ! (optionally, outside ZGEDMD). + CALL ZGEMM( 'N', T_OR_N, M, K, N, ZONE, Y, LDY, W, & + LDW, ZZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! INTRINSIC, for T_OR_N=='C' + ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K)) ! INTRINSIC, for T_OR_N=='N' + ! + ! At this point Z contains + ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and + ! this is needed for computing the residuals. + ! This matrix is returned in the array B and + ! it can be used to compute refined Ritz vectors. + CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB ) ! BLAS CALL + ! B(1:M,1:K) = Z(1:M,1:K) ! INTRINSIC + + CALL ZGEMM( 'C', 'N', K, K, M, ZONE, X, LDX, Z, & + LDZ, ZZERO, S, LDS ) ! BLAS CALL + ! S(1:K,1:K) = MATMUL(TRANSPOSE(CONJG(X(1:M,1:K))),Z(1:M,1:K)) ! INTRINSIC + ! At this point S = U^H * A * U is the Rayleigh quotient. + ELSE + ! A * U(:,1:K) is not explicitly needed and the + ! computation is organized differently. The Rayleigh + ! quotient is computed more efficiently. + CALL ZGEMM( 'C', 'N', K, N, M, ZONE, X, LDX, Y, LDY, & + ZZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:K,1:N) = MATMUL( TRANSPOSE(CONJG(X(1:M,1:K))), Y(1:M,1:N) ) ! INTRINSIC + ! + CALL ZGEMM( 'N', T_OR_N, K, K, N, ZONE, Z, LDZ, W, & + LDW, ZZERO, S, LDS ) ! BLAS CALL + ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! INTRINSIC, for T_OR_N=='T' + ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K))) ! INTRINSIC, for T_OR_N=='N' + ! At this point S = U^H * A * U is the Rayleigh quotient. + ! If the residuals are requested, save scaled V_k into Z. + ! Recall that V_k or V_k^H is stored in W. + IF ( WNTRES .OR. WNTEX ) THEN + IF ( LSAME(T_OR_N, 'N') ) THEN + CALL ZLACPY( 'A', N, K, W, LDW, Z, LDZ ) + ELSE + CALL ZLACPY( 'A', K, N, W, LDW, Z, LDZ ) + END IF + END IF + END IF +! + !<5> Compute the Ritz values and (if requested) the + ! right eigenvectors of the Rayleigh quotient. + ! + CALL ZGEEV( 'N', JOBZL, K, S, LDS, EIGS, W, LDW, & + W, LDW, ZWORK, LZWORK, RWORK(N+1), INFO1 ) ! LAPACK CALL + ! + ! W(1:K,1:K) contains the eigenvectors of the Rayleigh + ! quotient. See the description of Z. + ! Also, see the description of ZGEEV. + IF ( INFO1 > 0 ) THEN + ! ZGEEV failed to compute the eigenvalues and + ! eigenvectors of the Rayleigh quotient. + INFO = 3 + RETURN + END IF +! + ! <6> Compute the eigenvectors (if requested) and, + ! the residuals (if requested). + ! + IF ( WNTVEC .OR. WNTEX ) THEN + IF ( WNTRES ) THEN + IF ( WNTREF ) THEN + ! Here, if the refinement is requested, we have + ! A*U(:,1:K) already computed and stored in Z. + ! For the residuals, need Y = A * U(:,1;K) * W. + CALL ZGEMM( 'N', 'N', M, K, K, ZONE, Z, LDZ, W, & + LDW, ZZERO, Y, LDY ) ! BLAS CALL + ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K) ! INTRINSIC + ! This frees Z; Y contains A * U(:,1:K) * W. + ELSE + ! Compute S = V_k * Sigma_k^(-1) * W, where + ! V_k * Sigma_k^(-1) (or its adjoint) is stored in Z + CALL ZGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, & + W, LDW, ZZERO, S, LDS ) + ! Then, compute Z = Y * S = + ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = + ! = A * U(:,1:K) * W(1:K,1:K) + CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, & + LDS, ZZERO, Z, LDZ ) + ! Save a copy of Z into Y and free Z for holding + ! the Ritz vectors. + CALL ZLACPY( 'A', M, K, Z, LDZ, Y, LDY ) + IF ( WNTEX ) CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB ) + END IF + ELSE IF ( WNTEX ) THEN + ! Compute S = V_k * Sigma_k^(-1) * W, where + ! V_k * Sigma_k^(-1) is stored in Z + CALL ZGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, & + W, LDW, ZZERO, S, LDS ) + ! Then, compute Z = Y * S = + ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = + ! = A * U(:,1:K) * W(1:K,1:K) + CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, & + LDS, ZZERO, B, LDB ) + ! The above call replaces the following two calls + ! that were used in the developing-testing phase. + ! CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, & + ! LDS, ZZERO, Z, LDZ) + ! Save a copy of Z into B and free Z for holding + ! the Ritz vectors. + ! CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB ) + END IF +! + ! Compute the Ritz vectors + IF ( WNTVEC ) CALL ZGEMM( 'N', 'N', M, K, K, ZONE, X, LDX, W, LDW, & + ZZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K)) ! INTRINSIC +! + IF ( WNTRES ) THEN + DO i = 1, K + CALL ZAXPY( M, -EIGS(i), Z(1,i), 1, Y(1,i), 1 ) ! BLAS CALL + ! Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i) ! INTRINSIC + RES(i) = DZNRM2( M, Y(1,i), 1 ) ! BLAS CALL + END DO + END IF + END IF +! + IF ( WHTSVD == 4 ) THEN + RWORK(N+1) = XSCL1 + RWORK(N+2) = XSCL2 + END IF +! +! Successful exit. + IF ( .NOT. BADXY ) THEN + INFO = 0 + ELSE + ! A warning on possible data inconsistency. + ! This should be a rare event. + INFO = 4 + END IF +!............................................................ + RETURN +! ...... + END SUBROUTINE ZGEDMD + diff --git a/lapack-netlib/SRC/zgedmdq.f90 b/lapack-netlib/SRC/zgedmdq.f90 new file mode 100644 index 000000000..51be72a32 --- /dev/null +++ b/lapack-netlib/SRC/zgedmdq.f90 @@ -0,0 +1,689 @@ +SUBROUTINE ZGEDMDQ( JOBS, JOBZ, JOBR, JOBQ, JOBT, JOBF, & + WHTSVD, M, N, F, LDF, X, LDX, Y, & + LDY, NRNK, TOL, K, EIGS, & + Z, LDZ, RES, B, LDB, V, LDV, & + S, LDS, ZWORK, LZWORK, WORK, LWORK, & + IWORK, LIWORK, INFO ) +! March 2023 +!..... + USE iso_fortran_env + IMPLICIT NONE + INTEGER, PARAMETER :: WP = real64 +!..... +! Scalar arguments + CHARACTER, INTENT(IN) :: JOBS, JOBZ, JOBR, JOBQ, & + JOBT, JOBF + INTEGER, INTENT(IN) :: WHTSVD, M, N, LDF, LDX, & + LDY, NRNK, LDZ, LDB, LDV, & + LDS, LZWORK, LWORK, LIWORK + INTEGER, INTENT(OUT) :: INFO, K + REAL(KIND=WP), INTENT(IN) :: TOL +! Array arguments + COMPLEX(KIND=WP), INTENT(INOUT) :: F(LDF,*) + COMPLEX(KIND=WP), INTENT(OUT) :: X(LDX,*), Y(LDY,*), & + Z(LDZ,*), B(LDB,*), & + V(LDV,*), S(LDS,*) + COMPLEX(KIND=WP), INTENT(OUT) :: EIGS(*) + COMPLEX(KIND=WP), INTENT(OUT) :: ZWORK(*) + REAL(KIND=WP), INTENT(OUT) :: RES(*) + REAL(KIND=WP), INTENT(OUT) :: WORK(*) + INTEGER, INTENT(OUT) :: IWORK(*) +!..... +! Purpose +! ======= +! ZGEDMDQ computes the Dynamic Mode Decomposition (DMD) for +! a pair of data snapshot matrices, using a QR factorization +! based compression of the data. For the input matrices +! X and Y such that Y = A*X with an unaccessible matrix +! A, ZGEDMDQ computes a certain number of Ritz pairs of A using +! the standard Rayleigh-Ritz extraction from a subspace of +! range(X) that is determined using the leading left singular +! vectors of X. Optionally, ZGEDMDQ returns the residuals +! of the computed Ritz pairs, the information needed for +! a refinement of the Ritz vectors, or the eigenvectors of +! the Exact DMD. +! For further details see the references listed +! below. For more details of the implementation see [3]. +! +! References +! ========== +! [1] P. Schmid: Dynamic mode decomposition of numerical +! and experimental data, +! Journal of Fluid Mechanics 656, 5-28, 2010. +! [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal +! decompositions: analysis and enhancements, +! SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. +! [3] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition I. Technical report. AIMDyn Inc. +! and LAPACK Working Note 298. +! [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. +! Brunton, N. Kutz: On Dynamic Mode Decomposition: +! Theory and Applications, Journal of Computational +! Dynamics 1(2), 391 -421, 2014. +! +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! and supported by +! - DARPA SBIR project "Koopman Operator-Based Forecasting +! for Nonstationary Processes from Near-Term, Limited +! Observational Data" Contract No: W31P4Q-21-C-0007 +! - DARPA PAI project "Physics-Informed Machine Learning +! Methodologies" Contract No: HR0011-18-9-0033 +! - DARPA MoDyL project "A Data-Driven, Operator-Theoretic +! Framework for Space-Time Analysis of Process Dynamics" +! Contract No: HR0011-16-C-0116 +! Any opinions, findings and conclusions or recommendations +! expressed in this material are those of the author and +! do not necessarily reflect the views of the DARPA SBIR +! Program Office. +!============================================================ +! Distribution Statement A: +! Approved for Public Release, Distribution Unlimited. +! Cleared by DARPA on September 29, 2022 +!============================================================ +!...................................................................... +! Arguments +! ========= +! JOBS (input) CHARACTER*1 +! Determines whether the initial data snapshots are scaled +! by a diagonal matrix. The data snapshots are the columns +! of F. The leading N-1 columns of F are denoted X and the +! trailing N-1 columns are denoted Y. +! 'S' :: The data snapshots matrices X and Y are multiplied +! with a diagonal matrix D so that X*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'C' :: The snapshots are scaled as with the 'S' option. +! If it is found that an i-th column of X is zero +! vector and the corresponding i-th column of Y is +! non-zero, then the i-th column of Y is set to +! zero and a warning flag is raised. +! 'Y' :: The data snapshots matrices X and Y are multiplied +! by a diagonal matrix D so that Y*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'N' :: No data scaling. +!..... +! JOBZ (input) CHARACTER*1 +! Determines whether the eigenvectors (Koopman modes) will +! be computed. +! 'V' :: The eigenvectors (Koopman modes) will be computed +! and returned in the matrix Z. +! See the description of Z. +! 'F' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product Z*V, where Z +! is orthonormal and V contains the eigenvectors +! of the corresponding Rayleigh quotient. +! See the descriptions of F, V, Z. +! 'Q' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product Q*Z, where Z +! contains the eigenvectors of the compression of the +! underlying discretized operator onto the span of +! the data snapshots. See the descriptions of F, V, Z. +! Q is from the initial QR factorization. +! 'N' :: The eigenvectors are not computed. +!..... +! JOBR (input) CHARACTER*1 +! Determines whether to compute the residuals. +! 'R' :: The residuals for the computed eigenpairs will +! be computed and stored in the array RES. +! See the description of RES. +! For this option to be legal, JOBZ must be 'V'. +! 'N' :: The residuals are not computed. +!..... +! JOBQ (input) CHARACTER*1 +! Specifies whether to explicitly compute and return the +! unitary matrix from the QR factorization. +! 'Q' :: The matrix Q of the QR factorization of the data +! snapshot matrix is computed and stored in the +! array F. See the description of F. +! 'N' :: The matrix Q is not explicitly computed. +!..... +! JOBT (input) CHARACTER*1 +! Specifies whether to return the upper triangular factor +! from the QR factorization. +! 'R' :: The matrix R of the QR factorization of the data +! snapshot matrix F is returned in the array Y. +! See the description of Y and Further details. +! 'N' :: The matrix R is not returned. +!..... +! JOBF (input) CHARACTER*1 +! Specifies whether to store information needed for post- +! processing (e.g. computing refined Ritz vectors) +! 'R' :: The matrix needed for the refinement of the Ritz +! vectors is computed and stored in the array B. +! See the description of B. +! 'E' :: The unscaled eigenvectors of the Exact DMD are +! computed and returned in the array B. See the +! description of B. +! 'N' :: No eigenvector refinement data is computed. +! To be useful on exit, this option needs JOBQ='Q'. +!..... +! WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } +! Allows for a selection of the SVD algorithm from the +! LAPACK library. +! 1 :: ZGESVD (the QR SVD algorithm) +! 2 :: ZGESDD (the Divide and Conquer algorithm; if enough +! workspace available, this is the fastest option) +! 3 :: ZGESVDQ (the preconditioned QR SVD ; this and 4 +! are the most accurate options) +! 4 :: ZGEJSV (the preconditioned Jacobi SVD; this and 3 +! are the most accurate options) +! For the four methods above, a significant difference in +! the accuracy of small singular values is possible if +! the snapshots vary in norm so that X is severely +! ill-conditioned. If small (smaller than EPS*||X||) +! singular values are of interest and JOBS=='N', then +! the options (3, 4) give the most accurate results, where +! the option 4 is slightly better and with stronger +! theoretical background. +! If JOBS=='S', i.e. the columns of X will be normalized, +! then all methods give nearly equally accurate results. +!..... +! M (input) INTEGER, M >= 0 +! The state space dimension (the number of rows of F). +!..... +! N (input) INTEGER, 0 <= N <= M +! The number of data snapshots from a single trajectory, +! taken at equidistant discrete times. This is the +! number of columns of F. +!..... +! F (input/output) COMPLEX(KIND=WP) M-by-N array +! > On entry, +! the columns of F are the sequence of data snapshots +! from a single trajectory, taken at equidistant discrete +! times. It is assumed that the column norms of F are +! in the range of the normalized floating point numbers. +! < On exit, +! If JOBQ == 'Q', the array F contains the orthogonal +! matrix/factor of the QR factorization of the initial +! data snapshots matrix F. See the description of JOBQ. +! If JOBQ == 'N', the entries in F strictly below the main +! diagonal contain, column-wise, the information on the +! Householder vectors, as returned by ZGEQRF. The +! remaining information to restore the orthogonal matrix +! of the initial QR factorization is stored in ZWORK(1:MIN(M,N)). +! See the description of ZWORK. +!..... +! LDF (input) INTEGER, LDF >= M +! The leading dimension of the array F. +!..... +! X (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array +! X is used as workspace to hold representations of the +! leading N-1 snapshots in the orthonormal basis computed +! in the QR factorization of F. +! On exit, the leading K columns of X contain the leading +! K left singular vectors of the above described content +! of X. To lift them to the space of the left singular +! vectors U(:,1:K) of the input data, pre-multiply with the +! Q factor from the initial QR factorization. +! See the descriptions of F, K, V and Z. +!..... +! LDX (input) INTEGER, LDX >= N +! The leading dimension of the array X. +!..... +! Y (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N) array +! Y is used as workspace to hold representations of the +! trailing N-1 snapshots in the orthonormal basis computed +! in the QR factorization of F. +! On exit, +! If JOBT == 'R', Y contains the MIN(M,N)-by-N upper +! triangular factor from the QR factorization of the data +! snapshot matrix F. +!..... +! LDY (input) INTEGER , LDY >= N +! The leading dimension of the array Y. +!..... +! NRNK (input) INTEGER +! Determines the mode how to compute the numerical rank, +! i.e. how to truncate small singular values of the input +! matrix X. On input, if +! NRNK = -1 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(1) +! This option is recommended. +! NRNK = -2 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(i-1) +! This option is included for R&D purposes. +! It requires highly accurate SVD, which +! may not be feasible. +! The numerical rank can be enforced by using positive +! value of NRNK as follows: +! 0 < NRNK <= N-1 :: at most NRNK largest singular values +! will be used. If the number of the computed nonzero +! singular values is less than NRNK, then only those +! nonzero values will be used and the actually used +! dimension is less than NRNK. The actual number of +! the nonzero singular values is returned in the variable +! K. See the description of K. +!..... +! TOL (input) REAL(KIND=WP), 0 <= TOL < 1 +! The tolerance for truncating small singular values. +! See the description of NRNK. +!..... +! K (output) INTEGER, 0 <= K <= N +! The dimension of the SVD/POD basis for the leading N-1 +! data snapshots (columns of F) and the number of the +! computed Ritz pairs. The value of K is determined +! according to the rule set by the parameters NRNK and +! TOL. See the descriptions of NRNK and TOL. +!..... +! EIGS (output) COMPLEX(KIND=WP) (N-1)-by-1 array +! The leading K (K<=N-1) entries of EIGS contain +! the computed eigenvalues (Ritz values). +! See the descriptions of K, and Z. +!..... +! Z (workspace/output) COMPLEX(KIND=WP) M-by-(N-1) array +! If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i) +! is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1. +! If JOBZ == 'F', then the Z(:,i)'s are given implicitly as +! Z*V, where Z contains orthonormal matrix (the product of +! Q from the initial QR factorization and the SVD/POD_basis +! returned by ZGEDMD in X) and the second factor (the +! eigenvectors of the Rayleigh quotient) is in the array V, +! as returned by ZGEDMD. That is, X(:,1:K)*V(:,i) +! is an eigenvector corresponding to EIGS(i). The columns +! of V(1:K,1:K) are the computed eigenvectors of the +! K-by-K Rayleigh quotient. +! See the descriptions of EIGS, X and V. +!..... +! LDZ (input) INTEGER , LDZ >= M +! The leading dimension of the array Z. +!..... +! RES (output) REAL(KIND=WP) (N-1)-by-1 array +! RES(1:K) contains the residuals for the K computed +! Ritz pairs, +! RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. +! See the description of EIGS and Z. +!..... +! B (output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array. +! IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can +! be used for computing the refined vectors; see further +! details in the provided references. +! If JOBF == 'E', B(1:N,1;K) contains +! A*U(:,1:K)*W(1:K,1:K), which are the vectors from the +! Exact DMD, up to scaling by the inverse eigenvalues. +! In both cases, the content of B can be lifted to the +! original dimension of the input data by pre-multiplying +! with the Q factor from the initial QR factorization. +! Here A denotes a compression of the underlying operator. +! See the descriptions of F and X. +! If JOBF =='N', then B is not referenced. +!..... +! LDB (input) INTEGER, LDB >= MIN(M,N) +! The leading dimension of the array B. +!..... +! V (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array +! On exit, V(1:K,1:K) V contains the K eigenvectors of +! the Rayleigh quotient. The Ritz vectors +! (returned in Z) are the product of Q from the initial QR +! factorization (see the description of F) X (see the +! description of X) and V. +!..... +! LDV (input) INTEGER, LDV >= N-1 +! The leading dimension of the array V. +!..... +! S (output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array +! The array S(1:K,1:K) is used for the matrix Rayleigh +! quotient. This content is overwritten during +! the eigenvalue decomposition by ZGEEV. +! See the description of K. +!..... +! LDS (input) INTEGER, LDS >= N-1 +! The leading dimension of the array S. +!..... +! ZWORK (workspace/output) COMPLEX(KIND=WP) LWORK-by-1 array +! On exit, +! ZWORK(1:MIN(M,N)) contains the scalar factors of the +! elementary reflectors as returned by ZGEQRF of the +! M-by-N input matrix F. +! If the call to ZGEDMDQ is only workspace query, then +! ZWORK(1) contains the minimal complex workspace length and +! ZWORK(2) is the optimal complex workspace length. +! Hence, the length of work is at least 2. +! See the description of LZWORK. +!..... +! LZWORK (input) INTEGER +! The minimal length of the workspace vector ZWORK. +! LZWORK is calculated as follows: +! Let MLWQR = N (minimal workspace for ZGEQRF[M,N]) +! MLWDMD = minimal workspace for ZGEDMD (see the +! description of LWORK in ZGEDMD) +! MLWMQR = N (minimal workspace for +! ZUNMQR['L','N',M,N,N]) +! MLWGQR = N (minimal workspace for ZUNGQR[M,N,N]) +! MINMN = MIN(M,N) +! Then +! LZWORK = MAX(2, MIN(M,N)+MLWQR, MINMN+MLWDMD) +! is further updated as follows: +! if JOBZ == 'V' or JOBZ == 'F' THEN +! LZWORK = MAX(LZWORK, MINMN+MLWMQR) +! if JOBQ == 'Q' THEN +! LZWORK = MAX(ZLWORK, MINMN+MLWGQR) +! +!..... +! WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array +! On exit, +! WORK(1:N-1) contains the singular values of +! the input submatrix F(1:M,1:N-1). +! If the call to ZGEDMDQ is only workspace query, then +! WORK(1) contains the minimal workspace length and +! WORK(2) is the optimal workspace length. hence, the +! length of work is at least 2. +! See the description of LWORK. +!..... +! LWORK (input) INTEGER +! The minimal length of the workspace vector WORK. +! LWORK is the same as in ZGEDMD, because in ZGEDMDQ +! only ZGEDMD requires real workspace for snapshots +! of dimensions MIN(M,N)-by-(N-1). +! If on entry LWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace length for WORK. +!..... +! IWORK (workspace/output) INTEGER LIWORK-by-1 array +! Workspace that is required only if WHTSVD equals +! 2 , 3 or 4. (See the description of WHTSVD). +! If on entry LWORK =-1 or LIWORK=-1, then the +! minimal length of IWORK is computed and returned in +! IWORK(1). See the description of LIWORK. +!..... +! LIWORK (input) INTEGER +! The minimal length of the workspace vector IWORK. +! If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 +! Let M1=MIN(M,N), N1=N-1. Then +! If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1)) +! If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1) +! If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1) +! If on entry LIWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! INFO (output) INTEGER +! -i < 0 :: On entry, the i-th argument had an +! illegal value +! = 0 :: Successful return. +! = 1 :: Void input. Quick exit (M=0 or N=0). +! = 2 :: The SVD computation of X did not converge. +! Suggestion: Check the input data and/or +! repeat with different WHTSVD. +! = 3 :: The computation of the eigenvalues did not +! converge. +! = 4 :: If data scaling was requested on input and +! the procedure found inconsistency in the data +! such that for some column index i, +! X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set +! to zero if JOBS=='C'. The computation proceeds +! with original or modified data and warning +! flag is set with INFO=4. +!............................................................. +!............................................................. +! Parameters +! ~~~~~~~~~~ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP +! COMPLEX(KIND=WP), PARAMETER :: ZONE = ( 1.0_WP, 0.0_WP ) + COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP ) +! +! Local scalars +! ~~~~~~~~~~~~~ + INTEGER :: IMINWR, INFO1, MINMN, MLRWRK, & + MLWDMD, MLWGQR, MLWMQR, MLWORK, & + MLWQR, OLWDMD, OLWGQR, OLWMQR, & + OLWORK, OLWQR + LOGICAL :: LQUERY, SCCOLX, SCCOLY, WANTQ, & + WNTTRF, WNTRES, WNTVEC, WNTVCF, & + WNTVCQ, WNTREF, WNTEX + CHARACTER(LEN=1) :: JOBVL +! +! External functions (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~ + LOGICAL LSAME + EXTERNAL LSAME +! +! External subroutines (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL ZGEQRF, ZLACPY, ZLASET, ZUNGQR, & + ZUNMQR, XERBLA + +! External subroutines +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL ZGEDMD + +! Intrinsic functions +! ~~~~~~~~~~~~~~~~~~~ + INTRINSIC MAX, MIN, INT + !.......................................................... + ! + ! Test the input arguments + WNTRES = LSAME(JOBR,'R') + SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' ) + SCCOLY = LSAME(JOBS,'Y') + WNTVEC = LSAME(JOBZ,'V') + WNTVCF = LSAME(JOBZ,'F') + WNTVCQ = LSAME(JOBZ,'Q') + WNTREF = LSAME(JOBF,'R') + WNTEX = LSAME(JOBF,'E') + WANTQ = LSAME(JOBQ,'Q') + WNTTRF = LSAME(JOBT,'R') + MINMN = MIN(M,N) + INFO = 0 + LQUERY = ( (LZWORK == -1) .OR. (LWORK == -1) .OR. (LIWORK == -1) ) +! + IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. & + LSAME(JOBS,'N')) ) THEN + INFO = -1 + ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ & + .OR. LSAME(JOBZ,'N')) ) THEN + INFO = -2 + ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR. & + ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN + INFO = -3 + ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN + INFO = -4 + ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) ) THEN + INFO = -5 + ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR. & + LSAME(JOBF,'N') ) ) THEN + INFO = -6 + ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR. & + (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN + INFO = -7 + ELSE IF ( M < 0 ) THEN + INFO = -8 + ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN + INFO = -9 + ELSE IF ( LDF < M ) THEN + INFO = -11 + ELSE IF ( LDX < MINMN ) THEN + INFO = -13 + ELSE IF ( LDY < MINMN ) THEN + INFO = -15 + ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. & + ((NRNK >= 1).AND.(NRNK <=N ))) ) THEN + INFO = -16 + ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN + INFO = -17 + ELSE IF ( LDZ < M ) THEN + INFO = -21 + ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN + INFO = -24 + ELSE IF ( LDV < N-1 ) THEN + INFO = -26 + ELSE IF ( LDS < N-1 ) THEN + INFO = -28 + END IF +! + IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN + JOBVL = 'V' + ELSE + JOBVL = 'N' + END IF + IF ( INFO == 0 ) THEN + ! Compute the minimal and the optimal workspace + ! requirements. Simulate running the code and + ! determine minimal and optimal sizes of the + ! workspace at any moment of the run. + IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN + ! All output except K is void. INFO=1 signals + ! the void input. In case of a workspace query, + ! the minimal workspace lengths are returned. + IF ( LQUERY ) THEN + IWORK(1) = 1 + ZWORK(1) = 2 + ZWORK(2) = 2 + WORK(1) = 2 + WORK(2) = 2 + ELSE + K = 0 + END IF + INFO = 1 + RETURN + END IF + + MLRWRK = 2 + MLWORK = 2 + OLWORK = 2 + IMINWR = 1 + MLWQR = MAX(1,N) ! Minimal workspace length for ZGEQRF. + MLWORK = MAX(MLWORK,MINMN + MLWQR) + + IF ( LQUERY ) THEN + CALL ZGEQRF( M, N, F, LDF, ZWORK, ZWORK, -1, & + INFO1 ) + OLWQR = INT(ZWORK(1)) + OLWORK = MAX(OLWORK,MINMN + OLWQR) + END IF + CALL ZGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,& + N-1, X, LDX, Y, LDY, NRNK, TOL, K, & + EIGS, Z, LDZ, RES, B, LDB, V, LDV, & + S, LDS, ZWORK, -1, WORK, -1, IWORK,& + -1, INFO1 ) + MLWDMD = INT(ZWORK(1)) + MLWORK = MAX(MLWORK, MINMN + MLWDMD) + MLRWRK = MAX(MLRWRK, INT(WORK(1))) + IMINWR = MAX(IMINWR, IWORK(1)) + IF ( LQUERY ) THEN + OLWDMD = INT(ZWORK(2)) + OLWORK = MAX(OLWORK, MINMN+OLWDMD) + END IF + IF ( WNTVEC .OR. WNTVCF ) THEN + MLWMQR = MAX(1,N) + MLWORK = MAX(MLWORK,MINMN+MLWMQR) + IF ( LQUERY ) THEN + CALL ZUNMQR( 'L','N', M, N, MINMN, F, LDF, & + ZWORK, Z, LDZ, ZWORK, -1, INFO1 ) + OLWMQR = INT(ZWORK(1)) + OLWORK = MAX(OLWORK,MINMN+OLWMQR) + END IF + END IF + IF ( WANTQ ) THEN + MLWGQR = MAX(1,N) + MLWORK = MAX(MLWORK,MINMN+MLWGQR) + IF ( LQUERY ) THEN + CALL ZUNGQR( M, MINMN, MINMN, F, LDF, ZWORK, & + ZWORK, -1, INFO1 ) + OLWGQR = INT(ZWORK(1)) + OLWORK = MAX(OLWORK,MINMN+OLWGQR) + END IF + END IF + IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -34 + IF ( LWORK < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -32 + IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -30 + END IF + IF( INFO /= 0 ) THEN + CALL XERBLA( 'ZGEDMDQ', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN +! Return minimal and optimal workspace sizes + IWORK(1) = IMINWR + ZWORK(1) = MLWORK + ZWORK(2) = OLWORK + WORK(1) = MLRWRK + WORK(2) = MLRWRK + RETURN + END IF +!..... +! Initial QR factorization that is used to represent the +! snapshots as elements of lower dimensional subspace. +! For large scale computation with M >> N, at this place +! one can use an out of core QRF. +! + CALL ZGEQRF( M, N, F, LDF, ZWORK, & + ZWORK(MINMN+1), LZWORK-MINMN, INFO1 ) +! +! Define X and Y as the snapshots representations in the +! orthogonal basis computed in the QR factorization. +! X corresponds to the leading N-1 and Y to the trailing +! N-1 snapshots. + CALL ZLASET( 'L', MINMN, N-1, ZZERO, ZZERO, X, LDX ) + CALL ZLACPY( 'U', MINMN, N-1, F, LDF, X, LDX ) + CALL ZLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY ) + IF ( M >= 3 ) THEN + CALL ZLASET( 'L', MINMN-2, N-2, ZZERO, ZZERO, & + Y(3,1), LDY ) + END IF +! +! Compute the DMD of the projected snapshot pairs (X,Y) + CALL ZGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN, & + N-1, X, LDX, Y, LDY, NRNK, TOL, K, & + EIGS, Z, LDZ, RES, B, LDB, V, LDV, & + S, LDS, ZWORK(MINMN+1), LZWORK-MINMN, & + WORK, LWORK, IWORK, LIWORK, INFO1 ) + IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN + ! Return with error code. See ZGEDMD for details. + INFO = INFO1 + RETURN + ELSE + INFO = INFO1 + END IF +! +! The Ritz vectors (Koopman modes) can be explicitly +! formed or returned in factored form. + IF ( WNTVEC ) THEN + ! Compute the eigenvectors explicitly. + IF ( M > MINMN ) CALL ZLASET( 'A', M-MINMN, K, ZZERO, & + ZZERO, Z(MINMN+1,1), LDZ ) + CALL ZUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z, & + LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 ) + ELSE IF ( WNTVCF ) THEN + ! Return the Ritz vectors (eigenvectors) in factored + ! form Z*V, where Z contains orthonormal matrix (the + ! product of Q from the initial QR factorization and + ! the SVD/POD_basis returned by ZGEDMD in X) and the + ! second factor (the eigenvectors of the Rayleigh + ! quotient) is in the array V, as returned by ZGEDMD. + CALL ZLACPY( 'A', N, K, X, LDX, Z, LDZ ) + IF ( M > N ) CALL ZLASET( 'A', M-N, K, ZZERO, ZZERO, & + Z(N+1,1), LDZ ) + CALL ZUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z, & + LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 ) + END IF +! +! Some optional output variables: +! +! The upper triangular factor R in the initial QR +! factorization is optionally returned in the array Y. +! This is useful if this call to ZGEDMDQ is to be +! followed by a streaming DMD that is implemented in a +! QR compressed form. + IF ( WNTTRF ) THEN ! Return the upper triangular R in Y + CALL ZLASET( 'A', MINMN, N, ZZERO, ZZERO, Y, LDY ) + CALL ZLACPY( 'U', MINMN, N, F, LDF, Y, LDY ) + END IF +! +! The orthonormal/unitary factor Q in the initial QR +! factorization is optionally returned in the array F. +! Same as with the triangular factor above, this is +! useful in a streaming DMD. + IF ( WANTQ ) THEN ! Q overwrites F + CALL ZUNGQR( M, MINMN, MINMN, F, LDF, ZWORK, & + ZWORK(MINMN+1), LZWORK-MINMN, INFO1 ) + END IF +! + RETURN +! + END SUBROUTINE ZGEDMDQ + \ No newline at end of file From c0865ab0fe8579a2716b4f12fae4e3f2f6b1bede Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 20 Jun 2023 10:45:29 +0200 Subject: [PATCH 02/15] Add LAPACKE interfaces for Dynamic Mode Decomposition (Reference-LAPACK PR 736) --- lapack-netlib/LAPACKE/src/lapacke_cgedmd.c | 115 ++++++++++ .../LAPACKE/src/lapacke_cgedmd_work.c | 180 +++++++++++++++ lapack-netlib/LAPACKE/src/lapacke_cgedmdq.c | 123 +++++++++++ .../LAPACKE/src/lapacke_cgedmdq_work.c | 205 ++++++++++++++++++ lapack-netlib/LAPACKE/src/lapacke_dgedmd.c | 112 ++++++++++ .../LAPACKE/src/lapacke_dgedmd_work.c | 179 +++++++++++++++ lapack-netlib/LAPACKE/src/lapacke_dgedmdq.c | 119 ++++++++++ .../LAPACKE/src/lapacke_dgedmdq_work.c | 200 +++++++++++++++++ lapack-netlib/LAPACKE/src/lapacke_sgedmd.c | 112 ++++++++++ .../LAPACKE/src/lapacke_sgedmd_work.c | 179 +++++++++++++++ lapack-netlib/LAPACKE/src/lapacke_sgedmdq.c | 119 ++++++++++ .../LAPACKE/src/lapacke_sgedmdq_work.c | 200 +++++++++++++++++ lapack-netlib/LAPACKE/src/lapacke_zgedmd.c | 116 ++++++++++ .../LAPACKE/src/lapacke_zgedmd_work.c | 182 ++++++++++++++++ lapack-netlib/LAPACKE/src/lapacke_zgedmdq.c | 123 +++++++++++ .../LAPACKE/src/lapacke_zgedmdq_work.c | 205 ++++++++++++++++++ 16 files changed, 2469 insertions(+) create mode 100644 lapack-netlib/LAPACKE/src/lapacke_cgedmd.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_cgedmd_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_cgedmdq.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_cgedmdq_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_dgedmd.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_dgedmd_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_dgedmdq.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_dgedmdq_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_sgedmd.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_sgedmd_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_sgedmdq.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_sgedmdq_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zgedmd.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zgedmd_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zgedmdq.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zgedmdq_work.c diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgedmd.c b/lapack-netlib/LAPACKE/src/lapacke_cgedmd.c new file mode 100644 index 000000000..a269b0daf --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_cgedmd.c @@ -0,0 +1,115 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function cgedmd +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_cgedmd( int matrix_layout, char jobs, char jobz, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + lapack_complex_float* x, lapack_int ldx, + lapack_complex_float* y, lapack_int ldy, lapack_int k, + lapack_complex_float* reig, lapack_complex_float* imeig, + lapack_complex_float* z, lapack_int ldz, + lapack_complex_float* res, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* w, + lapack_int ldw, lapack_complex_float* s, lapack_int lds) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_int liwork = -1; + lapack_complex_float* work = NULL; + lapack_int* iwork = NULL; + lapack_complex_float work_query; + lapack_int iwork_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_cgedmd", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, y, ldy ) ) { + return -10; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, z, ldz ) ) { + return -15; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -18; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, w, ldw ) ) { + return -20; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, s, lds ) ) { + return -22; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_cgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n, + x, ldx, y, ldy, k, reig, imeig, z, ldz, res, + b, ldb, w, ldw, s, lds, &work_query, lwork, + &iwork_query, liwork ); + + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_C2INT( work_query ); + liwork = iwork_query; + /* Allocate memory for work arrays */ + work = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_1; + } + /* Call middle-level interface */ + info = LAPACKE_cgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n, + x, ldx, y, ldy, k, reig, imeig, z, ldz, res, + b, ldb, w, ldw, s, lds, work, lwork, iwork, + liwork ); + /* Release memory and exit */ + LAPACKE_free( iwork ); +exit_level_1: + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_cgedmd", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgedmd_work.c b/lapack-netlib/LAPACKE/src/lapacke_cgedmd_work.c new file mode 100644 index 000000000..534934efb --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_cgedmd_work.c @@ -0,0 +1,180 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function cgedmd +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_cgedmd_work( int matrix_layout, char jobs, char jobz, + char jobf, lapack_int whtsvd, lapack_int m, + lapack_int n, lapack_complex_float* x, lapack_int ldx, + lapack_complex_float* y, lapack_int ldy, lapack_int k, + lapack_complex_float* reig, lapack_complex_float* imeig, + lapack_complex_float* z, lapack_int ldz, + lapack_complex_float* res, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* w, + lapack_int ldw, lapack_complex_float* s, lapack_int lds, + lapack_complex_float* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_cgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy, + &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int ldx_t = MAX(1,m); + lapack_int ldy_t = MAX(1,m); + lapack_int ldz_t = MAX(1,m); + lapack_int ldb_t = MAX(1,m); + lapack_int ldw_t = MAX(1,m); + lapack_int lds_t = MAX(1,m); + lapack_complex_float* x_t = NULL; + lapack_complex_float* y_t = NULL; + lapack_complex_float* z_t = NULL; + lapack_complex_float* b_t = NULL; + lapack_complex_float* w_t = NULL; + lapack_complex_float* s_t = NULL; + /* Check leading dimension(s) */ + if( ldx < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_cgedmd_work", info ); + return info; + } + if( ldy < n ) { + info = -11; + LAPACKE_xerbla( "LAPACKE_cgedmd_work", info ); + return info; + } + if( ldz < n ) { + info = -16; + LAPACKE_xerbla( "LAPACKE_cgedmd_work", info ); + return info; + } + if( ldb < n ) { + info = -19; + LAPACKE_xerbla( "LAPACKE_cgedmd_work", info ); + return info; + } + if( ldw < n ) { + info = -21; + LAPACKE_xerbla( "LAPACKE_cgedmd_work", info ); + return info; + } + if( lds < n ) { + info = -23; + LAPACKE_xerbla( "LAPACKE_cgedmd_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_cgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy, + &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds, + work, &lwork, iwork, &liwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + x_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldx_t * MAX(1,n) ); + if( x_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + y_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldy_t * MAX(1,n) ); + if( y_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + z_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldz_t * MAX(1,n) ); + if( z_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + b_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_3; + } + w_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldw_t * MAX(1,n) ); + if( w_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_4; + } + s_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lds_t * MAX(1,n) ); + if( s_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_5; + } + /* Transpose input matrices */ + LAPACKE_cge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t ); + LAPACKE_cge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t ); + LAPACKE_cge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t ); + LAPACKE_cge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_cge_trans( matrix_layout, m, n, w, ldw, w_t, ldw_t ); + LAPACKE_cge_trans( matrix_layout, m, n, s, lds, s_t, lds_t ); + /* Call LAPACK function and adjust info */ + LAPACK_cgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x_t, &ldx_t, y_t, + &ldy_t, &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t, + w_t, &ldw_t, s_t, &lds_t, work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx ); + LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy ); + LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz ); + LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); + LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, w_t, ldw_t, w, ldw ); + LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds ); + /* Release memory and exit */ + LAPACKE_free( s_t ); +exit_level_5: + LAPACKE_free( w_t ); +exit_level_4: + LAPACKE_free( b_t ); +exit_level_3: + LAPACKE_free( z_t ); +exit_level_2: + LAPACKE_free( y_t ); +exit_level_1: + LAPACKE_free( x_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_cgedmd_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_cgedmd_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgedmdq.c b/lapack-netlib/LAPACKE/src/lapacke_cgedmdq.c new file mode 100644 index 000000000..60e83729b --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_cgedmdq.c @@ -0,0 +1,123 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function cgedmdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_cgedmdq( int matrix_layout, char jobs, char jobz, char jobr, + char jobq, char jobt, char jobf, lapack_int whtsvd, + lapack_int m, lapack_int n, lapack_complex_float* f, + lapack_int ldf, lapack_complex_float* x, + lapack_int ldx, lapack_complex_float* y, + lapack_int ldy, lapack_int nrnk, float tol, + lapack_int k, lapack_complex_float* reig, + lapack_complex_float* imeig, + lapack_complex_float* z, lapack_int ldz, + lapack_complex_float* res, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* v, + lapack_int ldv, lapack_complex_float* s, lapack_int lds) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_int liwork = -1; + lapack_complex_float* work = NULL; + lapack_int* iwork = NULL; + lapack_complex_float work_query; + lapack_int iwork_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_cgedmdq", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, f, ldf ) ) { + return -11; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -13; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, y, ldy ) ) { + return -15; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, z, ldz ) ) { + return -22; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -25; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, v, ldv ) ) { + return -27; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, s, lds ) ) { + return -29; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_cgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt, + jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy, + nrnk, tol, k, reig, imeig, z, ldz, res, + b, ldb, v, ldv, s, lds, &work_query, lwork, + &iwork_query, liwork ); + + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_C2INT( work_query ); + liwork = iwork_query; + /* Allocate memory for work arrays */ + work = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_1; + } + /* Call middle-level interface */ + info = LAPACKE_cgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt, + jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy, + nrnk, tol, k, reig, imeig, z, ldz, res, + b, ldb, v, ldv, s, lds, work, lwork, iwork, + liwork ); + /* Release memory and exit */ + LAPACKE_free( iwork ); +exit_level_1: + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_cgedmdq", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgedmdq_work.c b/lapack-netlib/LAPACKE/src/lapacke_cgedmdq_work.c new file mode 100644 index 000000000..5bdbd3f56 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_cgedmdq_work.c @@ -0,0 +1,205 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function cgedmdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_cgedmdq_work( int matrix_layout, char jobs, char jobz, + char jobr, char jobq, char jobt, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + lapack_complex_float* f, lapack_int ldf, + lapack_complex_float* x, lapack_int ldx, + lapack_complex_float* y, lapack_int ldy, + lapack_int nrnk, float tol, lapack_int k, + lapack_complex_float* reig, + lapack_complex_float* imeig, + lapack_complex_float* z, + lapack_int ldz, lapack_complex_float* res, + lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* v, + lapack_int ldv, lapack_complex_float* s, + lapack_int lds, lapack_complex_float* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_cgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int ldf_t = MAX(1,m); + lapack_int ldx_t = MAX(1,m); + lapack_int ldy_t = MAX(1,m); + lapack_int ldz_t = MAX(1,m); + lapack_int ldb_t = MAX(1,m); + lapack_int ldv_t = MAX(1,m); + lapack_int lds_t = MAX(1,m); + lapack_complex_float* f_t = NULL; + lapack_complex_float* x_t = NULL; + lapack_complex_float* y_t = NULL; + lapack_complex_float* z_t = NULL; + lapack_complex_float* b_t = NULL; + lapack_complex_float* v_t = NULL; + lapack_complex_float* s_t = NULL; + /* Check leading dimension(s) */ + if( ldf < n ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info ); + return info; + } + if( ldx < n ) { + info = -14; + LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info ); + return info; + } + if( ldy < n ) { + info = -16; + LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info ); + return info; + } + if( ldz < n ) { + info = -23; + LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info ); + return info; + } + if( ldb < n ) { + info = -26; + LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info ); + return info; + } + if( ldv < n ) { + info = -28; + LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info ); + return info; + } + if( lds < n ) { + info = -30; + LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 || liwork == -1 ) { + LAPACK_cgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + f_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldf_t * MAX(1,n) ); + if( f_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + x_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldx_t * MAX(1,n) ); + if( x_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + y_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldy_t * MAX(1,n) ); + if( y_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + z_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldz_t * MAX(1,n) ); + if( z_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_3; + } + b_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_4; + } + v_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldv_t * MAX(1,n) ); + if( v_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_5; + } + s_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lds_t * MAX(1,n) ); + if( s_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_6; + } + /* Transpose input matrices */ + LAPACKE_cge_trans( matrix_layout, m, n, f, ldf, f_t, ldf_t ); + LAPACKE_cge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t ); + LAPACKE_cge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t ); + LAPACKE_cge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t ); + LAPACKE_cge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_cge_trans( matrix_layout, m, n, v, ldv, v_t, ldv_t ); + LAPACKE_cge_trans( matrix_layout, m, n, s, lds, s_t, lds_t ); + /* Call LAPACK function and adjust info */ + LAPACK_cgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, f_t, ldf_t, f, ldf ); + LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx ); + LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy ); + LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz ); + LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); + LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, v_t, ldv_t, v, ldv ); + LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds ); + /* Release memory and exit */ + LAPACKE_free( s_t ); +exit_level_6: + LAPACKE_free( v_t ); +exit_level_5: + LAPACKE_free( b_t ); +exit_level_4: + LAPACKE_free( z_t ); +exit_level_3: + LAPACKE_free( y_t ); +exit_level_2: + LAPACKE_free( x_t ); +exit_level_1: + LAPACKE_free( f_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgedmd.c b/lapack-netlib/LAPACKE/src/lapacke_dgedmd.c new file mode 100644 index 000000000..246d7f649 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_dgedmd.c @@ -0,0 +1,112 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function dgedmd +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dgedmd( int matrix_layout, char jobs, char jobz, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + double* x, lapack_int ldx, double* y, lapack_int ldy, + lapack_int k, double* reig, double* imeig, double* z, + lapack_int ldz, double* res, double* b, lapack_int ldb, + double* w, lapack_int ldw, double* s, lapack_int lds) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_int liwork = -1; + double* work = NULL; + lapack_int* iwork = NULL; + double work_query; + lapack_int iwork_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_dgedmd", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -8; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, y, ldy ) ) { + return -10; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, z, ldz ) ) { + return -15; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -18; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, s, lds ) ) { + return -20; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, w, ldw ) ) { + return -22; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_dgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n, + x, ldx, y, ldy, k, reig, imeig, z, ldz, res, + b, ldb, w, ldw, s, lds, &work_query, lwork, + &iwork_query, liwork ); + + if( info != 0 ) { + goto exit_level_0; + } + lwork = (lapack_int) work_query; + liwork = iwork_query; + /* Allocate memory for work arrays */ + work = (double*)LAPACKE_malloc( sizeof(double) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_1; + } + /* Call middle-level interface */ + info = LAPACKE_dgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n, + x, ldx, y, ldy, k, reig, imeig, z, ldz, res, + b, ldb, w, ldw, s, lds, work, lwork, iwork, + liwork ); + /* Release memory and exit */ + LAPACKE_free( iwork ); +exit_level_1: + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dgedmd", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgedmd_work.c b/lapack-netlib/LAPACKE/src/lapacke_dgedmd_work.c new file mode 100644 index 000000000..4d1169de9 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_dgedmd_work.c @@ -0,0 +1,179 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function dgedmd +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dgedmd_work( int matrix_layout, char jobs, char jobz, + char jobf, lapack_int whtsvd, lapack_int m, + lapack_int n, double* x, lapack_int ldx, + double* y, lapack_int ldy, lapack_int k, + double* reig, double* imeig, double* z, + lapack_int ldz, double* res, double* b, + lapack_int ldb, double* w, lapack_int ldw, + double* s, lapack_int lds, double* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_dgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy, + &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int ldx_t = MAX(1,m); + lapack_int ldy_t = MAX(1,m); + lapack_int ldz_t = MAX(1,m); + lapack_int ldb_t = MAX(1,m); + lapack_int ldw_t = MAX(1,m); + lapack_int lds_t = MAX(1,m); + double* x_t = NULL; + double* y_t = NULL; + double* z_t = NULL; + double* b_t = NULL; + double* w_t = NULL; + double* s_t = NULL; + /* Check leading dimension(s) */ + if( ldx < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_dgedmd_work", info ); + return info; + } + if( ldy < n ) { + info = -11; + LAPACKE_xerbla( "LAPACKE_dgedmd_work", info ); + return info; + } + if( ldz < n ) { + info = -16; + LAPACKE_xerbla( "LAPACKE_dgedmd_work", info ); + return info; + } + if( ldb < n ) { + info = -19; + LAPACKE_xerbla( "LAPACKE_dgedmd_work", info ); + return info; + } + if( ldw < n ) { + info = -21; + LAPACKE_xerbla( "LAPACKE_dgedmd_work", info ); + return info; + } + if( lds < n ) { + info = -23; + LAPACKE_xerbla( "LAPACKE_dgedmd_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_dgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy, + &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds, + work, &lwork, iwork, &liwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + x_t = (double*)LAPACKE_malloc( sizeof(double) * ldx_t * MAX(1,n) ); + if( x_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + y_t = (double*)LAPACKE_malloc( sizeof(double) * ldy_t * MAX(1,n) ); + if( y_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + z_t = (double*)LAPACKE_malloc( sizeof(double) * ldz_t * MAX(1,n) ); + if( z_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + b_t = (double*)LAPACKE_malloc( sizeof(double) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_3; + } + w_t = (double*)LAPACKE_malloc( sizeof(double) * ldw_t * MAX(1,n) ); + if( w_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_4; + } + s_t = (double*)LAPACKE_malloc( sizeof(double) * lds_t * MAX(1,n) ); + if( s_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_5; + } + /* Transpose input matrices */ + LAPACKE_dge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t ); + LAPACKE_dge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t ); + LAPACKE_dge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t ); + LAPACKE_dge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_dge_trans( matrix_layout, m, n, w, ldw, w_t, ldw_t ); + LAPACKE_dge_trans( matrix_layout, m, n, s, lds, s_t, lds_t ); + /* Call LAPACK function and adjust info */ + LAPACK_dgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x_t, &ldx_t, y_t, + &ldy_t, &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t, + w_t, &ldw_t, s_t, &lds_t, work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, w_t, ldw_t, w, ldw ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds ); + /* Release memory and exit */ + LAPACKE_free( s_t ); +exit_level_5: + LAPACKE_free( w_t ); +exit_level_4: + LAPACKE_free( b_t ); +exit_level_3: + LAPACKE_free( z_t ); +exit_level_2: + LAPACKE_free( y_t ); +exit_level_1: + LAPACKE_free( x_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dgedmd_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_dgedmd_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgedmdq.c b/lapack-netlib/LAPACKE/src/lapacke_dgedmdq.c new file mode 100644 index 000000000..f3d621ba9 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_dgedmdq.c @@ -0,0 +1,119 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function dgedmdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dgedmdq( int matrix_layout, char jobs, char jobz, char jobr, + char jobq, char jobt, char jobf, lapack_int whtsvd, + lapack_int m, lapack_int n, double* f, lapack_int ldf, + double* x, lapack_int ldx, double* y, lapack_int ldy, + lapack_int nrnk, double tol, lapack_int k, + double* reig, double* imeig, double* z, + lapack_int ldz, double* res, double* b, lapack_int ldb, + double* v, lapack_int ldv, double* s, lapack_int lds) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_int liwork = -1; + double* work = NULL; + lapack_int* iwork = NULL; + double work_query; + lapack_int iwork_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_dgedmdq", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, f, ldf ) ) { + return -11; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -13; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, y, ldy ) ) { + return -15; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, z, ldz ) ) { + return -22; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -25; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, v, ldv ) ) { + return -27; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, s, lds ) ) { + return -29; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_dgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt, + jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy, + nrnk, tol, k, reig, imeig, z, ldz, res, + b, ldb, v, ldv, s, lds, &work_query, lwork, + &iwork_query, liwork ); + + if( info != 0 ) { + goto exit_level_0; + } + lwork = (lapack_int) work_query; + liwork = iwork_query; + /* Allocate memory for work arrays */ + work = (double*)LAPACKE_malloc( sizeof(double) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_1; + } + /* Call middle-level interface */ + info = LAPACKE_dgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt, + jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy, + nrnk, tol, k, reig, imeig, z, ldz, res, + b, ldb, v, ldv, s, lds, work, lwork, iwork, + liwork ); + /* Release memory and exit */ + LAPACKE_free( iwork ); +exit_level_1: + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dgedmdq", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgedmdq_work.c b/lapack-netlib/LAPACKE/src/lapacke_dgedmdq_work.c new file mode 100644 index 000000000..51b2a66d8 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_dgedmdq_work.c @@ -0,0 +1,200 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function dgedmdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dgedmdq_work( int matrix_layout, char jobs, char jobz, + char jobr, char jobq, char jobt, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + double* f, lapack_int ldf, double* x, + lapack_int ldx, double* y, lapack_int ldy, + lapack_int nrnk, double tol, lapack_int k, + double* reig, double* imeig, double* z, + lapack_int ldz, double* res, double* b, + lapack_int ldb, double* v, lapack_int ldv, + double* s, lapack_int lds, double* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_dgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int ldf_t = MAX(1,m); + lapack_int ldx_t = MAX(1,m); + lapack_int ldy_t = MAX(1,m); + lapack_int ldz_t = MAX(1,m); + lapack_int ldb_t = MAX(1,m); + lapack_int ldv_t = MAX(1,m); + lapack_int lds_t = MAX(1,m); + double* f_t = NULL; + double* x_t = NULL; + double* y_t = NULL; + double* z_t = NULL; + double* b_t = NULL; + double* v_t = NULL; + double* s_t = NULL; + /* Check leading dimension(s) */ + if( ldf < n ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info ); + return info; + } + if( ldx < n ) { + info = -14; + LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info ); + return info; + } + if( ldy < n ) { + info = -16; + LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info ); + return info; + } + if( ldz < n ) { + info = -23; + LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info ); + return info; + } + if( ldb < n ) { + info = -26; + LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info ); + return info; + } + if( ldv < n ) { + info = -28; + LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info ); + return info; + } + if( lds < n ) { + info = -30; + LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 || liwork == -1 ) { + LAPACK_dgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + f_t = (double*)LAPACKE_malloc( sizeof(double) * ldf_t * MAX(1,n) ); + if( f_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + x_t = (double*)LAPACKE_malloc( sizeof(double) * ldx_t * MAX(1,n) ); + if( x_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + y_t = (double*)LAPACKE_malloc( sizeof(double) * ldy_t * MAX(1,n) ); + if( y_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + z_t = (double*)LAPACKE_malloc( sizeof(double) * ldz_t * MAX(1,n) ); + if( z_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_3; + } + b_t = (double*)LAPACKE_malloc( sizeof(double) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_4; + } + v_t = (double*)LAPACKE_malloc( sizeof(double) * ldv_t * MAX(1,n) ); + if( v_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_5; + } + s_t = (double*)LAPACKE_malloc( sizeof(double) * lds_t * MAX(1,n) ); + if( s_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_6; + } + /* Transpose input matrices */ + LAPACKE_dge_trans( matrix_layout, m, n, f, ldf, f_t, ldf_t ); + LAPACKE_dge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t ); + LAPACKE_dge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t ); + LAPACKE_dge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t ); + LAPACKE_dge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_dge_trans( matrix_layout, m, n, v, ldv, v_t, ldv_t ); + LAPACKE_dge_trans( matrix_layout, m, n, s, lds, s_t, lds_t ); + /* Call LAPACK function and adjust info */ + LAPACK_dgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, f_t, ldf_t, f, ldf ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, v_t, ldv_t, v, ldv ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds ); + /* Release memory and exit */ + LAPACKE_free( s_t ); +exit_level_6: + LAPACKE_free( v_t ); +exit_level_5: + LAPACKE_free( b_t ); +exit_level_4: + LAPACKE_free( z_t ); +exit_level_3: + LAPACKE_free( y_t ); +exit_level_2: + LAPACKE_free( x_t ); +exit_level_1: + LAPACKE_free( f_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgedmd.c b/lapack-netlib/LAPACKE/src/lapacke_sgedmd.c new file mode 100644 index 000000000..879631b1d --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_sgedmd.c @@ -0,0 +1,112 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function sgedmd +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_sgedmd( int matrix_layout, char jobs, char jobz, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + float* x, lapack_int ldx, float* y, lapack_int ldy, + lapack_int k, float* reig, float* imeig, float* z, + lapack_int ldz, float* res, float* b, lapack_int ldb, + float* w, lapack_int ldw, float* s, lapack_int lds) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_int liwork = -1; + float* work = NULL; + lapack_int* iwork = NULL; + float work_query; + lapack_int iwork_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_sgedmd", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -8; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, y, ldy ) ) { + return -10; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, z, ldz ) ) { + return -15; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -18; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, s, lds ) ) { + return -20; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, w, ldw ) ) { + return -22; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_sgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n, + x, ldx, y, ldy, k, reig, imeig, z, ldz, res, + b, ldb, w, ldw, s, lds, &work_query, lwork, + &iwork_query, liwork ); + + if( info != 0 ) { + goto exit_level_0; + } + lwork = (lapack_int) work_query; + liwork = iwork_query; + /* Allocate memory for work arrays */ + work = (float*)LAPACKE_malloc( sizeof(float) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_1; + } + /* Call middle-level interface */ + info = LAPACKE_sgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n, + x, ldx, y, ldy, k, reig, imeig, z, ldz, res, + b, ldb, w, ldw, s, lds, work, lwork, iwork, + liwork ); + /* Release memory and exit */ + LAPACKE_free( iwork ); +exit_level_1: + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_sgedmd", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgedmd_work.c b/lapack-netlib/LAPACKE/src/lapacke_sgedmd_work.c new file mode 100644 index 000000000..762a9b271 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_sgedmd_work.c @@ -0,0 +1,179 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function sgedmd +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_sgedmd_work( int matrix_layout, char jobs, char jobz, + char jobf, lapack_int whtsvd, lapack_int m, + lapack_int n, float* x, lapack_int ldx, + float* y, lapack_int ldy, lapack_int k, + float* reig, float* imeig, float* z, + lapack_int ldz, float* res, float* b, + lapack_int ldb, float* w, lapack_int ldw, + float* s, lapack_int lds, float* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_sgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy, + &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int ldx_t = MAX(1,m); + lapack_int ldy_t = MAX(1,m); + lapack_int ldz_t = MAX(1,m); + lapack_int ldb_t = MAX(1,m); + lapack_int ldw_t = MAX(1,m); + lapack_int lds_t = MAX(1,m); + float* x_t = NULL; + float* y_t = NULL; + float* z_t = NULL; + float* b_t = NULL; + float* w_t = NULL; + float* s_t = NULL; + /* Check leading dimension(s) */ + if( ldx < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_sgedmd_work", info ); + return info; + } + if( ldy < n ) { + info = -11; + LAPACKE_xerbla( "LAPACKE_sgedmd_work", info ); + return info; + } + if( ldz < n ) { + info = -16; + LAPACKE_xerbla( "LAPACKE_sgedmd_work", info ); + return info; + } + if( ldb < n ) { + info = -19; + LAPACKE_xerbla( "LAPACKE_sgedmd_work", info ); + return info; + } + if( ldw < n ) { + info = -21; + LAPACKE_xerbla( "LAPACKE_sgedmd_work", info ); + return info; + } + if( lds < n ) { + info = -23; + LAPACKE_xerbla( "LAPACKE_sgedmd_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_sgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy, + &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds, + work, &lwork, iwork, &liwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + x_t = (float*)LAPACKE_malloc( sizeof(float) * ldx_t * MAX(1,n) ); + if( x_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + y_t = (float*)LAPACKE_malloc( sizeof(float) * ldy_t * MAX(1,n) ); + if( y_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + z_t = (float*)LAPACKE_malloc( sizeof(float) * ldz_t * MAX(1,n) ); + if( z_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + b_t = (float*)LAPACKE_malloc( sizeof(float) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_3; + } + w_t = (float*)LAPACKE_malloc( sizeof(float) * ldw_t * MAX(1,n) ); + if( w_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_4; + } + s_t = (float*)LAPACKE_malloc( sizeof(float) * lds_t * MAX(1,n) ); + if( s_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_5; + } + /* Transpose input matrices */ + LAPACKE_sge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t ); + LAPACKE_sge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t ); + LAPACKE_sge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t ); + LAPACKE_sge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_sge_trans( matrix_layout, m, n, w, ldw, w_t, ldw_t ); + LAPACKE_sge_trans( matrix_layout, m, n, s, lds, s_t, lds_t ); + /* Call LAPACK function and adjust info */ + LAPACK_sgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x_t, &ldx_t, y_t, + &ldy_t, &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t, + w_t, &ldw_t, s_t, &lds_t, work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx ); + LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy ); + LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz ); + LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); + LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, w_t, ldw_t, w, ldw ); + LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds ); + /* Release memory and exit */ + LAPACKE_free( s_t ); +exit_level_5: + LAPACKE_free( w_t ); +exit_level_4: + LAPACKE_free( b_t ); +exit_level_3: + LAPACKE_free( z_t ); +exit_level_2: + LAPACKE_free( y_t ); +exit_level_1: + LAPACKE_free( x_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_sgedmd_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_sgedmd_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgedmdq.c b/lapack-netlib/LAPACKE/src/lapacke_sgedmdq.c new file mode 100644 index 000000000..e202d7fbd --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_sgedmdq.c @@ -0,0 +1,119 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function sgedmdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_sgedmdq( int matrix_layout, char jobs, char jobz, char jobr, + char jobq, char jobt, char jobf, lapack_int whtsvd, + lapack_int m, lapack_int n, float* f, lapack_int ldf, + float* x, lapack_int ldx, float* y, lapack_int ldy, + lapack_int nrnk, float tol, lapack_int k, + float* reig, float* imeig, float* z, + lapack_int ldz, float* res, float* b, lapack_int ldb, + float* v, lapack_int ldv, float* s, lapack_int lds) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_int liwork = -1; + float* work = NULL; + lapack_int* iwork = NULL; + float work_query; + lapack_int iwork_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_sgedmdq", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, f, ldf ) ) { + return -11; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -13; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, y, ldy ) ) { + return -15; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, z, ldz ) ) { + return -22; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -25; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, v, ldv ) ) { + return -27; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, s, lds ) ) { + return -29; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_sgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt, + jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy, + nrnk, tol, k, reig, imeig, z, ldz, res, + b, ldb, v, ldv, s, lds, &work_query, lwork, + &iwork_query, liwork ); + + if( info != 0 ) { + goto exit_level_0; + } + lwork = (lapack_int) work_query; + liwork = iwork_query; + /* Allocate memory for work arrays */ + work = (float*)LAPACKE_malloc( sizeof(float) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_1; + } + /* Call middle-level interface */ + info = LAPACKE_sgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt, + jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy, + nrnk, tol, k, reig, imeig, z, ldz, res, + b, ldb, v, ldv, s, lds, work, lwork, iwork, + liwork ); + /* Release memory and exit */ + LAPACKE_free( iwork ); +exit_level_1: + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_sgedmdq", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgedmdq_work.c b/lapack-netlib/LAPACKE/src/lapacke_sgedmdq_work.c new file mode 100644 index 000000000..9039898d2 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_sgedmdq_work.c @@ -0,0 +1,200 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function sgedmdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_sgedmdq_work( int matrix_layout, char jobs, char jobz, + char jobr, char jobq, char jobt, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + float* f, lapack_int ldf, float* x, + lapack_int ldx, float* y, lapack_int ldy, + lapack_int nrnk, float tol, lapack_int k, + float* reig, float* imeig, float* z, + lapack_int ldz, float* res, float* b, + lapack_int ldb, float* v, lapack_int ldv, + float* s, lapack_int lds, float* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_sgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int ldf_t = MAX(1,m); + lapack_int ldx_t = MAX(1,m); + lapack_int ldy_t = MAX(1,m); + lapack_int ldz_t = MAX(1,m); + lapack_int ldb_t = MAX(1,m); + lapack_int ldv_t = MAX(1,m); + lapack_int lds_t = MAX(1,m); + float* f_t = NULL; + float* x_t = NULL; + float* y_t = NULL; + float* z_t = NULL; + float* b_t = NULL; + float* v_t = NULL; + float* s_t = NULL; + /* Check leading dimension(s) */ + if( ldf < n ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info ); + return info; + } + if( ldx < n ) { + info = -14; + LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info ); + return info; + } + if( ldy < n ) { + info = -16; + LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info ); + return info; + } + if( ldz < n ) { + info = -23; + LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info ); + return info; + } + if( ldb < n ) { + info = -26; + LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info ); + return info; + } + if( ldv < n ) { + info = -28; + LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info ); + return info; + } + if( lds < n ) { + info = -30; + LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 || liwork == -1 ) { + LAPACK_sgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + f_t = (float*)LAPACKE_malloc( sizeof(float) * ldf_t * MAX(1,n) ); + if( f_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + x_t = (float*)LAPACKE_malloc( sizeof(float) * ldx_t * MAX(1,n) ); + if( x_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + y_t = (float*)LAPACKE_malloc( sizeof(float) * ldy_t * MAX(1,n) ); + if( y_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + z_t = (float*)LAPACKE_malloc( sizeof(float) * ldz_t * MAX(1,n) ); + if( z_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_3; + } + b_t = (float*)LAPACKE_malloc( sizeof(float) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_4; + } + v_t = (float*)LAPACKE_malloc( sizeof(float) * ldv_t * MAX(1,n) ); + if( v_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_5; + } + s_t = (float*)LAPACKE_malloc( sizeof(float) * lds_t * MAX(1,n) ); + if( s_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_6; + } + /* Transpose input matrices */ + LAPACKE_sge_trans( matrix_layout, m, n, f, ldf, f_t, ldf_t ); + LAPACKE_sge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t ); + LAPACKE_sge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t ); + LAPACKE_sge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t ); + LAPACKE_sge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_sge_trans( matrix_layout, m, n, v, ldv, v_t, ldv_t ); + LAPACKE_sge_trans( matrix_layout, m, n, s, lds, s_t, lds_t ); + /* Call LAPACK function and adjust info */ + LAPACK_sgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, f_t, ldf_t, f, ldf ); + LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx ); + LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy ); + LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz ); + LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); + LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, v_t, ldv_t, v, ldv ); + LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds ); + /* Release memory and exit */ + LAPACKE_free( s_t ); +exit_level_6: + LAPACKE_free( v_t ); +exit_level_5: + LAPACKE_free( b_t ); +exit_level_4: + LAPACKE_free( z_t ); +exit_level_3: + LAPACKE_free( y_t ); +exit_level_2: + LAPACKE_free( x_t ); +exit_level_1: + LAPACKE_free( f_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgedmd.c b/lapack-netlib/LAPACKE/src/lapacke_zgedmd.c new file mode 100644 index 000000000..f3f421c54 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zgedmd.c @@ -0,0 +1,116 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function zgedmd +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zgedmd( int matrix_layout, char jobs, char jobz, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + lapack_complex_double* x, lapack_int ldx, + lapack_complex_double* y, lapack_int ldy, + lapack_int k, lapack_complex_double* reig, + lapack_complex_double* imeig, lapack_complex_double* z, + lapack_int ldz, lapack_complex_double* res, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* w, lapack_int ldw, + lapack_complex_double* s, lapack_int lds) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_int liwork = -1; + lapack_complex_double* work = NULL; + lapack_int* iwork = NULL; + lapack_complex_double work_query; + lapack_int iwork_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_zgedmd", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, y, ldy ) ) { + return -10; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, z, ldz ) ) { + return -15; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -18; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, s, lds ) ) { + return -20; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, w, ldw ) ) { + return -22; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_zgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n, + x, ldx, y, ldy, k, reig, imeig, z, ldz, res, + b, ldb, w, ldw, s, lds, &work_query, lwork, + &iwork_query, liwork ); + + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_Z2INT( work_query ); + liwork = iwork_query; + /* Allocate memory for work arrays */ + work = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_1; + } + /* Call middle-level interface */ + info = LAPACKE_zgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n, + x, ldx, y, ldy, k, reig, imeig, z, ldz, res, + b, ldb, w, ldw, s, lds, work, lwork, iwork, + liwork ); + /* Release memory and exit */ + LAPACKE_free( iwork ); +exit_level_1: + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zgedmd", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgedmd_work.c b/lapack-netlib/LAPACKE/src/lapacke_zgedmd_work.c new file mode 100644 index 000000000..2554411ec --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zgedmd_work.c @@ -0,0 +1,182 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function zgedmd +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zgedmd_work( int matrix_layout, char jobs, char jobz, + char jobf, lapack_int whtsvd, lapack_int m, + lapack_int n, lapack_complex_double* x, + lapack_int ldx, lapack_complex_double* y, + lapack_int ldy, lapack_int k, + lapack_complex_double* reig, + lapack_complex_double* imeig, lapack_complex_double* z, + lapack_int ldz, lapack_complex_double* res, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* w, lapack_int ldw, + lapack_complex_double* s, lapack_int lds, + lapack_complex_double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_zgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy, + &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int ldx_t = MAX(1,m); + lapack_int ldy_t = MAX(1,m); + lapack_int ldz_t = MAX(1,m); + lapack_int ldb_t = MAX(1,m); + lapack_int ldw_t = MAX(1,m); + lapack_int lds_t = MAX(1,m); + lapack_complex_double* x_t = NULL; + lapack_complex_double* y_t = NULL; + lapack_complex_double* z_t = NULL; + lapack_complex_double* b_t = NULL; + lapack_complex_double* w_t = NULL; + lapack_complex_double* s_t = NULL; + /* Check leading dimension(s) */ + if( ldx < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_zgedmd_work", info ); + return info; + } + if( ldy < n ) { + info = -11; + LAPACKE_xerbla( "LAPACKE_zgedmd_work", info ); + return info; + } + if( ldz < n ) { + info = -16; + LAPACKE_xerbla( "LAPACKE_zgedmd_work", info ); + return info; + } + if( ldb < n ) { + info = -19; + LAPACKE_xerbla( "LAPACKE_zgedmd_work", info ); + return info; + } + if( ldw < n ) { + info = -21; + LAPACKE_xerbla( "LAPACKE_zgedmd_work", info ); + return info; + } + if( lds < n ) { + info = -23; + LAPACKE_xerbla( "LAPACKE_zgedmd_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_zgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy, + &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds, + work, &lwork, iwork, &liwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + x_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldx_t * MAX(1,n) ); + if( x_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + y_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldy_t * MAX(1,n) ); + if( y_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + z_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldz_t * MAX(1,n) ); + if( z_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + b_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_3; + } + w_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldw_t * MAX(1,n) ); + if( w_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_4; + } + s_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lds_t * MAX(1,n) ); + if( s_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_5; + } + /* Transpose input matrices */ + LAPACKE_zge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t ); + LAPACKE_zge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t ); + LAPACKE_zge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t ); + LAPACKE_zge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_zge_trans( matrix_layout, m, n, w, ldw, w_t, ldw_t ); + LAPACKE_zge_trans( matrix_layout, m, n, s, lds, s_t, lds_t ); + /* Call LAPACK function and adjust info */ + LAPACK_zgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x_t, &ldx_t, y_t, + &ldy_t, &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t, + w_t, &ldw_t, s_t, &lds_t, work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx ); + LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy ); + LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz ); + LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); + LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, w_t, ldw_t, w, ldw ); + LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds ); + /* Release memory and exit */ + LAPACKE_free( s_t ); +exit_level_5: + LAPACKE_free( w_t ); +exit_level_4: + LAPACKE_free( b_t ); +exit_level_3: + LAPACKE_free( z_t ); +exit_level_2: + LAPACKE_free( y_t ); +exit_level_1: + LAPACKE_free( x_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zgedmd_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_zgedmd_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgedmdq.c b/lapack-netlib/LAPACKE/src/lapacke_zgedmdq.c new file mode 100644 index 000000000..3648ffdf2 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zgedmdq.c @@ -0,0 +1,123 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function zgedmdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zgedmdq( int matrix_layout, char jobs, char jobz, char jobr, + char jobq, char jobt, char jobf, lapack_int whtsvd, + lapack_int m, lapack_int n, lapack_complex_double* f, + lapack_int ldf, lapack_complex_double* x, + lapack_int ldx, lapack_complex_double* y, + lapack_int ldy, lapack_int nrnk, double tol, + lapack_int k, lapack_complex_double* reig, + lapack_complex_double* imeig, + lapack_complex_double* z, lapack_int ldz, + lapack_complex_double* res, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* v, + lapack_int ldv, lapack_complex_double* s, lapack_int lds) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_int liwork = -1; + lapack_complex_double* work = NULL; + lapack_int* iwork = NULL; + lapack_complex_double work_query; + lapack_int iwork_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_cgedmdq", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, f, ldf ) ) { + return -11; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -13; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, y, ldy ) ) { + return -15; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, z, ldz ) ) { + return -22; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -25; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, v, ldv ) ) { + return -27; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, s, lds ) ) { + return -29; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_zgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt, + jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy, + nrnk, tol, k, reig, imeig, z, ldz, res, + b, ldb, v, ldv, s, lds, &work_query, lwork, + &iwork_query, liwork ); + + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_Z2INT( work_query ); + liwork = iwork_query; + /* Allocate memory for work arrays */ + work = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_1; + } + /* Call middle-level interface */ + info = LAPACKE_zgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt, + jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy, + nrnk, tol, k, reig, imeig, z, ldz, res, + b, ldb, v, ldv, s, lds, work, lwork, iwork, + liwork ); + /* Release memory and exit */ + LAPACKE_free( iwork ); +exit_level_1: + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zgedmdq", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgedmdq_work.c b/lapack-netlib/LAPACKE/src/lapacke_zgedmdq_work.c new file mode 100644 index 000000000..9afceba07 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zgedmdq_work.c @@ -0,0 +1,205 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function zgedmdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zgedmdq_work( int matrix_layout, char jobs, char jobz, + char jobr, char jobq, char jobt, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + lapack_complex_double* f, lapack_int ldf, + lapack_complex_double* x, lapack_int ldx, + lapack_complex_double* y, lapack_int ldy, + lapack_int nrnk, double tol, lapack_int k, + lapack_complex_double* reig, + lapack_complex_double* imeig, + lapack_complex_double* z, + lapack_int ldz, lapack_complex_double* res, + lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* v, + lapack_int ldv, lapack_complex_double* s, + lapack_int lds, lapack_complex_double* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_zgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int ldf_t = MAX(1,m); + lapack_int ldx_t = MAX(1,m); + lapack_int ldy_t = MAX(1,m); + lapack_int ldz_t = MAX(1,m); + lapack_int ldb_t = MAX(1,m); + lapack_int ldv_t = MAX(1,m); + lapack_int lds_t = MAX(1,m); + lapack_complex_double* f_t = NULL; + lapack_complex_double* x_t = NULL; + lapack_complex_double* y_t = NULL; + lapack_complex_double* z_t = NULL; + lapack_complex_double* b_t = NULL; + lapack_complex_double* v_t = NULL; + lapack_complex_double* s_t = NULL; + /* Check leading dimension(s) */ + if( ldf < n ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info ); + return info; + } + if( ldx < n ) { + info = -14; + LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info ); + return info; + } + if( ldy < n ) { + info = -16; + LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info ); + return info; + } + if( ldz < n ) { + info = -23; + LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info ); + return info; + } + if( ldb < n ) { + info = -26; + LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info ); + return info; + } + if( ldv < n ) { + info = -28; + LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info ); + return info; + } + if( lds < n ) { + info = -30; + LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 || liwork == -1 ) { + LAPACK_zgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + f_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldf_t * MAX(1,n) ); + if( f_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + x_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldx_t * MAX(1,n) ); + if( x_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + y_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldy_t * MAX(1,n) ); + if( y_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + z_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldz_t * MAX(1,n) ); + if( z_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_3; + } + b_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_4; + } + v_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldv_t * MAX(1,n) ); + if( v_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_5; + } + s_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lds_t * MAX(1,n) ); + if( s_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_6; + } + /* Transpose input matrices */ + LAPACKE_zge_trans( matrix_layout, m, n, f, ldf, f_t, ldf_t ); + LAPACKE_zge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t ); + LAPACKE_zge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t ); + LAPACKE_zge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t ); + LAPACKE_zge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_zge_trans( matrix_layout, m, n, v, ldv, v_t, ldv_t ); + LAPACKE_zge_trans( matrix_layout, m, n, s, lds, s_t, lds_t ); + /* Call LAPACK function and adjust info */ + LAPACK_zgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, f_t, ldf_t, f, ldf ); + LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx ); + LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy ); + LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz ); + LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); + LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, v_t, ldv_t, v, ldv ); + LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds ); + /* Release memory and exit */ + LAPACKE_free( s_t ); +exit_level_6: + LAPACKE_free( v_t ); +exit_level_5: + LAPACKE_free( b_t ); +exit_level_4: + LAPACKE_free( z_t ); +exit_level_3: + LAPACKE_free( y_t ); +exit_level_2: + LAPACKE_free( x_t ); +exit_level_1: + LAPACKE_free( f_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info ); + } + return info; +} From e28fdf71b78613073701b3fc2f05f60ad46bb6ed Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 20 Jun 2023 14:50:54 +0200 Subject: [PATCH 03/15] Add LAPACKE interfaces for Dynamic Mode Decomposition (Reference-LAPACK PR 736) --- lapack-netlib/LAPACKE/src/Makefile | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/lapack-netlib/LAPACKE/src/Makefile b/lapack-netlib/LAPACKE/src/Makefile index 9d81e2416..969288f42 100644 --- a/lapack-netlib/LAPACKE/src/Makefile +++ b/lapack-netlib/LAPACKE/src/Makefile @@ -137,6 +137,10 @@ lapacke_cgerqf.o \ lapacke_cgerqf_work.o \ lapacke_cgesdd.o \ lapacke_cgesdd_work.o \ +lapacke_cgedmd.o \ +lapacke_cgedmd_work.o \ +lapacke_cgedmdq.o \ +lapacke_cgedmdq_work.o \ lapacke_cgesv.o \ lapacke_cgesv_work.o \ lapacke_cgesvd.o \ @@ -763,6 +767,10 @@ lapacke_dgerqf.o \ lapacke_dgerqf_work.o \ lapacke_dgesdd.o \ lapacke_dgesdd_work.o \ +lapacke_dgedmd.o \ +lapacke_dgedmd_work.o \ +lapacke_dgedmdq.o \ +lapacke_dgedmdq_work.o \ lapacke_dgesv.o \ lapacke_dgesv_work.o \ lapacke_dgesvd.o \ @@ -1343,6 +1351,10 @@ lapacke_sgerqf.o \ lapacke_sgerqf_work.o \ lapacke_sgesdd.o \ lapacke_sgesdd_work.o \ +lapacke_sgedmd.o \ +lapacke_sgedmd_work.o \ +lapacke_sgedmdq.o \ +lapacke_sgedmdq_work.o \ lapacke_sgesv.o \ lapacke_sgesv_work.o \ lapacke_sgesvd.o \ @@ -1913,6 +1925,10 @@ lapacke_zgerqf.o \ lapacke_zgerqf_work.o \ lapacke_zgesdd.o \ lapacke_zgesdd_work.o \ +lapacke_zgedmd.o \ +lapacke_zgedmd_work.o \ +lapacke_zgedmdq.o \ +lapacke_zgedmdq_work.o \ lapacke_zgesv.o \ lapacke_zgesv_work.o \ lapacke_zgesvd.o \ From 83d6ce12893e182bfb212ea5f6d7f3d2f489cda3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 20 Jun 2023 14:55:26 +0200 Subject: [PATCH 04/15] Add interfaces for Dynamic Mode Decomposition (Reference-LAPACK PR 736) --- cmake/lapacke.cmake | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/cmake/lapacke.cmake b/cmake/lapacke.cmake index 6713beefc..f43bf10d0 100644 --- a/cmake/lapacke.cmake +++ b/cmake/lapacke.cmake @@ -90,6 +90,10 @@ set(CSRC lapacke_cgerqf_work.c lapacke_cgesdd.c lapacke_cgesdd_work.c + lapacke_cgedmd.c + lapacke_cgedmd_work.c + lapacke_cgedmdq.c + lapacke_cgedmdq_work.c lapacke_cgesv.c lapacke_cgesv_work.c lapacke_cgesvd.c @@ -713,6 +717,10 @@ set(DSRC lapacke_dgerqf_work.c lapacke_dgesdd.c lapacke_dgesdd_work.c + lapacke_dgedmd.c + lapacke_dgedmd_work.c + lapacke_dgedmdq.c + lapacke_dgedmdq_work.c lapacke_dgesv.c lapacke_dgesv_work.c lapacke_dgesvd.c @@ -1291,6 +1299,10 @@ set(SSRC lapacke_sgerqf_work.c lapacke_sgesdd.c lapacke_sgesdd_work.c + lapacke_sgedmd.c + lapacke_sgedmd_work.c + lapacke_sgedmdq.c + lapacke_sgedmdq_work.c lapacke_sgesv.c lapacke_sgesv_work.c lapacke_sgesvd.c @@ -1863,6 +1875,10 @@ set(ZSRC lapacke_zgerqf_work.c lapacke_zgesdd.c lapacke_zgesdd_work.c + lapacke_zgedmd.c + lapacke_zgedmd_work.c + lapacke_zgedmdq.c + lapacke_zgedmdq_work.c lapacke_zgesv.c lapacke_zgesv_work.c lapacke_zgesvd.c From defafd1353a13e20452f94a3320593ecf6ffe0b4 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 20 Jun 2023 15:07:53 +0200 Subject: [PATCH 05/15] Add functions for Dynamic Mode Decomposition (Reference-LAPACK PR 736) --- cmake/lapack.cmake | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake index 544e226ab..077390d90 100644 --- a/cmake/lapack.cmake +++ b/cmake/lapack.cmake @@ -124,7 +124,7 @@ set(SLASRC ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f sgesvdq.f slaorhr_col_getrfnp.f slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f - slatrs3.f strsyl3.f sgelst.f) + slatrs3.f strsyl3.f sgelst.f sgedmd.f90 sgedmdq.f90) set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f @@ -223,7 +223,7 @@ set(CLASRC chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f cungtsqr.f cungtsqr_row.f cunhr_col.f - clatrs3.f ctrsyl3.f cgelst.f) + clatrs3.f ctrsyl3.f cgelst.f cgedmd.f90 cgedmdq.f90) set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f @@ -316,7 +316,7 @@ set(DLASRC dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f - dlatrs3.f dtrsyl3.f dgelst.f) + dlatrs3.f dtrsyl3.f dgelst.f dgedmd.f90 dgedmdq.f90) set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f @@ -419,7 +419,7 @@ set(ZLASRC zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f zungtsqr.f zungtsqr_row.f zunhr_col.f - zlatrs3.f ztrsyl3.f zgelst.f) + zlatrs3.f ztrsyl3.f zgelst.f zgedmd.f90 zgedmdq.f90) set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f From 9f0ef475b4fe9e287cd253aa9c285155887ee99c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 20 Jun 2023 15:12:02 +0200 Subject: [PATCH 06/15] Add Dynamic Mode Decomposition functions (Reference-LAPACK PR 736) --- lapack-netlib/SRC/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index 8cac42330..74db14e46 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -207,7 +207,7 @@ SLASRC_O = \ ssytrd_2stage.o ssytrd_sy2sb.o ssytrd_sb2st.o ssb2st_kernels.o \ ssyevd_2stage.o ssyev_2stage.o ssyevx_2stage.o ssyevr_2stage.o \ ssbev_2stage.o ssbevx_2stage.o ssbevd_2stage.o ssygv_2stage.o \ - sgesvdq.o slatrs3.o strsyl3.o sgelst.o + sgesvdq.o slatrs3.o strsyl3.o sgelst.o sgedmd.o sgedmdq.o endif @@ -316,7 +316,7 @@ CLASRC_O = \ chetrd_2stage.o chetrd_he2hb.o chetrd_hb2st.o chb2st_kernels.o \ cheevd_2stage.o cheev_2stage.o cheevx_2stage.o cheevr_2stage.o \ chbev_2stage.o chbevx_2stage.o chbevd_2stage.o chegv_2stage.o \ - cgesvdq.o clatrs3.o ctrsyl3.o cgelst.o + cgesvdq.o clatrs3.o ctrsyl3.o cgelst.o cgedmd.o cgedmdq.o endif ifdef USEXBLAS @@ -417,7 +417,7 @@ DLASRC_O = \ dsytrd_2stage.o dsytrd_sy2sb.o dsytrd_sb2st.o dsb2st_kernels.o \ dsyevd_2stage.o dsyev_2stage.o dsyevx_2stage.o dsyevr_2stage.o \ dsbev_2stage.o dsbevx_2stage.o dsbevd_2stage.o dsygv_2stage.o \ - dgesvdq.o dlatrs3.o dtrsyl3.o dgelst.o + dgesvdq.o dlatrs3.o dtrsyl3.o dgelst.o dgedmd.o dgedmdq.o endif ifdef USEXBLAS @@ -526,7 +526,7 @@ ZLASRC_O = \ zhetrd_2stage.o zhetrd_he2hb.o zhetrd_hb2st.o zhb2st_kernels.o \ zheevd_2stage.o zheev_2stage.o zheevx_2stage.o zheevr_2stage.o \ zhbev_2stage.o zhbevx_2stage.o zhbevd_2stage.o zhegv_2stage.o \ - zgesvdq.o zlatrs3.o ztrsyl3.o zgelst.o + zgesvdq.o zlatrs3.o ztrsyl3.o zgelst.o zgedmd.o zgedmdq.o endif ifdef USEXBLAS From c7a05458cd6d315ac3cc377c3760f59784d9e9e8 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 20 Jun 2023 15:52:44 +0200 Subject: [PATCH 07/15] Add Dynamic Mode Decomposition functions (Reference-LAPACK PR 736) --- lapack-netlib/LAPACKE/include/lapack.h | 132 +++++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/lapack-netlib/LAPACKE/include/lapack.h b/lapack-netlib/LAPACKE/include/lapack.h index a0fcaa259..a5d02b42e 100644 --- a/lapack-netlib/LAPACKE/include/lapack.h +++ b/lapack-netlib/LAPACKE/include/lapack.h @@ -3323,6 +3323,138 @@ void LAPACK_zgesdd_base( #define LAPACK_zgesdd(...) LAPACK_zgesdd_base(__VA_ARGS__) #endif +#define LAPACK_cgedmd LAPACK_GLOBAL(cgedmd,CGEDMD) +void LAPACK_cgedmd( + char const* jobs, char const* jobz, char const* jobf, + lapack_int const* whtsvd, lapack_int const* m, lapack_int const* n, + lapack_complex_float* x, lapack_int const* ldx, + lapack_complex_float* y, lapack_int const* ldy, lapack_int const* k, + lapack_complex_float* reig, lapack_complex_float* imeig, + lapack_complex_float* z, lapack_int const* ldz, lapack_complex_float* res, + lapack_complex_float* b, lapack_int const* ldb, + lapack_complex_float* w, lapack_int const* ldw, + lapack_complex_float* s, lapack_int const* lds, + lapack_complex_float* work, lapack_int const* lwork, + lapack_int* iwork, lapack_int const* liwork, + lapack_int* info ); + +#define LAPACK_dgedmd LAPACK_GLOBAL(dgedmd,DGEDMD) +void LAPACK_dgedmd( + char const* jobs, char const* jobz, char const* jobf, + lapack_int const* whtsvd, lapack_int const* m, lapack_int const* n, + double* x, lapack_int const* ldx, + double* y, lapack_int const* ldy, lapack_int const* k, + double* reig, double* imeig, + double* z, lapack_int const* ldz, double* res, + double* b, lapack_int const* ldb, + double* w, lapack_int const* ldw, + double* s, lapack_int const* lds, + double* work, lapack_int const* lwork, + lapack_int* iwork, lapack_int const* liwork, + lapack_int* info ); + +#define LAPACK_sgedmd LAPACK_GLOBAL(sgedmd,SGEDMD) +void LAPACK_sgedmd( + char const* jobs, char const* jobz, char const* jobf, + lapack_int const* whtsvd, lapack_int const* m, lapack_int const* n, + float* x, lapack_int const* ldx, + float* y, lapack_int const* ldy, lapack_int const* k, + float* reig, float* imeig, + float* z, lapack_int const* ldz, float* res, + float* b, lapack_int const* ldb, + float* w, lapack_int const* ldw, + float* s, lapack_int const* lds, + float* work, lapack_int const* lwork, + lapack_int* iwork, lapack_int const* liwork, + lapack_int* info ); + +#define LAPACK_zgedmd LAPACK_GLOBAL(zgedmd,ZGEDMD) +void LAPACK_zgedmd( + char const* jobs, char const* jobz, char const* jobf, + lapack_int const* whtsvd, lapack_int const* m, lapack_int const* n, + lapack_complex_double* x, lapack_int const* ldx, + lapack_complex_double* y, lapack_int const* ldy, lapack_int const* k, + lapack_complex_double* reig, lapack_complex_double* imeig, + lapack_complex_double* z, lapack_int const* ldz, lapack_complex_double* res, + lapack_complex_double* b, lapack_int const* ldb, + lapack_complex_double* w, lapack_int const* ldw, + lapack_complex_double* s, lapack_int const* lds, + lapack_complex_double* work, lapack_int const* lwork, + lapack_int* iwork, lapack_int const* liwork, + lapack_int* info ); + +#define LAPACK_cgedmdq LAPACK_GLOBAL(cgedmdq,CGEDMDQ) +void LAPACK_cgedmdq( + char const* jobs, char const* jobz, char const* jobr, char const* jobq, + char const* jobt, char const* jobf, lapack_int const* whtsvd, + lapack_int const* m, lapack_int const* n, + lapack_complex_float* f, lapack_int const* ldf, + lapack_complex_float* x, lapack_int const* ldx, + lapack_complex_float* y, lapack_int const* ldy, lapack_int const* nrnk, + float const* tol, lapack_int const* k, + lapack_complex_float* reig, lapack_complex_float* imeig, + lapack_complex_float* z, lapack_int const* ldz, lapack_complex_float* res, + lapack_complex_float* b, lapack_int const* ldb, + lapack_complex_float* v, lapack_int const* ldv, + lapack_complex_float* s, lapack_int const* lds, + lapack_complex_float* work, lapack_int const* lwork, + lapack_int* iwork, lapack_int const* liwork, + lapack_int* info ); + +#define LAPACK_dgedmdq LAPACK_GLOBAL(dgedmdq,DGEDMDQ) +void LAPACK_dgedmdq( + char const* jobs, char const* jobz, char const* jobr, char const* jobq, + char const* jobt, char const* jobf, lapack_int const* whtsvd, + lapack_int const* m, lapack_int const* n, + double* f, lapack_int const* ldf, + double* x, lapack_int const* ldx, + double* y, lapack_int const* ldy, lapack_int const* nrnk, + double const* tol, lapack_int const* k, + double* reig, double* imeig, + double* z, lapack_int const* ldz, double* res, + double* b, lapack_int const* ldb, + double* v, lapack_int const* ldv, + double* s, lapack_int const* lds, + double* work, lapack_int const* lwork, + lapack_int* iwork, lapack_int const* liwork, + lapack_int* info ); + +#define LAPACK_sgedmdq LAPACK_GLOBAL(sgedmdq,SGEDMDQ) +void LAPACK_sgedmdq( + char const* jobs, char const* jobz, char const* jobr, char const* jobq, + char const* jobt, char const* jobf, lapack_int const* whtsvd, + lapack_int const* m, lapack_int const* n, + float* f, lapack_int const* ldf, + float* x, lapack_int const* ldx, + float* y, lapack_int const* ldy, lapack_int const* nrnk, + float const* tol, lapack_int const* k, + float* reig, float* imeig, + float* z, lapack_int const* ldz, float* res, + float* b, lapack_int const* ldb, + float* v, lapack_int const* ldv, + float* s, lapack_int const* lds, + float* work, lapack_int const* lwork, + lapack_int* iwork, lapack_int const* liwork, + lapack_int* info ); + +#define LAPACK_zgedmdq LAPACK_GLOBAL(zgedmdq,ZGEDMDQ) +void LAPACK_zgedmdq( + char const* jobs, char const* jobz, char const* jobr, char const* jobq, + char const* jobt, char const* jobf, lapack_int const* whtsvd, + lapack_int const* m, lapack_int const* n, + lapack_complex_double* f, lapack_int const* ldf, + lapack_complex_double* x, lapack_int const* ldx, + lapack_complex_double* y, lapack_int const* ldy, lapack_int const* nrnk, + double const* tol, lapack_int const* k, + lapack_complex_double* reig, lapack_complex_double* imeig, + lapack_complex_double* z, lapack_int const* ldz, lapack_complex_double* res, + lapack_complex_double* b, lapack_int const* ldb, + lapack_complex_double* v, lapack_int const* ldv, + lapack_complex_double* s, lapack_int const* lds, + lapack_complex_double* work, lapack_int const* lwork, + lapack_int* iwork, lapack_int const* liwork, + lapack_int* info ) + #define LAPACK_cgesv LAPACK_GLOBAL(cgesv,CGESV) lapack_int LAPACK_cgesv( lapack_int const* n, lapack_int const* nrhs, From de88063aa2795fd25c00efeb92791b9c1d38c5be Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 20 Jun 2023 15:56:10 +0200 Subject: [PATCH 08/15] Add interfaces for Dynamic Mode Decomposition functions (Reference-LAPACK PR 736) --- lapack-netlib/LAPACKE/include/lapacke.h | 131 ++++++++++++++++++++++-- 1 file changed, 122 insertions(+), 9 deletions(-) diff --git a/lapack-netlib/LAPACKE/include/lapacke.h b/lapack-netlib/LAPACKE/include/lapacke.h index 9bd228064..9a9ab4753 100644 --- a/lapack-netlib/LAPACKE/include/lapacke.h +++ b/lapack-netlib/LAPACKE/include/lapacke.h @@ -956,7 +956,7 @@ lapack_int LAPACKE_zgesvdq( int matrix_layout, char joba, char jobp, char jobr, lapack_int lda, double* s, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* v, lapack_int ldv, lapack_int* numrank ); - + lapack_int LAPACKE_sgesvj( int matrix_layout, char joba, char jobu, char jobv, lapack_int m, lapack_int n, float* a, lapack_int lda, float* sva, lapack_int mv, float* v, lapack_int ldv, @@ -5712,6 +5712,120 @@ lapack_int LAPACKE_zgesdd_work( int matrix_layout, char jobz, lapack_int m, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int* iwork ); +lapack_int LAPACKE_sgedmd_work( int matrix_layout, char jobs, char jobz, + char jobf, lapack_int whtsvd, lapack_int m, + lapack_int n, float* x, lapack_int ldx, + float* y, lapack_int ldy, lapack_int k, + float* reig, float* imeig, float* z, + lapack_int ldz, float* res, float* b, + lapack_int ldb, float* w, lapack_int ldw, + float* s, lapack_int lds, float* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ); + +lapack_int LAPACKE_dgedmd_work( int matrix_layout, char jobs, char jobz, + char jobf, lapack_int whtsvd, lapack_int m, + lapack_int n, double* x, lapack_int ldx, + double* y, lapack_int ldy, lapack_int k, + double* reig, double* imeig, double* z, + lapack_int ldz, double* res, double* b, + lapack_int ldb, double* w, lapack_int ldw, + double* s, lapack_int lds, double* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ); + +lapack_int LAPACKE_cgedmd_work( int matrix_layout, char jobs, char jobz, + char jobf, lapack_int whtsvd, lapack_int m, + lapack_int n, lapack_complex_float* x, + lapack_int ldx, lapack_complex_float* y, + lapack_int ldy, lapack_int k, + lapack_complex_float* reig, + lapack_complex_float* imeig, + lapack_complex_float* z, lapack_int ldz, + lapack_complex_float* res, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* w, lapack_int ldw, + lapack_complex_float* s, lapack_int lds, + lapack_complex_float* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); + +lapack_int LAPACKE_zgedmd_work( int matrix_layout, char jobs, char jobz, + char jobf, lapack_int whtsvd, lapack_int m, + lapack_int n, lapack_complex_double* x, + lapack_int ldx, lapack_complex_double* y, + lapack_int ldy, lapack_int k, + lapack_complex_double* reig, + lapack_complex_double* imeig, + lapack_complex_double* z, lapack_int ldz, + lapack_complex_double* res, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* w, lapack_int ldw, + lapack_complex_double* s, lapack_int lds, + lapack_complex_double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); + +lapack_int LAPACKE_sgedmdq_work( int matrix_layout, char jobs, char jobz, + char jobr, char jobq, char jobt, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + float* f, lapack_int ldf, float* x, + lapack_int ldx, float* y, lapack_int ldy, + lapack_int nrnk, float tol, lapack_int k, + float* reig, float* imeig, float* z, + lapack_int ldz, float* res, float* b, + lapack_int ldb, float* v, lapack_int ldv, + float* s, lapack_int lds, float* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ); + +lapack_int LAPACKE_dgedmdq_work( int matrix_layout, char jobs, char jobz, + char jobr, char jobq, char jobt, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + double* f, lapack_int ldf, double* x, + lapack_int ldx, double* y, lapack_int ldy, + lapack_int nrnk, double tol, lapack_int k, + double* reig, double* imeig, double* z, + lapack_int ldz, double* res, double* b, + lapack_int ldb, double* v, lapack_int ldv, + double* s, lapack_int lds, double* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ); + +lapack_int LAPACKE_cgedmdq_work( int matrix_layout, char jobs, char jobz, + char jobr, char jobq, char jobt, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + lapack_complex_float* f, lapack_int ldf, + lapack_complex_float* x, lapack_int ldx, + lapack_complex_float* y, lapack_int ldy, + lapack_int nrnk, float tol, lapack_int k, + lapack_complex_float* reig, + lapack_complex_float* imeig, + lapack_complex_float* z, lapack_int ldz, + lapack_complex_float* res, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* v, lapack_int ldv, + lapack_complex_float* s, lapack_int lds, + lapack_complex_float* work, lapack_int lwork, + lapack_int* iwork, + lapack_int liwork ); + +lapack_int LAPACKE_zgedmdq_work( int matrix_layout, char jobs, char jobz, + char jobr, char jobq, char jobt, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + lapack_complex_double* f, lapack_int ldf, + lapack_complex_double* x, lapack_int ldx, + lapack_complex_double* y, lapack_int ldy, + lapack_int nrnk, double tol, lapack_int k, + lapack_complex_double* reig, + lapack_complex_double* imeig, + lapack_complex_double* z, lapack_int ldz, + lapack_complex_double* res, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* v, lapack_int ldv, + lapack_complex_double* s, lapack_int lds, + lapack_complex_double* work, lapack_int lwork, + lapack_int* iwork, + lapack_int liwork ); + lapack_int LAPACKE_sgesv_work( int matrix_layout, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, lapack_int* ipiv, float* b, lapack_int ldb ); @@ -5833,7 +5947,7 @@ lapack_int LAPACKE_zgesvdq_work( int matrix_layout, char joba, char jobp, lapack_int* iwork, lapack_int liwork, lapack_complex_double* cwork, lapack_int lcwork, double* rwork, lapack_int lrwork); - + lapack_int LAPACKE_sgesvj_work( int matrix_layout, char joba, char jobu, char jobv, lapack_int m, lapack_int n, float* a, lapack_int lda, float* sva, lapack_int mv, @@ -12550,7 +12664,7 @@ lapack_int LAPACKE_zhegv_2stage_work( int matrix_layout, lapack_int itype, char //LAPACK 3.8.0 lapack_int LAPACKE_ssysv_aa_2stage( int matrix_layout, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, - float* tb, lapack_int ltb, lapack_int* ipiv, + float* tb, lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, float* b, lapack_int ldb ); lapack_int LAPACKE_ssysv_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, @@ -12560,7 +12674,7 @@ lapack_int LAPACKE_ssysv_aa_2stage_work( int matrix_layout, char uplo, lapack_in lapack_int LAPACKE_dsysv_aa_2stage( int matrix_layout, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* tb, lapack_int ltb, - lapack_int* ipiv, lapack_int* ipiv2, + lapack_int* ipiv, lapack_int* ipiv2, double* b, lapack_int ldb ); lapack_int LAPACKE_dsysv_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, @@ -12612,10 +12726,10 @@ lapack_int LAPACKE_zhesv_aa_2stage_work( int matrix_layout, char uplo, lapack_in lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* work, lapack_int lwork ); - + lapack_int LAPACKE_ssytrf_aa_2stage( int matrix_layout, char uplo, lapack_int n, float* a, lapack_int lda, - float* tb, lapack_int ltb, lapack_int* ipiv, + float* tb, lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2 ); lapack_int LAPACKE_ssytrf_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, float* a, lapack_int lda, @@ -12671,7 +12785,7 @@ lapack_int LAPACKE_zhetrf_aa_2stage_work( int matrix_layout, char uplo, lapack_i lapack_int LAPACKE_ssytrs_aa_2stage( int matrix_layout, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, - float* tb, lapack_int ltb, lapack_int* ipiv, + float* tb, lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, float* b, lapack_int ldb ); lapack_int LAPACKE_ssytrs_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, @@ -12680,7 +12794,7 @@ lapack_int LAPACKE_ssytrs_aa_2stage_work( int matrix_layout, char uplo, lapack_i lapack_int LAPACKE_dsytrs_aa_2stage( int matrix_layout, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* tb, lapack_int ltb, - lapack_int* ipiv, lapack_int* ipiv2, + lapack_int* ipiv, lapack_int* ipiv2, double* b, lapack_int ldb ); lapack_int LAPACKE_dsytrs_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, @@ -12727,7 +12841,6 @@ lapack_int LAPACKE_zhetrs_aa_2stage_work( int matrix_layout, char uplo, lapack_i lapack_int lda, lapack_complex_double* tb, lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, lapack_complex_double* b, lapack_int ldb ); - //LAPACK 3.10.0 lapack_int LAPACKE_sorhr_col( int matrix_layout, lapack_int m, lapack_int n, lapack_int nb, float* a, From 5eef0793bac9637b80f9ce02363d80abcda24840 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 20 Jun 2023 18:56:33 +0200 Subject: [PATCH 09/15] Fix missing semicolon --- lapack-netlib/LAPACKE/include/lapack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lapack-netlib/LAPACKE/include/lapack.h b/lapack-netlib/LAPACKE/include/lapack.h index a5d02b42e..f510c8c80 100644 --- a/lapack-netlib/LAPACKE/include/lapack.h +++ b/lapack-netlib/LAPACKE/include/lapack.h @@ -3453,7 +3453,7 @@ void LAPACK_zgedmdq( lapack_complex_double* s, lapack_int const* lds, lapack_complex_double* work, lapack_int const* lwork, lapack_int* iwork, lapack_int const* liwork, - lapack_int* info ) + lapack_int* info ); #define LAPACK_cgesv LAPACK_GLOBAL(cgesv,CGESV) lapack_int LAPACK_cgesv( From 649ab6481adec7e4bb2a411a8d2956458523733f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 20 Jun 2023 20:03:44 +0200 Subject: [PATCH 10/15] Add dummy C files for ?GEMDQ? --- lapack-netlib/SRC/cgedmd.c | 511 ++++++++++++++++++++++++++++++++++++ lapack-netlib/SRC/cgedmdq.c | 511 ++++++++++++++++++++++++++++++++++++ lapack-netlib/SRC/dgedmd.c | 511 ++++++++++++++++++++++++++++++++++++ lapack-netlib/SRC/dgedmdq.c | 511 ++++++++++++++++++++++++++++++++++++ lapack-netlib/SRC/sgedmd.c | 511 ++++++++++++++++++++++++++++++++++++ lapack-netlib/SRC/sgedmdq.c | 511 ++++++++++++++++++++++++++++++++++++ lapack-netlib/SRC/zgedmd.c | 511 ++++++++++++++++++++++++++++++++++++ lapack-netlib/SRC/zgedmdq.c | 511 ++++++++++++++++++++++++++++++++++++ 8 files changed, 4088 insertions(+) create mode 100644 lapack-netlib/SRC/cgedmd.c create mode 100644 lapack-netlib/SRC/cgedmdq.c create mode 100644 lapack-netlib/SRC/dgedmd.c create mode 100644 lapack-netlib/SRC/dgedmdq.c create mode 100644 lapack-netlib/SRC/sgedmd.c create mode 100644 lapack-netlib/SRC/sgedmdq.c create mode 100644 lapack-netlib/SRC/zgedmd.c create mode 100644 lapack-netlib/SRC/zgedmdq.c diff --git a/lapack-netlib/SRC/cgedmd.c b/lapack-netlib/SRC/cgedmd.c new file mode 100644 index 000000000..447b23014 --- /dev/null +++ b/lapack-netlib/SRC/cgedmd.c @@ -0,0 +1,511 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i Date: Tue, 20 Jun 2023 21:39:29 +0200 Subject: [PATCH 11/15] Add dummy C sources for ?GEDMD --- cmake/lapack.cmake | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake index 077390d90..12127531d 100644 --- a/cmake/lapack.cmake +++ b/cmake/lapack.cmake @@ -624,7 +624,7 @@ set(SLASRC ssbev_2stage.c ssbevx_2stage.c ssbevd_2stage.c ssygv_2stage.c sgesvdq.c slaorhr_col_getrfnp.c slaorhr_col_getrfnp2.c sorgtsqr.c sorgtsqr_row.c sorhr_col.c - slatrs3.c strsyl3.c sgelst.c) + slatrs3.c strsyl3.c sgelst.c sgedmd.c sgedmdq.c) set(SXLASRC sgesvxx.c sgerfsx.c sla_gerfsx_extended.c sla_geamv.c sla_gercond.c sla_gerpvgrw.c ssysvxx.c ssyrfsx.c @@ -722,7 +722,7 @@ set(CLASRC chbev_2stage.c chbevx_2stage.c chbevd_2stage.c chegv_2stage.c cgesvdq.c claunhr_col_getrfnp.c claunhr_col_getrfnp2.c cungtsqr.c cungtsqr_row.c cunhr_col.c - clatrs3.c ctrsyl3.c cgelst.c) + clatrs3.c ctrsyl3.c cgelst.c cgedmd.c cgedmdq.c) set(CXLASRC cgesvxx.c cgerfsx.c cla_gerfsx_extended.c cla_geamv.c cla_gercond_c.c cla_gercond_x.c cla_gerpvgrw.c @@ -814,7 +814,7 @@ set(DLASRC dsbev_2stage.c dsbevx_2stage.c dsbevd_2stage.c dsygv_2stage.c dcombssq.c dgesvdq.c dlaorhr_col_getrfnp.c dlaorhr_col_getrfnp2.c dorgtsqr.c dorgtsqr_row.c dorhr_col.c - dlatrs3.c dtrsyl3.c dgelst.c) + dlatrs3.c dtrsyl3.c dgelst.c dgedmd.c dgedmdq.c) set(DXLASRC dgesvxx.c dgerfsx.c dla_gerfsx_extended.c dla_geamv.c dla_gercond.c dla_gerpvgrw.c dsysvxx.c dsyrfsx.c @@ -925,7 +925,7 @@ set(ZXLASRC zgesvxx.c zgerfsx.c zla_gerfsx_extended.c zla_geamv.c zla_gbrfsx_extended.c zla_gbamv.c zla_gbrcond_c.c zla_gbrcond_x.c zla_gbrpvgrw.c zhesvxx.c zherfsx.c zla_herfsx_extended.c zla_heamv.c zla_hercond_c.c zla_hercond_x.c zla_herpvgrw.c - zla_lin_berr.c zlarscl2.c zlascl2.c zla_wwaddw.c) + zla_lin_berr.c zlarscl2.c zlascl2.c zla_wwaddw.c zgedmd.c zgedmdq.c) if(USE_XBLAS) From 174f4e65e320241c7b57136db95d367cac7583d7 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 20 Jun 2023 23:14:35 +0200 Subject: [PATCH 12/15] Add LAPACK/LAPACKE functions for Dynamic Mode Decomposition --- exports/gensymbol | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/exports/gensymbol b/exports/gensymbol index b584167a4..704eab06f 100755 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -844,6 +844,23 @@ lapackobjs2z="$lapackobjs2z zungtsqr_row " +#functions added for lapack-3.11 +lapackobjs2c="$lapackobjs2c + cgedmd + cgedmdq + " +lapackobjs2d="$lapackobjs2d + dgedmd + dgedmdq + " +lapackobjs2s="$lapackobjs2s + sgedmd + sgedmdq + " +lapackobjs2z="$lapackobjs2z + zgedmd + zgedmdq + " lapack_extendedprecision_objs=" zposvxx clagge clatms chesvxx cposvxx cgesvxx ssyrfssx csyrfsx dlagsy dsysvxx sporfsx slatms zlatms zherfsx csysvxx @@ -1013,6 +1030,10 @@ lapackeobjsc=" LAPACKE_cgebrd_work LAPACKE_cgecon LAPACKE_cgecon_work + LAPACKE_cgedmd + LAPACKE_cgedmd_work + LAPACKE_cgedmdq + LAPACKE_cgedmdq_work LAPACKE_cgeequ LAPACKE_cgeequ_work LAPACKE_cgeequb @@ -1672,6 +1693,10 @@ lapackeobjsd=" LAPACKE_dgebrd_work LAPACKE_dgecon LAPACKE_dgecon_work + LAPACKE_dgedmd + LAPACKE_dgedmd_work + LAPACKE_dgedmdq + LAPACKE_dgedmdq_work LAPACKE_dgeequ LAPACKE_dgeequ_work LAPACKE_dgeequb @@ -2285,6 +2310,10 @@ lapackeobjss=" LAPACKE_sgebrd_work LAPACKE_sgecon LAPACKE_sgecon_work + LAPACKE_sgedmd + LAPACKE_sgedmd_work + LAPACKE_sgedmdq + LAPACKE_sgedmdq_work LAPACKE_sgeequ LAPACKE_sgeequ_work LAPACKE_sgeequb @@ -2894,6 +2923,10 @@ lapackeobjsz=" LAPACKE_zgebrd_work LAPACKE_zgecon LAPACKE_zgecon_work + LAPACKE_zgedmd + LAPACKE_zgedmd_work + LAPACKE_zgedmdq + LAPACKE_zgedmdq_work LAPACKE_zgeequ LAPACKE_zgeequ_work LAPACKE_zgeequb From 5cc1f7a0bab55bd8c10c9fceaa88d67a7246dede Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 21 Jun 2023 15:43:20 +0200 Subject: [PATCH 13/15] Add functional C replacements for ?GEDMD? --- lapack-netlib/SRC/cgedmd.c | 1159 ++++++++++++++++++++++++++++++++ lapack-netlib/SRC/cgedmdq.c | 778 ++++++++++++++++++++++ lapack-netlib/SRC/dgedmd.c | 1242 +++++++++++++++++++++++++++++++++++ lapack-netlib/SRC/dgedmdq.c | 789 ++++++++++++++++++++++ lapack-netlib/SRC/sgedmd.c | 1235 ++++++++++++++++++++++++++++++++++ lapack-netlib/SRC/sgedmdq.c | 785 ++++++++++++++++++++++ lapack-netlib/SRC/zgedmd.c | 1165 ++++++++++++++++++++++++++++++++ lapack-netlib/SRC/zgedmdq.c | 782 ++++++++++++++++++++++ 8 files changed, 7935 insertions(+) diff --git a/lapack-netlib/SRC/cgedmd.c b/lapack-netlib/SRC/cgedmd.c index 447b23014..570395c7b 100644 --- a/lapack-netlib/SRC/cgedmd.c +++ b/lapack-netlib/SRC/cgedmd.c @@ -509,3 +509,1162 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ +/* -- translated by f2c (version 20000121). + You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static integer c_n1 = -1; +static integer c__1 = 1; +static integer c__0 = 0; + +/* Subroutine */ int cgedmd_(char *jobs, char *jobz, char *jobr, char *jobf, + integer *whtsvd, integer *m, integer *n, complex *x, integer *ldx, + complex *y, integer *ldy, integer *nrnk, real *tol, integer *k, + complex *eigs, complex *z__, integer *ldz, real *res, complex *b, + integer *ldb, complex *w, integer *ldw, complex *s, integer *lds, + complex *zwork, integer *lzwork, real *rwork, integer *lrwork, + integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer x_dim1, x_offset, y_dim1, y_offset, z_dim1, z_offset, b_dim1, + b_offset, w_dim1, w_offset, s_dim1, s_offset, i__1, i__2, i__3, + i__4, i__5; + real r__1, r__2; + complex q__1, q__2; + + /* Local variables */ + complex zone; + real zero, ssum; + integer info1, info2; + real xscl1, xscl2; + integer i__, j; + real scale; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *), cgeev_(char *, + char *, integer *, complex *, integer *, complex *, complex *, + integer *, complex *, integer *, complex *, integer *, real *, + integer *); + extern logical lsame_(char *, char *); + logical badxy; + real small; + char jobzl[1]; + extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, + integer *, complex *, integer *); + logical wntex; + complex zzero; + extern real scnrm2_(integer *, complex *, integer *); + extern /* Subroutine */ int cgesdd_(char *, integer *, integer *, complex + *, integer *, real *, complex *, integer *, complex *, integer *, + complex *, integer *, real *, integer *, integer *), + clascl_(char *, integer *, integer *, real *, real *, integer *, + integer *, complex *, integer *, integer *); + extern integer icamax_(integer *, complex *, integer *); + extern real slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), cgesvd_(char *, char *, integer *, integer *, complex *, + integer *, real *, complex *, integer *, complex *, integer *, + complex *, integer *, real *, integer *), clacpy_( + char *, integer *, integer *, complex *, integer *, complex *, + integer *), xerbla_(char *, integer *); + char t_or_n__[1]; + extern /* Subroutine */ int cgejsv_(char *, char *, char *, char *, char * + , char *, integer *, integer *, complex *, integer *, real *, + complex *, integer *, complex *, integer *, complex *, integer *, + real *, integer *, integer *, integer *), classq_(integer *, complex *, integer *, + real *, real *); + logical sccolx, sccoly; + extern logical sisnan_(real *); + integer lwrsdd, mwrsdd, iminwr; + logical wntref, wntvec; + real rootsc; + integer lwrkev, mlwork, mwrkev, numrnk, olwork, lwrsvd, mwrsvd, mlrwrk; + logical lquery, wntres; + char jsvopt[1]; + integer lwrsvj, mwrsvj; + real rdummy[2]; + extern /* Subroutine */ int mecago_(); + integer lwrsvq, mwrsvq; + real ofl, one; + extern /* Subroutine */ int cgesvdq_(char *, char *, char *, char *, char + *, integer *, integer *, complex *, integer *, real *, complex *, + integer *, complex *, integer *, integer *, integer *, integer *, + complex *, integer *, real *, integer *, integer *); + +/* March 2023 */ +/* ..... */ +/* USE iso_fortran_env */ +/* INTEGER, PARAMETER :: WP = real32 */ +/* ..... */ +/* Scalar arguments */ +/* Array arguments */ +/* ............................................................ */ +/* Purpose */ +/* ======= */ +/* CGEDMD computes the Dynamic Mode Decomposition (DMD) for */ +/* a pair of data snapshot matrices. For the input matrices */ +/* X and Y such that Y = A*X with an unaccessible matrix */ +/* A, CGEDMD computes a certain number of Ritz pairs of A using */ +/* the standard Rayleigh-Ritz extraction from a subspace of */ +/* range(X) that is determined using the leading left singular */ +/* vectors of X. Optionally, CGEDMD returns the residuals */ +/* of the computed Ritz pairs, the information needed for */ +/* a refinement of the Ritz vectors, or the eigenvectors of */ +/* the Exact DMD. */ +/* For further details see the references listed */ +/* below. For more details of the implementation see [3]. */ + +/* References */ +/* ========== */ +/* [1] P. Schmid: Dynamic mode decomposition of numerical */ +/* and experimental data, */ +/* Journal of Fluid Mechanics 656, 5-28, 2010. */ +/* [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal */ +/* decompositions: analysis and enhancements, */ +/* SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. */ +/* [3] Z. Drmac: A LAPACK implementation of the Dynamic */ +/* Mode Decomposition I. Technical report. AIMDyn Inc. */ +/* and LAPACK Working Note 298. */ +/* [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. */ +/* Brunton, N. Kutz: On Dynamic Mode Decomposition: */ +/* Theory and Applications, Journal of Computational */ +/* Dynamics 1(2), 391 -421, 2014. */ + +/* ...................................................................... */ +/* Developed and supported by: */ +/* =========================== */ +/* Developed and coded by Zlatko Drmac, Faculty of Science, */ +/* University of Zagreb; drmac@math.hr */ +/* In cooperation with */ +/* AIMdyn Inc., Santa Barbara, CA. */ +/* and supported by */ +/* - DARPA SBIR project "Koopman Operator-Based Forecasting */ +/* for Nonstationary Processes from Near-Term, Limited */ +/* Observational Data" Contract No: W31P4Q-21-C-0007 */ +/* - DARPA PAI project "Physics-Informed Machine Learning */ +/* Methodologies" Contract No: HR0011-18-9-0033 */ +/* - DARPA MoDyL project "A Data-Driven, Operator-Theoretic */ +/* Framework for Space-Time Analysis of Process Dynamics" */ +/* Contract No: HR0011-16-C-0116 */ +/* Any opinions, findings and conclusions or recommendations */ +/* expressed in this material are those of the author and */ +/* do not necessarily reflect the views of the DARPA SBIR */ +/* Program Office */ +/* ============================================================ */ +/* Distribution Statement A: */ +/* Approved for Public Release, Distribution Unlimited. */ +/* Cleared by DARPA on September 29, 2022 */ +/* ============================================================ */ +/* ...................................................................... */ +/* Arguments */ +/* ========= */ +/* JOBS (input) CHARACTER*1 */ +/* Determines whether the initial data snapshots are scaled */ +/* by a diagonal matrix. */ +/* 'S' :: The data snapshots matrices X and Y are multiplied */ +/* with a diagonal matrix D so that X*D has unit */ +/* nonzero columns (in the Euclidean 2-norm) */ +/* 'C' :: The snapshots are scaled as with the 'S' option. */ +/* If it is found that an i-th column of X is zero */ +/* vector and the corresponding i-th column of Y is */ +/* non-zero, then the i-th column of Y is set to */ +/* zero and a warning flag is raised. */ +/* 'Y' :: The data snapshots matrices X and Y are multiplied */ +/* by a diagonal matrix D so that Y*D has unit */ +/* nonzero columns (in the Euclidean 2-norm) */ +/* 'N' :: No data scaling. */ +/* ..... */ +/* JOBZ (input) CHARACTER*1 */ +/* Determines whether the eigenvectors (Koopman modes) will */ +/* be computed. */ +/* 'V' :: The eigenvectors (Koopman modes) will be computed */ +/* and returned in the matrix Z. */ +/* See the description of Z. */ +/* 'F' :: The eigenvectors (Koopman modes) will be returned */ +/* in factored form as the product X(:,1:K)*W, where X */ +/* contains a POD basis (leading left singular vectors */ +/* of the data matrix X) and W contains the eigenvectors */ +/* of the corresponding Rayleigh quotient. */ +/* See the descriptions of K, X, W, Z. */ +/* 'N' :: The eigenvectors are not computed. */ +/* ..... */ +/* JOBR (input) CHARACTER*1 */ +/* Determines whether to compute the residuals. */ +/* 'R' :: The residuals for the computed eigenpairs will be */ +/* computed and stored in the array RES. */ +/* See the description of RES. */ +/* For this option to be legal, JOBZ must be 'V'. */ +/* 'N' :: The residuals are not computed. */ +/* ..... */ +/* JOBF (input) CHARACTER*1 */ +/* Specifies whether to store information needed for post- */ +/* processing (e.g. computing refined Ritz vectors) */ +/* 'R' :: The matrix needed for the refinement of the Ritz */ +/* vectors is computed and stored in the array B. */ +/* See the description of B. */ +/* 'E' :: The unscaled eigenvectors of the Exact DMD are */ +/* computed and returned in the array B. See the */ +/* description of B. */ +/* 'N' :: No eigenvector refinement data is computed. */ +/* ..... */ +/* WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } */ +/* Allows for a selection of the SVD algorithm from the */ +/* LAPACK library. */ +/* 1 :: CGESVD (the QR SVD algorithm) */ +/* 2 :: CGESDD (the Divide and Conquer algorithm; if enough */ +/* workspace available, this is the fastest option) */ +/* 3 :: CGESVDQ (the preconditioned QR SVD ; this and 4 */ +/* are the most accurate options) */ +/* 4 :: CGEJSV (the preconditioned Jacobi SVD; this and 3 */ +/* are the most accurate options) */ +/* For the four methods above, a significant difference in */ +/* the accuracy of small singular values is possible if */ +/* the snapshots vary in norm so that X is severely */ +/* ill-conditioned. If small (smaller than EPS*||X||) */ +/* singular values are of interest and JOBS=='N', then */ +/* the options (3, 4) give the most accurate results, where */ +/* the option 4 is slightly better and with stronger */ +/* theoretical background. */ +/* If JOBS=='S', i.e. the columns of X will be normalized, */ +/* then all methods give nearly equally accurate results. */ +/* ..... */ +/* M (input) INTEGER, M>= 0 */ +/* The state space dimension (the row dimension of X, Y). */ +/* ..... */ +/* N (input) INTEGER, 0 <= N <= M */ +/* The number of data snapshot pairs */ +/* (the number of columns of X and Y). */ +/* ..... */ +/* X (input/output) COMPLEX(KIND=WP) M-by-N array */ +/* > On entry, X contains the data snapshot matrix X. It is */ +/* assumed that the column norms of X are in the range of */ +/* the normalized floating point numbers. */ +/* < On exit, the leading K columns of X contain a POD basis, */ +/* i.e. the leading K left singular vectors of the input */ +/* data matrix X, U(:,1:K). All N columns of X contain all */ +/* left singular vectors of the input matrix X. */ +/* See the descriptions of K, Z and W. */ +/* ..... */ +/* LDX (input) INTEGER, LDX >= M */ +/* The leading dimension of the array X. */ +/* ..... */ +/* Y (input/workspace/output) COMPLEX(KIND=WP) M-by-N array */ +/* > On entry, Y contains the data snapshot matrix Y */ +/* < On exit, */ +/* If JOBR == 'R', the leading K columns of Y contain */ +/* the residual vectors for the computed Ritz pairs. */ +/* See the description of RES. */ +/* If JOBR == 'N', Y contains the original input data, */ +/* scaled according to the value of JOBS. */ +/* ..... */ +/* LDY (input) INTEGER , LDY >= M */ +/* The leading dimension of the array Y. */ +/* ..... */ +/* NRNK (input) INTEGER */ +/* Determines the mode how to compute the numerical rank, */ +/* i.e. how to truncate small singular values of the input */ +/* matrix X. On input, if */ +/* NRNK = -1 :: i-th singular value sigma(i) is truncated */ +/* if sigma(i) <= TOL*sigma(1) */ +/* This option is recommended. */ +/* NRNK = -2 :: i-th singular value sigma(i) is truncated */ +/* if sigma(i) <= TOL*sigma(i-1) */ +/* This option is included for R&D purposes. */ +/* It requires highly accurate SVD, which */ +/* may not be feasible. */ +/* The numerical rank can be enforced by using positive */ +/* value of NRNK as follows: */ +/* 0 < NRNK <= N :: at most NRNK largest singular values */ +/* will be used. If the number of the computed nonzero */ +/* singular values is less than NRNK, then only those */ +/* nonzero values will be used and the actually used */ +/* dimension is less than NRNK. The actual number of */ +/* the nonzero singular values is returned in the variable */ +/* K. See the descriptions of TOL and K. */ +/* ..... */ +/* TOL (input) REAL(KIND=WP), 0 <= TOL < 1 */ +/* The tolerance for truncating small singular values. */ +/* See the description of NRNK. */ +/* ..... */ +/* K (output) INTEGER, 0 <= K <= N */ +/* The dimension of the POD basis for the data snapshot */ +/* matrix X and the number of the computed Ritz pairs. */ +/* The value of K is determined according to the rule set */ +/* by the parameters NRNK and TOL. */ +/* See the descriptions of NRNK and TOL. */ +/* ..... */ +/* EIGS (output) COMPLEX(KIND=WP) N-by-1 array */ +/* The leading K (K<=N) entries of EIGS contain */ +/* the computed eigenvalues (Ritz values). */ +/* See the descriptions of K, and Z. */ +/* ..... */ +/* Z (workspace/output) COMPLEX(KIND=WP) M-by-N array */ +/* If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i) */ +/* is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1. */ +/* If JOBZ == 'F', then the Z(:,i)'s are given implicitly as */ +/* the columns of X(:,1:K)*W(1:K,1:K), i.e. X(:,1:K)*W(:,i) */ +/* is an eigenvector corresponding to EIGS(i). The columns */ +/* of W(1:k,1:K) are the computed eigenvectors of the */ +/* K-by-K Rayleigh quotient. */ +/* See the descriptions of EIGS, X and W. */ +/* ..... */ +/* LDZ (input) INTEGER , LDZ >= M */ +/* The leading dimension of the array Z. */ +/* ..... */ +/* RES (output) REAL(KIND=WP) N-by-1 array */ +/* RES(1:K) contains the residuals for the K computed */ +/* Ritz pairs, */ +/* RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. */ +/* See the description of EIGS and Z. */ +/* ..... */ +/* B (output) COMPLEX(KIND=WP) M-by-N array. */ +/* IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can */ +/* be used for computing the refined vectors; see further */ +/* details in the provided references. */ +/* If JOBF == 'E', B(1:M,1:K) contains */ +/* A*U(:,1:K)*W(1:K,1:K), which are the vectors from the */ +/* Exact DMD, up to scaling by the inverse eigenvalues. */ +/* If JOBF =='N', then B is not referenced. */ +/* See the descriptions of X, W, K. */ +/* ..... */ +/* LDB (input) INTEGER, LDB >= M */ +/* The leading dimension of the array B. */ +/* ..... */ +/* W (workspace/output) COMPLEX(KIND=WP) N-by-N array */ +/* On exit, W(1:K,1:K) contains the K computed */ +/* eigenvectors of the matrix Rayleigh quotient. */ +/* The Ritz vectors (returned in Z) are the */ +/* product of X (containing a POD basis for the input */ +/* matrix X) and W. See the descriptions of K, S, X and Z. */ +/* W is also used as a workspace to temporarily store the */ +/* right singular vectors of X. */ +/* ..... */ +/* LDW (input) INTEGER, LDW >= N */ +/* The leading dimension of the array W. */ +/* ..... */ +/* S (workspace/output) COMPLEX(KIND=WP) N-by-N array */ +/* The array S(1:K,1:K) is used for the matrix Rayleigh */ +/* quotient. This content is overwritten during */ +/* the eigenvalue decomposition by CGEEV. */ +/* See the description of K. */ +/* ..... */ +/* LDS (input) INTEGER, LDS >= N */ +/* The leading dimension of the array S. */ +/* ..... */ +/* ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array */ +/* ZWORK is used as complex workspace in the complex SVD, as */ +/* specified by WHTSVD (1,2, 3 or 4) and for CGEEV for computing */ +/* the eigenvalues of a Rayleigh quotient. */ +/* If the call to CGEDMD is only workspace query, then */ +/* ZWORK(1) contains the minimal complex workspace length and */ +/* ZWORK(2) is the optimal complex workspace length. */ +/* Hence, the length of work is at least 2. */ +/* See the description of LZWORK. */ +/* ..... */ +/* LZWORK (input) INTEGER */ +/* The minimal length of the workspace vector ZWORK. */ +/* LZWORK is calculated as MAX(LZWORK_SVD, LZWORK_CGEEV), */ +/* where LZWORK_CGEEV = MAX( 1, 2*N ) and the minimal */ +/* LZWORK_SVD is calculated as follows */ +/* If WHTSVD == 1 :: CGESVD :: */ +/* LZWORK_SVD = MAX(1,2*MIN(M,N)+MAX(M,N)) */ +/* If WHTSVD == 2 :: CGESDD :: */ +/* LZWORK_SVD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N) */ +/* If WHTSVD == 3 :: CGESVDQ :: */ +/* LZWORK_SVD = obtainable by a query */ +/* If WHTSVD == 4 :: CGEJSV :: */ +/* LZWORK_SVD = obtainable by a query */ +/* If on entry LZWORK = -1, then a workspace query is */ +/* assumed and the procedure only computes the minimal */ +/* and the optimal workspace lengths and returns them in */ +/* LZWORK(1) and LZWORK(2), respectively. */ +/* ..... */ +/* RWORK (workspace/output) REAL(KIND=WP) LRWORK-by-1 array */ +/* On exit, RWORK(1:N) contains the singular values of */ +/* X (for JOBS=='N') or column scaled X (JOBS=='S', 'C'). */ +/* If WHTSVD==4, then RWORK(N+1) and RWORK(N+2) contain */ +/* scaling factor RWORK(N+2)/RWORK(N+1) used to scale X */ +/* and Y to avoid overflow in the SVD of X. */ +/* This may be of interest if the scaling option is off */ +/* and as many as possible smallest eigenvalues are */ +/* desired to the highest feasible accuracy. */ +/* If the call to CGEDMD is only workspace query, then */ +/* RWORK(1) contains the minimal workspace length. */ +/* See the description of LRWORK. */ +/* ..... */ +/* LRWORK (input) INTEGER */ +/* The minimal length of the workspace vector RWORK. */ +/* LRWORK is calculated as follows: */ +/* LRWORK = MAX(1, N+LRWORK_SVD,N+LRWORK_CGEEV), where */ +/* LRWORK_CGEEV = MAX(1,2*N) and RWORK_SVD is the real workspace */ +/* for the SVD subroutine determined by the input parameter */ +/* WHTSVD. */ +/* If WHTSVD == 1 :: CGESVD :: */ +/* LRWORK_SVD = 5*MIN(M,N) */ +/* If WHTSVD == 2 :: CGESDD :: */ +/* LRWORK_SVD = MAX(5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), */ +/* 2*MAX(M,N)*MIN(M,N)+2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) ) */ +/* If WHTSVD == 3 :: CGESVDQ :: */ +/* LRWORK_SVD = obtainable by a query */ +/* If WHTSVD == 4 :: CGEJSV :: */ +/* LRWORK_SVD = obtainable by a query */ +/* If on entry LRWORK = -1, then a workspace query is */ +/* assumed and the procedure only computes the minimal */ +/* real workspace length and returns it in RWORK(1). */ +/* ..... */ +/* IWORK (workspace/output) INTEGER LIWORK-by-1 array */ +/* Workspace that is required only if WHTSVD equals */ +/* 2 , 3 or 4. (See the description of WHTSVD). */ +/* If on entry LWORK =-1 or LIWORK=-1, then the */ +/* minimal length of IWORK is computed and returned in */ +/* IWORK(1). See the description of LIWORK. */ +/* ..... */ +/* LIWORK (input) INTEGER */ +/* The minimal length of the workspace vector IWORK. */ +/* If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 */ +/* If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)) */ +/* If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1) */ +/* If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N) */ +/* If on entry LIWORK = -1, then a workspace query is */ +/* assumed and the procedure only computes the minimal */ +/* and the optimal workspace lengths for ZWORK, RWORK and */ +/* IWORK. See the descriptions of ZWORK, RWORK and IWORK. */ +/* ..... */ +/* INFO (output) INTEGER */ +/* -i < 0 :: On entry, the i-th argument had an */ +/* illegal value */ +/* = 0 :: Successful return. */ +/* = 1 :: Void input. Quick exit (M=0 or N=0). */ +/* = 2 :: The SVD computation of X did not converge. */ +/* Suggestion: Check the input data and/or */ +/* repeat with different WHTSVD. */ +/* = 3 :: The computation of the eigenvalues did not */ +/* converge. */ +/* = 4 :: If data scaling was requested on input and */ +/* the procedure found inconsistency in the data */ +/* such that for some column index i, */ +/* X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set */ +/* to zero if JOBS=='C'. The computation proceeds */ +/* with original or modified data and warning */ +/* flag is set with INFO=4. */ +/* ............................................................. */ +/* ............................................................. */ +/* Parameters */ +/* ~~~~~~~~~~ */ +/* Local scalars */ +/* ~~~~~~~~~~~~~ */ + +/* Local arrays */ +/* ~~~~~~~~~~~~ */ +/* External functions (BLAS and LAPACK) */ +/* ~~~~~~~~~~~~~~~~~ */ +/* External subroutines (BLAS and LAPACK) */ +/* ~~~~~~~~~~~~~~~~~~~~ */ +/* Intrinsic functions */ +/* ~~~~~~~~~~~~~~~~~~~ */ +/* ............................................................ */ + /* Parameter adjustments */ + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1 * 1; + y -= y_offset; + --eigs; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --res; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + w_dim1 = *ldw; + w_offset = 1 + w_dim1 * 1; + w -= w_offset; + s_dim1 = *lds; + s_offset = 1 + s_dim1 * 1; + s -= s_offset; + --zwork; + --rwork; + --iwork; + + /* Function Body */ + zero = 0.f; + one = 1.f; + zzero.r = 0.f, zzero.i = 0.f; + zone.r = 1.f, zone.i = 0.f; + +/* Test the input arguments */ + + wntres = lsame_(jobr, "R"); + sccolx = lsame_(jobs, "S") || lsame_(jobs, "C"); + sccoly = lsame_(jobs, "Y"); + wntvec = lsame_(jobz, "V"); + wntref = lsame_(jobf, "R"); + wntex = lsame_(jobf, "E"); + *info = 0; + lquery = *lzwork == -1 || *liwork == -1 || *lrwork == -1; + + if (! (sccolx || sccoly || lsame_(jobs, "N"))) { + *info = -1; + } else if (! (wntvec || lsame_(jobz, "N") || lsame_( + jobz, "F"))) { + *info = -2; + } else if (! (wntres || lsame_(jobr, "N")) || + wntres && ! wntvec) { + *info = -3; + } else if (! (wntref || wntex || lsame_(jobf, "N"))) + { + *info = -4; + } else if (! (*whtsvd == 1 || *whtsvd == 2 || *whtsvd == 3 || *whtsvd == + 4)) { + *info = -5; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || *n > *m) { + *info = -7; + } else if (*ldx < *m) { + *info = -9; + } else if (*ldy < *m) { + *info = -11; + } else if (! (*nrnk == -2 || *nrnk == -1 || *nrnk >= 1 && *nrnk <= *n)) { + *info = -12; + } else if (*tol < zero || *tol >= one) { + *info = -13; + } else if (*ldz < *m) { + *info = -17; + } else if ((wntref || wntex) && *ldb < *m) { + *info = -20; + } else if (*ldw < *n) { + *info = -22; + } else if (*lds < *n) { + *info = -24; + } + + if (*info == 0) { +/* Compute the minimal and the optimal workspace */ +/* requirements. Simulate running the code and */ +/* determine minimal and optimal sizes of the */ +/* workspace at any moment of the run. */ + if (*n == 0) { +/* Quick return. All output except K is void. */ +/* INFO=1 signals the void input. */ +/* In case of a workspace query, the default */ +/* minimal workspace lengths are returned. */ + if (lquery) { + iwork[1] = 1; + rwork[1] = 1.f; + zwork[1].r = 2.f, zwork[1].i = 0.f; + zwork[2].r = 2.f, zwork[2].i = 0.f; + } else { + *k = 0; + } + *info = 1; + return 0; + } + iminwr = 1; + mlrwrk = f2cmax(1,*n); + mlwork = 2; + olwork = 2; +/* SELECT CASE ( WHTSVD ) */ + if (*whtsvd == 1) { +/* The following is specified as the minimal */ +/* length of WORK in the definition of CGESVD: */ +/* MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N)) */ +/* Computing MAX */ + i__1 = 1, i__2 = (f2cmin(*m,*n) << 1) + f2cmax(*m,*n); + mwrsvd = f2cmax(i__1,i__2); + mlwork = f2cmax(mlwork,mwrsvd); +/* Computing MAX */ + i__1 = mlrwrk, i__2 = *n + f2cmin(*m,*n) * 5; + mlrwrk = f2cmax(i__1,i__2); + if (lquery) { + cgesvd_("O", "S", m, n, &x[x_offset], ldx, &rwork[1], &b[ + b_offset], ldb, &w[w_offset], ldw, &zwork[1], &c_n1, + rdummy, &info1); + lwrsvd = (integer) zwork[1].r; + olwork = f2cmax(olwork,lwrsvd); + } + } else if (*whtsvd == 2) { +/* The following is specified as the minimal */ +/* length of WORK in the definition of CGESDD: */ +/* MWRSDD = 2*f2cmin(M,N)*f2cmin(M,N)+2*f2cmin(M,N)+f2cmax(M,N). */ +/* RWORK length: 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N) */ +/* In LAPACK 3.10.1 RWORK is defined differently. */ +/* Below we take f2cmax over the two versions. */ +/* IMINWR = 8*MIN(M,N) */ + mwrsdd = (f2cmin(*m,*n) << 1) * f2cmin(*m,*n) + (f2cmin(*m,*n) << 1) + f2cmax( + *m,*n); + mlwork = f2cmax(mlwork,mwrsdd); + iminwr = f2cmin(*m,*n) << 3; +/* Computing MAX */ +/* Computing MAX */ + i__3 = f2cmin(*m,*n) * 5 * f2cmin(*m,*n) + f2cmin(*m,*n) * 7, i__4 = f2cmin(* + m,*n) * 5 * f2cmin(*m,*n) + f2cmin(*m,*n) * 5, i__3 = f2cmax(i__3, + i__4), i__4 = (f2cmax(*m,*n) << 1) * f2cmin(*m,*n) + (f2cmin(*m,*n) + << 1) * f2cmin(*m,*n) + f2cmin(*m,*n); + i__1 = mlrwrk, i__2 = *n + f2cmax(i__3,i__4); + mlrwrk = f2cmax(i__1,i__2); + if (lquery) { + cgesdd_("O", m, n, &x[x_offset], ldx, &rwork[1], &b[b_offset], + ldb, &w[w_offset], ldw, &zwork[1], &c_n1, rdummy, & + iwork[1], &info1); +/* Computing MAX */ + i__1 = mwrsdd, i__2 = (integer) zwork[1].r; + lwrsdd = f2cmax(i__1,i__2); + olwork = f2cmax(olwork,lwrsdd); + } + } else if (*whtsvd == 3) { + cgesvdq_("H", "P", "N", "R", "R", m, n, &x[x_offset], ldx, &rwork[ + 1], &z__[z_offset], ldz, &w[w_offset], ldw, &numrnk, & + iwork[1], &c_n1, &zwork[1], &c_n1, rdummy, &c_n1, &info1); + iminwr = iwork[1]; + mwrsvq = (integer) zwork[2].r; + mlwork = f2cmax(mlwork,mwrsvq); +/* Computing MAX */ + i__1 = mlrwrk, i__2 = *n + (integer) rdummy[0]; + mlrwrk = f2cmax(i__1,i__2); + if (lquery) { + lwrsvq = (integer) zwork[1].r; + olwork = f2cmax(olwork,lwrsvq); + } + } else if (*whtsvd == 4) { + *(unsigned char *)jsvopt = 'J'; + cgejsv_("F", "U", jsvopt, "N", "N", "P", m, n, &x[x_offset], ldx, + &rwork[1], &z__[z_offset], ldz, &w[w_offset], ldw, &zwork[ + 1], &c_n1, rdummy, &c_n1, &iwork[1], &info1); + iminwr = iwork[1]; + mwrsvj = (integer) zwork[2].r; + mlwork = f2cmax(mlwork,mwrsvj); +/* Computing MAX */ +/* Computing MAX */ + i__3 = 7, i__4 = (integer) rdummy[0]; + i__1 = mlrwrk, i__2 = *n + f2cmax(i__3,i__4); + mlrwrk = f2cmax(i__1,i__2); + if (lquery) { + lwrsvj = (integer) zwork[1].r; + olwork = f2cmax(olwork,lwrsvj); + } +/* END SELECT */ + } + if (wntvec || wntex || lsame_(jobz, "F")) { + *(unsigned char *)jobzl = 'V'; + } else { + *(unsigned char *)jobzl = 'N'; + } +/* Workspace calculation to the CGEEV call */ +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + mwrkev = f2cmax(i__1,i__2); + mlwork = f2cmax(mlwork,mwrkev); +/* Computing MAX */ + i__1 = mlrwrk, i__2 = *n + (*n << 1); + mlrwrk = f2cmax(i__1,i__2); + if (lquery) { + cgeev_("N", jobzl, n, &s[s_offset], lds, &eigs[1], &w[w_offset], + ldw, &w[w_offset], ldw, &zwork[1], &c_n1, &rwork[1], & + info1); +/* LAPACK CALL */ + lwrkev = (integer) zwork[1].r; + olwork = f2cmax(olwork,lwrkev); + olwork = f2cmax(2,olwork); + } + + if (*liwork < iminwr && ! lquery) { + *info = -30; + } + if (*lrwork < mlrwrk && ! lquery) { + *info = -28; + } + if (*lzwork < mlwork && ! lquery) { + *info = -26; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEDMD", &i__1); + return 0; + } else if (lquery) { +/* Return minimal and optimal workspace sizes */ + iwork[1] = iminwr; + rwork[1] = (real) mlrwrk; + zwork[1].r = (real) mlwork, zwork[1].i = 0.f; + zwork[2].r = (real) olwork, zwork[2].i = 0.f; + return 0; + } +/* ............................................................ */ + + ofl = slamch_("O") * slamch_("P"); + small = slamch_("S"); + badxy = FALSE_; + +/* <1> Optional scaling of the snapshots (columns of X, Y) */ +/* ========================================================== */ + if (sccolx) { +/* The columns of X will be normalized. */ +/* To prevent overflows, the column norms of X are */ +/* carefully computed using CLASSQ. */ + *k = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* WORK(i) = SCNRM2( M, X(1,i), 1 ) */ + scale = zero; + classq_(m, &x[i__ * x_dim1 + 1], &c__1, &scale, &ssum); + if (sisnan_(&scale) || sisnan_(&ssum)) { + *k = 0; + *info = -8; + i__2 = -(*info); + xerbla_("CGEDMD", &i__2); + } + if (scale != zero && ssum != zero) { + rootsc = sqrt(ssum); + if (scale >= ofl / rootsc) { +/* Norm of X(:,i) overflows. First, X(:,i) */ +/* is scaled by */ +/* ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2. */ +/* Next, the norm of X(:,i) is stored without */ +/* overflow as WORK(i) = - SCALE * (ROOTSC/M), */ +/* the minus sign indicating the 1/M factor. */ +/* Scaling is performed without overflow, and */ +/* underflow may occur in the smallest entries */ +/* of X(:,i). The relative backward and forward */ +/* errors are small in the ell_2 norm. */ + r__1 = one / rootsc; + clascl_("G", &c__0, &c__0, &scale, &r__1, m, &c__1, &x[ + i__ * x_dim1 + 1], ldx, &info2); + rwork[i__] = -scale * (rootsc / (real) (*m)); + } else { +/* X(:,i) will be scaled to unit 2-norm */ + rwork[i__] = scale * rootsc; + clascl_("G", &c__0, &c__0, &rwork[i__], &one, m, &c__1, & + x[i__ * x_dim1 + 1], ldx, &info2); +/* X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i) ! INTRINSIC */ +/* LAPAC */ + } + } else { + rwork[i__] = zero; + ++(*k); + } + } + if (*k == *n) { +/* All columns of X are zero. Return error code -8. */ +/* (the 8th input variable had an illegal value) */ + *k = 0; + *info = -8; + i__1 = -(*info); + xerbla_("CGEDMD", &i__1); + return 0; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Now, apply the same scaling to the columns of Y. */ + if (rwork[i__] > zero) { + r__1 = one / rwork[i__]; + csscal_(m, &r__1, &y[i__ * y_dim1 + 1], &c__1); +/* Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i) ! INTRINSIC */ +/* BLAS CALL */ + } else if (rwork[i__] < zero) { + r__1 = -rwork[i__]; + r__2 = one / (real) (*m); + clascl_("G", &c__0, &c__0, &r__1, &r__2, m, &c__1, &y[i__ * + y_dim1 + 1], ldy, &info2); +/* LAPACK C */ + } else if (c_abs(&y[icamax_(m, &y[i__ * y_dim1 + 1], &c__1) + i__ + * y_dim1]) != zero) { +/* X(:,i) is zero vector. For consistency, */ +/* Y(:,i) should also be zero. If Y(:,i) is not */ +/* zero, then the data might be inconsistent or */ +/* corrupted. If JOBS == 'C', Y(:,i) is set to */ +/* zero and a warning flag is raised. */ +/* The computation continues but the */ +/* situation will be reported in the output. */ + badxy = TRUE_; + if (lsame_(jobs, "C")) { + csscal_(m, &zero, &y[i__ * y_dim1 + 1], &c__1); + } +/* BLAS CALL */ + } + } + } + + if (sccoly) { +/* The columns of Y will be normalized. */ +/* To prevent overflows, the column norms of Y are */ +/* carefully computed using CLASSQ. */ + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* RWORK(i) = SCNRM2( M, Y(1,i), 1 ) */ + scale = zero; + classq_(m, &y[i__ * y_dim1 + 1], &c__1, &scale, &ssum); + if (sisnan_(&scale) || sisnan_(&ssum)) { + *k = 0; + *info = -10; + i__2 = -(*info); + xerbla_("CGEDMD", &i__2); + } + if (scale != zero && ssum != zero) { + rootsc = sqrt(ssum); + if (scale >= ofl / rootsc) { +/* Norm of Y(:,i) overflows. First, Y(:,i) */ +/* is scaled by */ +/* ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2. */ +/* Next, the norm of Y(:,i) is stored without */ +/* overflow as RWORK(i) = - SCALE * (ROOTSC/M), */ +/* the minus sign indicating the 1/M factor. */ +/* Scaling is performed without overflow, and */ +/* underflow may occur in the smallest entries */ +/* of Y(:,i). The relative backward and forward */ +/* errors are small in the ell_2 norm. */ + r__1 = one / rootsc; + clascl_("G", &c__0, &c__0, &scale, &r__1, m, &c__1, &y[ + i__ * y_dim1 + 1], ldy, &info2); + rwork[i__] = -scale * (rootsc / (real) (*m)); + } else { +/* Y(:,i) will be scaled to unit 2-norm */ + rwork[i__] = scale * rootsc; + clascl_("G", &c__0, &c__0, &rwork[i__], &one, m, &c__1, & + y[i__ * y_dim1 + 1], ldy, &info2); +/* Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i) ! INTRINSIC */ +/* LAPA */ + } + } else { + rwork[i__] = zero; + } + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Now, apply the same scaling to the columns of X. */ + if (rwork[i__] > zero) { + r__1 = one / rwork[i__]; + csscal_(m, &r__1, &x[i__ * x_dim1 + 1], &c__1); +/* X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i) ! INTRINSIC */ +/* BLAS CALL */ + } else if (rwork[i__] < zero) { + r__1 = -rwork[i__]; + r__2 = one / (real) (*m); + clascl_("G", &c__0, &c__0, &r__1, &r__2, m, &c__1, &x[i__ * + x_dim1 + 1], ldx, &info2); +/* LAPACK */ + } else if (c_abs(&x[icamax_(m, &x[i__ * x_dim1 + 1], &c__1) + i__ + * x_dim1]) != zero) { +/* Y(:,i) is zero vector. If X(:,i) is not */ +/* zero, then a warning flag is raised. */ +/* The computation continues but the */ +/* situation will be reported in the output. */ + badxy = TRUE_; + } + } + } + +/* <2> SVD of the data snapshot matrix X. */ +/* ===================================== */ +/* The left singular vectors are stored in the array X. */ +/* The right singular vectors are in the array W. */ +/* The array W will later on contain the eigenvectors */ +/* of a Rayleigh quotient. */ + numrnk = *n; +/* SELECT CASE ( WHTSVD ) */ + if (*whtsvd == 1) { + cgesvd_("O", "S", m, n, &x[x_offset], ldx, &rwork[1], &b[b_offset], + ldb, &w[w_offset], ldw, &zwork[1], lzwork, &rwork[*n + 1], & + info1); +/* LA */ + *(unsigned char *)t_or_n__ = 'C'; + } else if (*whtsvd == 2) { + cgesdd_("O", m, n, &x[x_offset], ldx, &rwork[1], &b[b_offset], ldb, & + w[w_offset], ldw, &zwork[1], lzwork, &rwork[*n + 1], &iwork[1] + , &info1); +/* LAP */ + *(unsigned char *)t_or_n__ = 'C'; + } else if (*whtsvd == 3) { + i__1 = *lrwork - *n; + cgesvdq_("H", "P", "N", "R", "R", m, n, &x[x_offset], ldx, &rwork[1], + &z__[z_offset], ldz, &w[w_offset], ldw, &numrnk, &iwork[1], + liwork, &zwork[1], lzwork, &rwork[*n + 1], &i__1, &info1); +/* LAPACK CA */ + clacpy_("A", m, &numrnk, &z__[z_offset], ldz, &x[x_offset], ldx); +/* LAPACK C */ + *(unsigned char *)t_or_n__ = 'C'; + } else if (*whtsvd == 4) { + i__1 = *lrwork - *n; + cgejsv_("F", "U", jsvopt, "N", "N", "P", m, n, &x[x_offset], ldx, & + rwork[1], &z__[z_offset], ldz, &w[w_offset], ldw, &zwork[1], + lzwork, &rwork[*n + 1], &i__1, &iwork[1], &info1); + clacpy_("A", m, n, &z__[z_offset], ldz, &x[x_offset], ldx); +/* LAPACK CALL */ + *(unsigned char *)t_or_n__ = 'N'; + xscl1 = rwork[*n + 1]; + xscl2 = rwork[*n + 2]; + if (xscl1 != xscl2) { +/* This is an exceptional situation. If the */ +/* data matrices are not scaled and the */ +/* largest singular value of X overflows. */ +/* In that case CGEJSV can return the SVD */ +/* in scaled form. The scaling factor can be used */ +/* to rescale the data (X and Y). */ + clascl_("G", &c__0, &c__0, &xscl1, &xscl2, m, n, &y[y_offset], + ldy, &info2); + } +/* END SELECT */ + } + + if (info1 > 0) { +/* The SVD selected subroutine did not converge. */ +/* Return with an error code. */ + *info = 2; + return 0; + } + + if (rwork[1] == zero) { +/* The largest computed singular value of (scaled) */ +/* X is zero. Return error code -8 */ +/* (the 8th input variable had an illegal value). */ + *k = 0; + *info = -8; + i__1 = -(*info); + xerbla_("CGEDMD", &i__1); + return 0; + } + +/* <3> Determine the numerical rank of the data */ +/* snapshots matrix X. This depends on the */ +/* parameters NRNK and TOL. */ +/* SELECT CASE ( NRNK ) */ + if (*nrnk == -1) { + *k = 1; + i__1 = numrnk; + for (i__ = 2; i__ <= i__1; ++i__) { + if (rwork[i__] <= rwork[1] * *tol || rwork[i__] <= small) { + myexit_(); + } + ++(*k); + } + } else if (*nrnk == -2) { + *k = 1; + i__1 = numrnk - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (rwork[i__ + 1] <= rwork[i__] * *tol || rwork[i__] <= small) { + myexit_(); + } + ++(*k); + } + } else { + *k = 1; + i__1 = *nrnk; + for (i__ = 2; i__ <= i__1; ++i__) { + if (rwork[i__] <= small) { + myexit_(); + } + ++(*k); + } +/* END SELECT */ + } +/* Now, U = X(1:M,1:K) is the SVD/POD basis for the */ +/* snapshot data in the input matrix X. */ +/* <4> Compute the Rayleigh quotient S = U^H * A * U. */ +/* Depending on the requested outputs, the computation */ +/* is organized to compute additional auxiliary */ +/* matrices (for the residuals and refinements). */ + +/* In all formulas below, we need V_k*Sigma_k^(-1) */ +/* where either V_k is in W(1:N,1:K), or V_k^H is in */ +/* W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)). */ + if (lsame_(t_or_n__, "N")) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + r__1 = one / rwork[i__]; + csscal_(n, &r__1, &w[i__ * w_dim1 + 1], &c__1); +/* W(1:N,i) = (ONE/RWORK(i)) * W(1:N,i) ! INTRINSIC */ +/* BLAS CALL */ + } + } else { +/* This non-unit stride access is due to the fact */ +/* that CGESVD, CGESVDQ and CGESDD return the */ +/* adjoint matrix of the right singular vectors. */ +/* DO i = 1, K */ +/* CALL DSCAL( N, ONE/RWORK(i), W(i,1), LDW ) ! BLAS CALL */ +/* ! W(i,1:N) = (ONE/RWORK(i)) * W(i,1:N) ! INTRINSIC */ +/* END DO */ + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + rwork[*n + i__] = one / rwork[i__]; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * w_dim1; + i__4 = *n + i__; + q__2.r = rwork[i__4], q__2.i = zero; + i__5 = i__ + j * w_dim1; + q__1.r = q__2.r * w[i__5].r - q__2.i * w[i__5].i, q__1.i = + q__2.r * w[i__5].i + q__2.i * w[i__5].r; + w[i__3].r = q__1.r, w[i__3].i = q__1.i; + } + } + } + + if (wntref) { + +/* Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K))) */ +/* for computing the refined Ritz vectors */ +/* (optionally, outside CGEDMD). */ + cgemm_("N", t_or_n__, m, k, n, &zone, &y[y_offset], ldy, &w[w_offset], + ldw, &zzero, &z__[z_offset], ldz); +/* Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRI */ +/* Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K)) ! INTRI */ + +/* At this point Z contains */ +/* A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and */ +/* this is needed for computing the residuals. */ +/* This matrix is returned in the array B and */ +/* it can be used to compute refined Ritz vectors. */ +/* BLAS */ + clacpy_("A", m, k, &z__[z_offset], ldz, &b[b_offset], ldb); +/* B(1:M,1:K) = Z(1:M,1:K) ! INTRINSIC */ +/* BLAS CALL */ + cgemm_("C", "N", k, k, m, &zone, &x[x_offset], ldx, &z__[z_offset], + ldz, &zzero, &s[s_offset], lds); +/* S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRI */ +/* At this point S = U^H * A * U is the Rayleigh quotient. */ +/* BLAS */ + } else { +/* A * U(:,1:K) is not explicitly needed and the */ +/* computation is organized differently. The Rayleigh */ +/* quotient is computed more efficiently. */ + cgemm_("C", "N", k, n, m, &zone, &x[x_offset], ldx, &y[y_offset], ldy, + &zzero, &z__[z_offset], ldz); +/* Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) ) ! IN */ + +/* B */ + cgemm_("N", t_or_n__, k, k, n, &zone, &z__[z_offset], ldz, &w[ + w_offset], ldw, &zzero, &s[s_offset], lds); +/* S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRIN */ +/* S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K))) ! INTRIN */ +/* At this point S = U^H * A * U is the Rayleigh quotient. */ +/* If the residuals are requested, save scaled V_k into Z. */ +/* Recall that V_k or V_k^H is stored in W. */ +/* BLAS */ + if (wntres || wntex) { + if (lsame_(t_or_n__, "N")) { + clacpy_("A", n, k, &w[w_offset], ldw, &z__[z_offset], ldz); + } else { + clacpy_("A", k, n, &w[w_offset], ldw, &z__[z_offset], ldz); + } + } + } + +/* <5> Compute the Ritz values and (if requested) the */ +/* right eigenvectors of the Rayleigh quotient. */ + + cgeev_("N", jobzl, k, &s[s_offset], lds, &eigs[1], &w[w_offset], ldw, &w[ + w_offset], ldw, &zwork[1], lzwork, &rwork[*n + 1], &info1); + +/* W(1:K,1:K) contains the eigenvectors of the Rayleigh */ +/* quotient. See the description of Z. */ +/* Also, see the description of CGEEV. */ +/* LAPACK CA */ + if (info1 > 0) { +/* CGEEV failed to compute the eigenvalues and */ +/* eigenvectors of the Rayleigh quotient. */ + *info = 3; + return 0; + } + +/* <6> Compute the eigenvectors (if requested) and, */ +/* the residuals (if requested). */ + + if (wntvec || wntex) { + if (wntres) { + if (wntref) { +/* Here, if the refinement is requested, we have */ +/* A*U(:,1:K) already computed and stored in Z. */ +/* For the residuals, need Y = A * U(:,1;K) * W. */ + cgemm_("N", "N", m, k, k, &zone, &z__[z_offset], ldz, &w[ + w_offset], ldw, &zzero, &y[y_offset], ldy); +/* Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K) ! INTRINSIC */ +/* This frees Z; Y contains A * U(:,1:K) * W. */ +/* BLAS CALL */ + } else { +/* Compute S = V_k * Sigma_k^(-1) * W, where */ +/* V_k * Sigma_k^(-1) (or its adjoint) is stored in Z */ + cgemm_(t_or_n__, "N", n, k, k, &zone, &z__[z_offset], ldz, &w[ + w_offset], ldw, &zzero, &s[s_offset], lds); +/* Then, compute Z = Y * S = */ +/* = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = */ +/* = A * U(:,1:K) * W(1:K,1:K) */ + cgemm_("N", "N", m, k, n, &zone, &y[y_offset], ldy, &s[ + s_offset], lds, &zzero, &z__[z_offset], ldz); +/* Save a copy of Z into Y and free Z for holding */ +/* the Ritz vectors. */ + clacpy_("A", m, k, &z__[z_offset], ldz, &y[y_offset], ldy); + if (wntex) { + clacpy_("A", m, k, &z__[z_offset], ldz, &b[b_offset], ldb); + } + } + } else if (wntex) { +/* Compute S = V_k * Sigma_k^(-1) * W, where */ +/* V_k * Sigma_k^(-1) is stored in Z */ + cgemm_(t_or_n__, "N", n, k, k, &zone, &z__[z_offset], ldz, &w[ + w_offset], ldw, &zzero, &s[s_offset], lds); +/* Then, compute Z = Y * S = */ +/* = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = */ +/* = A * U(:,1:K) * W(1:K,1:K) */ + cgemm_("N", "N", m, k, n, &zone, &y[y_offset], ldy, &s[s_offset], + lds, &zzero, &b[b_offset], ldb); +/* The above call replaces the following two calls */ +/* that were used in the developing-testing phase. */ +/* CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, & */ +/* LDS, ZZERO, Z, LDZ) */ +/* Save a copy of Z into Y and free Z for holding */ +/* the Ritz vectors. */ +/* CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB ) */ + } + +/* Compute the Ritz vectors */ + if (wntvec) { + cgemm_("N", "N", m, k, k, &zone, &x[x_offset], ldx, &w[w_offset], + ldw, &zzero, &z__[z_offset], ldz); + } +/* Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K)) ! INTRIN */ + +/* BLAS CALL */ + if (wntres) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + q__1.r = -eigs[i__2].r, q__1.i = -eigs[i__2].i; + caxpy_(m, &q__1, &z__[i__ * z_dim1 + 1], &c__1, &y[i__ * + y_dim1 + 1], &c__1); +/* Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i) ! */ + + res[i__] = scnrm2_(m, &y[i__ * y_dim1 + 1], &c__1); + + } + } + } + + if (*whtsvd == 4) { + rwork[*n + 1] = xscl1; + rwork[*n + 2] = xscl2; + } + +/* Successful exit. */ + if (! badxy) { + *info = 0; + } else { +/* A warning on possible data inconsistency. */ +/* This should be a rare event. */ + *info = 4; + } +/* ............................................................ */ + return 0; +/* ...... */ +} /* cgedmd_ */ + diff --git a/lapack-netlib/SRC/cgedmdq.c b/lapack-netlib/SRC/cgedmdq.c index 447b23014..6e3a1faca 100644 --- a/lapack-netlib/SRC/cgedmdq.c +++ b/lapack-netlib/SRC/cgedmdq.c @@ -509,3 +509,781 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ +/* -- translated by f2c (version 20000121). + You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static integer c_n1 = -1; + +/* Subroutine */ int cgedmdq_(char *jobs, char *jobz, char *jobr, char *jobq, + char *jobt, char *jobf, integer *whtsvd, integer *m, integer *n, + complex *f, integer *ldf, complex *x, integer *ldx, complex *y, + integer *ldy, integer *nrnk, real *tol, integer *k, complex *eigs, + complex *z__, integer *ldz, real *res, complex *b, integer *ldb, + complex *v, integer *ldv, complex *s, integer *lds, complex *zwork, + integer *lzwork, real *work, integer *lwork, integer *iwork, integer * + liwork, integer *info) +{ + /* System generated locals */ + integer f_dim1, f_offset, x_dim1, x_offset, y_dim1, y_offset, z_dim1, + z_offset, b_dim1, b_offset, v_dim1, v_offset, s_dim1, s_offset, + i__1, i__2; + + /* Local variables */ + real zero; + integer info1; + extern logical lsame_(char *, char *); + char jobvl[1]; + integer minmn; + logical wantq; + integer mlwqr, olwqr; + logical wntex; + complex zzero; + extern /* Subroutine */ int cgedmd_(char *, char *, char *, char *, + integer *, integer *, integer *, complex *, integer *, complex *, + integer *, integer *, real *, integer *, complex *, complex *, + integer *, real *, complex *, integer *, complex *, integer *, + complex *, integer *, complex *, integer *, real *, integer *, + integer *, integer *, integer *), + cgeqrf_(integer *, integer *, complex *, integer *, complex *, + complex *, integer *, integer *), clacpy_(char *, integer *, + integer *, complex *, integer *, complex *, integer *), + claset_(char *, integer *, integer *, complex *, complex *, + complex *, integer *), xerbla_(char *, integer *); + integer mlwdmd, olwdmd; + logical sccolx, sccoly; + extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *); + integer iminwr; + logical wntvec, wntvcf; + integer mlwgqr; + logical wntref; + integer mlwork, olwgqr, olwork, mlrwrk, mlwmqr, olwmqr; + logical lquery, wntres, wnttrf, wntvcq; + extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + real one; + +/* March 2023 */ +/* ..... */ +/* USE iso_fortran_env */ +/* INTEGER, PARAMETER :: WP = real32 */ +/* ..... */ +/* Scalar arguments */ +/* Array arguments */ +/* ..... */ +/* Purpose */ +/* ======= */ +/* CGEDMDQ computes the Dynamic Mode Decomposition (DMD) for */ +/* a pair of data snapshot matrices, using a QR factorization */ +/* based compression of the data. For the input matrices */ +/* X and Y such that Y = A*X with an unaccessible matrix */ +/* A, CGEDMDQ computes a certain number of Ritz pairs of A using */ +/* the standard Rayleigh-Ritz extraction from a subspace of */ +/* range(X) that is determined using the leading left singular */ +/* vectors of X. Optionally, CGEDMDQ returns the residuals */ +/* of the computed Ritz pairs, the information needed for */ +/* a refinement of the Ritz vectors, or the eigenvectors of */ +/* the Exact DMD. */ +/* For further details see the references listed */ +/* below. For more details of the implementation see [3]. */ + +/* References */ +/* ========== */ +/* [1] P. Schmid: Dynamic mode decomposition of numerical */ +/* and experimental data, */ +/* Journal of Fluid Mechanics 656, 5-28, 2010. */ +/* [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal */ +/* decompositions: analysis and enhancements, */ +/* SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. */ +/* [3] Z. Drmac: A LAPACK implementation of the Dynamic */ +/* Mode Decomposition I. Technical report. AIMDyn Inc. */ +/* and LAPACK Working Note 298. */ +/* [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. */ +/* Brunton, N. Kutz: On Dynamic Mode Decomposition: */ +/* Theory and Applications, Journal of Computational */ +/* Dynamics 1(2), 391 -421, 2014. */ + +/* Developed and supported by: */ +/* =========================== */ +/* Developed and coded by Zlatko Drmac, Faculty of Science, */ +/* University of Zagreb; drmac@math.hr */ +/* In cooperation with */ +/* AIMdyn Inc., Santa Barbara, CA. */ +/* and supported by */ +/* - DARPA SBIR project "Koopman Operator-Based Forecasting */ +/* for Nonstationary Processes from Near-Term, Limited */ +/* Observational Data" Contract No: W31P4Q-21-C-0007 */ +/* - DARPA PAI project "Physics-Informed Machine Learning */ +/* Methodologies" Contract No: HR0011-18-9-0033 */ +/* - DARPA MoDyL project "A Data-Driven, Operator-Theoretic */ +/* Framework for Space-Time Analysis of Process Dynamics" */ +/* Contract No: HR0011-16-C-0116 */ +/* Any opinions, findings and conclusions or recommendations */ +/* expressed in this material are those of the author and */ +/* do not necessarily reflect the views of the DARPA SBIR */ +/* Program Office. */ +/* ============================================================ */ +/* Distribution Statement A: */ +/* Approved for Public Release, Distribution Unlimited. */ +/* Cleared by DARPA on September 29, 2022 */ +/* ============================================================ */ +/* ...................................................................... */ +/* Arguments */ +/* ========= */ +/* JOBS (input) CHARACTER*1 */ +/* Determines whether the initial data snapshots are scaled */ +/* by a diagonal matrix. The data snapshots are the columns */ +/* of F. The leading N-1 columns of F are denoted X and the */ +/* trailing N-1 columns are denoted Y. */ +/* 'S' :: The data snapshots matrices X and Y are multiplied */ +/* with a diagonal matrix D so that X*D has unit */ +/* nonzero columns (in the Euclidean 2-norm) */ +/* 'C' :: The snapshots are scaled as with the 'S' option. */ +/* If it is found that an i-th column of X is zero */ +/* vector and the corresponding i-th column of Y is */ +/* non-zero, then the i-th column of Y is set to */ +/* zero and a warning flag is raised. */ +/* 'Y' :: The data snapshots matrices X and Y are multiplied */ +/* by a diagonal matrix D so that Y*D has unit */ +/* nonzero columns (in the Euclidean 2-norm) */ +/* 'N' :: No data scaling. */ +/* ..... */ +/* JOBZ (input) CHARACTER*1 */ +/* Determines whether the eigenvectors (Koopman modes) will */ +/* be computed. */ +/* 'V' :: The eigenvectors (Koopman modes) will be computed */ +/* and returned in the matrix Z. */ +/* See the description of Z. */ +/* 'F' :: The eigenvectors (Koopman modes) will be returned */ +/* in factored form as the product Z*V, where Z */ +/* is orthonormal and V contains the eigenvectors */ +/* of the corresponding Rayleigh quotient. */ +/* See the descriptions of F, V, Z. */ +/* 'Q' :: The eigenvectors (Koopman modes) will be returned */ +/* in factored form as the product Q*Z, where Z */ +/* contains the eigenvectors of the compression of the */ +/* underlying discretised operator onto the span of */ +/* the data snapshots. See the descriptions of F, V, Z. */ +/* Q is from the inital QR facorization. */ +/* 'N' :: The eigenvectors are not computed. */ +/* ..... */ +/* JOBR (input) CHARACTER*1 */ +/* Determines whether to compute the residuals. */ +/* 'R' :: The residuals for the computed eigenpairs will */ +/* be computed and stored in the array RES. */ +/* See the description of RES. */ +/* For this option to be legal, JOBZ must be 'V'. */ +/* 'N' :: The residuals are not computed. */ +/* ..... */ +/* JOBQ (input) CHARACTER*1 */ +/* Specifies whether to explicitly compute and return the */ +/* unitary matrix from the QR factorization. */ +/* 'Q' :: The matrix Q of the QR factorization of the data */ +/* snapshot matrix is computed and stored in the */ +/* array F. See the description of F. */ +/* 'N' :: The matrix Q is not explicitly computed. */ +/* ..... */ +/* JOBT (input) CHARACTER*1 */ +/* Specifies whether to return the upper triangular factor */ +/* from the QR factorization. */ +/* 'R' :: The matrix R of the QR factorization of the data */ +/* snapshot matrix F is returned in the array Y. */ +/* See the description of Y and Further details. */ +/* 'N' :: The matrix R is not returned. */ +/* ..... */ +/* JOBF (input) CHARACTER*1 */ +/* Specifies whether to store information needed for post- */ +/* processing (e.g. computing refined Ritz vectors) */ +/* 'R' :: The matrix needed for the refinement of the Ritz */ +/* vectors is computed and stored in the array B. */ +/* See the description of B. */ +/* 'E' :: The unscaled eigenvectors of the Exact DMD are */ +/* computed and returned in the array B. See the */ +/* description of B. */ +/* 'N' :: No eigenvector refinement data is computed. */ +/* To be useful on exit, this option needs JOBQ='Q'. */ +/* ..... */ +/* WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } */ +/* Allows for a selection of the SVD algorithm from the */ +/* LAPACK library. */ +/* 1 :: CGESVD (the QR SVD algorithm) */ +/* 2 :: CGESDD (the Divide and Conquer algorithm; if enough */ +/* workspace available, this is the fastest option) */ +/* 3 :: CGESVDQ (the preconditioned QR SVD ; this and 4 */ +/* are the most accurate options) */ +/* 4 :: CGEJSV (the preconditioned Jacobi SVD; this and 3 */ +/* are the most accurate options) */ +/* For the four methods above, a significant difference in */ +/* the accuracy of small singular values is possible if */ +/* the snapshots vary in norm so that X is severely */ +/* ill-conditioned. If small (smaller than EPS*||X||) */ +/* singular values are of interest and JOBS=='N', then */ +/* the options (3, 4) give the most accurate results, where */ +/* the option 4 is slightly better and with stronger */ +/* theoretical background. */ +/* If JOBS=='S', i.e. the columns of X will be normalized, */ +/* then all methods give nearly equally accurate results. */ +/* ..... */ +/* M (input) INTEGER, M >= 0 */ +/* The state space dimension (the number of rows of F). */ +/* ..... */ +/* N (input) INTEGER, 0 <= N <= M */ +/* The number of data snapshots from a single trajectory, */ +/* taken at equidistant discrete times. This is the */ +/* number of columns of F. */ +/* ..... */ +/* F (input/output) COMPLEX(KIND=WP) M-by-N array */ +/* > On entry, */ +/* the columns of F are the sequence of data snapshots */ +/* from a single trajectory, taken at equidistant discrete */ +/* times. It is assumed that the column norms of F are */ +/* in the range of the normalized floating point numbers. */ +/* < On exit, */ +/* If JOBQ == 'Q', the array F contains the orthogonal */ +/* matrix/factor of the QR factorization of the initial */ +/* data snapshots matrix F. See the description of JOBQ. */ +/* If JOBQ == 'N', the entries in F strictly below the main */ +/* diagonal contain, column-wise, the information on the */ +/* Householder vectors, as returned by CGEQRF. The */ +/* remaining information to restore the orthogonal matrix */ +/* of the initial QR factorization is stored in ZWORK(1:MIN(M,N)). */ +/* See the description of ZWORK. */ +/* ..... */ +/* LDF (input) INTEGER, LDF >= M */ +/* The leading dimension of the array F. */ +/* ..... */ +/* X (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array */ +/* X is used as workspace to hold representations of the */ +/* leading N-1 snapshots in the orthonormal basis computed */ +/* in the QR factorization of F. */ +/* On exit, the leading K columns of X contain the leading */ +/* K left singular vectors of the above described content */ +/* of X. To lift them to the space of the left singular */ +/* vectors U(:,1:K) of the input data, pre-multiply with the */ +/* Q factor from the initial QR factorization. */ +/* See the descriptions of F, K, V and Z. */ +/* ..... */ +/* LDX (input) INTEGER, LDX >= N */ +/* The leading dimension of the array X. */ +/* ..... */ +/* Y (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N) array */ +/* Y is used as workspace to hold representations of the */ +/* trailing N-1 snapshots in the orthonormal basis computed */ +/* in the QR factorization of F. */ +/* On exit, */ +/* If JOBT == 'R', Y contains the MIN(M,N)-by-N upper */ +/* triangular factor from the QR factorization of the data */ +/* snapshot matrix F. */ +/* ..... */ +/* LDY (input) INTEGER , LDY >= N */ +/* The leading dimension of the array Y. */ +/* ..... */ +/* NRNK (input) INTEGER */ +/* Determines the mode how to compute the numerical rank, */ +/* i.e. how to truncate small singular values of the input */ +/* matrix X. On input, if */ +/* NRNK = -1 :: i-th singular value sigma(i) is truncated */ +/* if sigma(i) <= TOL*sigma(1) */ +/* This option is recommended. */ +/* NRNK = -2 :: i-th singular value sigma(i) is truncated */ +/* if sigma(i) <= TOL*sigma(i-1) */ +/* This option is included for R&D purposes. */ +/* It requires highly accurate SVD, which */ +/* may not be feasible. */ +/* The numerical rank can be enforced by using positive */ +/* value of NRNK as follows: */ +/* 0 < NRNK <= N-1 :: at most NRNK largest singular values */ +/* will be used. If the number of the computed nonzero */ +/* singular values is less than NRNK, then only those */ +/* nonzero values will be used and the actually used */ +/* dimension is less than NRNK. The actual number of */ +/* the nonzero singular values is returned in the variable */ +/* K. See the description of K. */ +/* ..... */ +/* TOL (input) REAL(KIND=WP), 0 <= TOL < 1 */ +/* The tolerance for truncating small singular values. */ +/* See the description of NRNK. */ +/* ..... */ +/* K (output) INTEGER, 0 <= K <= N */ +/* The dimension of the SVD/POD basis for the leading N-1 */ +/* data snapshots (columns of F) and the number of the */ +/* computed Ritz pairs. The value of K is determined */ +/* according to the rule set by the parameters NRNK and */ +/* TOL. See the descriptions of NRNK and TOL. */ +/* ..... */ +/* EIGS (output) COMPLEX(KIND=WP) (N-1)-by-1 array */ +/* The leading K (K<=N-1) entries of EIGS contain */ +/* the computed eigenvalues (Ritz values). */ +/* See the descriptions of K, and Z. */ +/* ..... */ +/* Z (workspace/output) COMPLEX(KIND=WP) M-by-(N-1) array */ +/* If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i) */ +/* is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1. */ +/* If JOBZ == 'F', then the Z(:,i)'s are given implicitly as */ +/* Z*V, where Z contains orthonormal matrix (the product of */ +/* Q from the initial QR factorization and the SVD/POD_basis */ +/* returned by CGEDMD in X) and the second factor (the */ +/* eigenvectors of the Rayleigh quotient) is in the array V, */ +/* as returned by CGEDMD. That is, X(:,1:K)*V(:,i) */ +/* is an eigenvector corresponding to EIGS(i). The columns */ +/* of V(1:K,1:K) are the computed eigenvectors of the */ +/* K-by-K Rayleigh quotient. */ +/* See the descriptions of EIGS, X and V. */ +/* ..... */ +/* LDZ (input) INTEGER , LDZ >= M */ +/* The leading dimension of the array Z. */ +/* ..... */ +/* RES (output) REAL(KIND=WP) (N-1)-by-1 array */ +/* RES(1:K) contains the residuals for the K computed */ +/* Ritz pairs, */ +/* RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. */ +/* See the description of EIGS and Z. */ +/* ..... */ +/* B (output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array. */ +/* IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can */ +/* be used for computing the refined vectors; see further */ +/* details in the provided references. */ +/* If JOBF == 'E', B(1:N,1;K) contains */ +/* A*U(:,1:K)*W(1:K,1:K), which are the vectors from the */ +/* Exact DMD, up to scaling by the inverse eigenvalues. */ +/* In both cases, the content of B can be lifted to the */ +/* original dimension of the input data by pre-multiplying */ +/* with the Q factor from the initial QR factorization. */ +/* Here A denotes a compression of the underlying operator. */ +/* See the descriptions of F and X. */ +/* If JOBF =='N', then B is not referenced. */ +/* ..... */ +/* LDB (input) INTEGER, LDB >= MIN(M,N) */ +/* The leading dimension of the array B. */ +/* ..... */ +/* V (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array */ +/* On exit, V(1:K,1:K) V contains the K eigenvectors of */ +/* the Rayleigh quotient. The Ritz vectors */ +/* (returned in Z) are the product of Q from the initial QR */ +/* factorization (see the description of F) X (see the */ +/* description of X) and V. */ +/* ..... */ +/* LDV (input) INTEGER, LDV >= N-1 */ +/* The leading dimension of the array V. */ +/* ..... */ +/* S (output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array */ +/* The array S(1:K,1:K) is used for the matrix Rayleigh */ +/* quotient. This content is overwritten during */ +/* the eigenvalue decomposition by CGEEV. */ +/* See the description of K. */ +/* ..... */ +/* LDS (input) INTEGER, LDS >= N-1 */ +/* The leading dimension of the array S. */ +/* ..... */ +/* ZWORK (workspace/output) COMPLEX(KIND=WP) LWORK-by-1 array */ +/* On exit, */ +/* ZWORK(1:MIN(M,N)) contains the scalar factors of the */ +/* elementary reflectors as returned by CGEQRF of the */ +/* M-by-N input matrix F. */ +/* If the call to CGEDMDQ is only workspace query, then */ +/* ZWORK(1) contains the minimal complex workspace length and */ +/* ZWORK(2) is the optimal complex workspace length. */ +/* Hence, the length of work is at least 2. */ +/* See the description of LZWORK. */ +/* ..... */ +/* LZWORK (input) INTEGER */ +/* The minimal length of the workspace vector ZWORK. */ +/* LZWORK is calculated as follows: */ +/* Let MLWQR = N (minimal workspace for CGEQRF[M,N]) */ +/* MLWDMD = minimal workspace for CGEDMD (see the */ +/* description of LWORK in CGEDMD) */ +/* MLWMQR = N (minimal workspace for */ +/* ZUNMQR['L','N',M,N,N]) */ +/* MLWGQR = N (minimal workspace for ZUNGQR[M,N,N]) */ +/* MINMN = MIN(M,N) */ +/* Then */ +/* LZWORK = MAX(2, MIN(M,N)+MLWQR, MINMN+MLWDMD) */ +/* is further updated as follows: */ +/* if JOBZ == 'V' or JOBZ == 'F' THEN */ +/* LZWORK = MAX( LZWORK, MINMN+MLWMQR ) */ +/* if JOBQ == 'Q' THEN */ +/* LZWORK = MAX( ZLWORK, MINMN+MLWGQR) */ + +/* ..... */ +/* WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array */ +/* On exit, */ +/* WORK(1:N-1) contains the singular values of */ +/* the input submatrix F(1:M,1:N-1). */ +/* If the call to CGEDMDQ is only workspace query, then */ +/* WORK(1) contains the minimal workspace length and */ +/* WORK(2) is the optimal workspace length. hence, the */ +/* length of work is at least 2. */ +/* See the description of LWORK. */ +/* ..... */ +/* LWORK (input) INTEGER */ +/* The minimal length of the workspace vector WORK. */ +/* LWORK is the same as in CGEDMD, because in CGEDMDQ */ +/* only CGEDMD requires real workspace for snapshots */ +/* of dimensions MIN(M,N)-by-(N-1). */ +/* If on entry LWORK = -1, then a workspace query is */ +/* assumed and the procedure only computes the minimal */ +/* and the optimal workspace lengths for both WORK and */ +/* IWORK. See the descriptions of WORK and IWORK. */ +/* ..... */ +/* IWORK (workspace/output) INTEGER LIWORK-by-1 array */ +/* Workspace that is required only if WHTSVD equals */ +/* 2 , 3 or 4. (See the description of WHTSVD). */ +/* If on entry LWORK =-1 or LIWORK=-1, then the */ +/* minimal length of IWORK is computed and returned in */ +/* IWORK(1). See the description of LIWORK. */ +/* ..... */ +/* LIWORK (input) INTEGER */ +/* The minimal length of the workspace vector IWORK. */ +/* If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 */ +/* Let M1=MIN(M,N), N1=N-1. Then */ +/* If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)) */ +/* If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1) */ +/* If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N) */ +/* If on entry LIWORK = -1, then a workspace query is */ +/* assumed and the procedure only computes the minimal */ +/* and the optimal workspace lengths for both WORK and */ +/* IWORK. See the descriptions of WORK and IWORK. */ +/* ..... */ +/* INFO (output) INTEGER */ +/* -i < 0 :: On entry, the i-th argument had an */ +/* illegal value */ +/* = 0 :: Successful return. */ +/* = 1 :: Void input. Quick exit (M=0 or N=0). */ +/* = 2 :: The SVD computation of X did not converge. */ +/* Suggestion: Check the input data and/or */ +/* repeat with different WHTSVD. */ +/* = 3 :: The computation of the eigenvalues did not */ +/* converge. */ +/* = 4 :: If data scaling was requested on input and */ +/* the procedure found inconsistency in the data */ +/* such that for some column index i, */ +/* X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set */ +/* to zero if JOBS=='C'. The computation proceeds */ +/* with original or modified data and warning */ +/* flag is set with INFO=4. */ +/* ............................................................. */ +/* ............................................................. */ +/* Parameters */ +/* ~~~~~~~~~~ */ +/* COMPLEX(KIND=WP), PARAMETER :: ZONE = ( 1.0_WP, 0.0_WP ) */ + +/* Local scalars */ +/* ~~~~~~~~~~~~~ */ + +/* External functions (BLAS and LAPACK) */ +/* ~~~~~~~~~~~~~~~~~ */ + +/* External subroutines (BLAS and LAPACK) */ +/* ~~~~~~~~~~~~~~~~~~~~ */ +/* External subroutines */ +/* ~~~~~~~~~~~~~~~~~~~~ */ +/* Intrinsic functions */ +/* ~~~~~~~~~~~~~~~~~~~ */ +/* .......................................................... */ + /* Parameter adjustments */ + f_dim1 = *ldf; + f_offset = 1 + f_dim1 * 1; + f -= f_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1 * 1; + y -= y_offset; + --eigs; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --res; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + s_dim1 = *lds; + s_offset = 1 + s_dim1 * 1; + s -= s_offset; + --zwork; + --work; + --iwork; + + /* Function Body */ + one = 1.f; + zero = 0.f; + zzero.r = 0.f, zzero.i = 0.f; + +/* Test the input arguments */ + wntres = lsame_(jobr, "R"); + sccolx = lsame_(jobs, "S") || lsame_(jobs, "C"); + sccoly = lsame_(jobs, "Y"); + wntvec = lsame_(jobz, "V"); + wntvcf = lsame_(jobz, "F"); + wntvcq = lsame_(jobz, "Q"); + wntref = lsame_(jobf, "R"); + wntex = lsame_(jobf, "E"); + wantq = lsame_(jobq, "Q"); + wnttrf = lsame_(jobt, "R"); + minmn = f2cmin(*m,*n); + *info = 0; + lquery = *lwork == -1 || *liwork == -1; + + if (! (sccolx || sccoly || lsame_(jobs, "N"))) { + *info = -1; + } else if (! (wntvec || wntvcf || wntvcq || lsame_(jobz, "N"))) { + *info = -2; + } else if (! (wntres || lsame_(jobr, "N")) || + wntres && lsame_(jobz, "N")) { + *info = -3; + } else if (! (wantq || lsame_(jobq, "N"))) { + *info = -4; + } else if (! (wnttrf || lsame_(jobt, "N"))) { + *info = -5; + } else if (! (wntref || wntex || lsame_(jobf, "N"))) + { + *info = -6; + } else if (! (*whtsvd == 1 || *whtsvd == 2 || *whtsvd == 3 || *whtsvd == + 4)) { + *info = -7; + } else if (*m < 0) { + *info = -8; + } else if (*n < 0 || *n > *m + 1) { + *info = -9; + } else if (*ldf < *m) { + *info = -11; + } else if (*ldx < minmn) { + *info = -13; + } else if (*ldy < minmn) { + *info = -15; + } else if (! (*nrnk == -2 || *nrnk == -1 || *nrnk >= 1 && *nrnk <= *n)) { + *info = -16; + } else if (*tol < zero || *tol >= one) { + *info = -17; + } else if (*ldz < *m) { + *info = -21; + } else if ((wntref || wntex) && *ldb < minmn) { + *info = -24; + } else if (*ldv < *n - 1) { + *info = -26; + } else if (*lds < *n - 1) { + *info = -28; + } + + if (wntvec || wntvcf || wntvcq) { + *(unsigned char *)jobvl = 'V'; + } else { + *(unsigned char *)jobvl = 'N'; + } + if (*info == 0) { +/* Compute the minimal and the optimal workspace */ +/* requirements. Simulate running the code and */ +/* determine minimal and optimal sizes of the */ +/* workspace at any moment of the run. */ + if (*n == 0 || *n == 1) { +/* All output except K is void. INFO=1 signals */ +/* the void input. In case of a workspace query, */ +/* the minimal workspace lengths are returned. */ + if (lquery) { + iwork[1] = 1; + work[1] = 2.f; + work[2] = 2.f; + } else { + *k = 0; + } + *info = 1; + return 0; + } + mlrwrk = 2; + mlwork = 2; + olwork = 2; + iminwr = 1; + mlwqr = f2cmax(1,*n); +/* Minimal workspace length for CGEQRF. */ +/* Computing MAX */ + i__1 = mlwork, i__2 = minmn + mlwqr; + mlwork = f2cmax(i__1,i__2); + if (lquery) { + cgeqrf_(m, n, &f[f_offset], ldf, &zwork[1], &zwork[1], &c_n1, & + info1); + olwqr = (integer) zwork[1].r; +/* Computing MAX */ + i__1 = olwork, i__2 = minmn + olwqr; + olwork = f2cmax(i__1,i__2); + } + i__1 = *n - 1; + cgedmd_(jobs, jobvl, jobr, jobf, whtsvd, &minmn, &i__1, &x[x_offset], + ldx, &y[y_offset], ldy, nrnk, tol, k, &eigs[1], &z__[z_offset] + , ldz, &res[1], &b[b_offset], ldb, &v[v_offset], ldv, &s[ + s_offset], lds, &zwork[1], lzwork, &work[1], &c_n1, &iwork[1], + liwork, &info1); + mlwdmd = (integer) zwork[1].r; +/* Computing MAX */ + i__1 = mlwork, i__2 = minmn + mlwdmd; + mlwork = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = mlrwrk, i__2 = (integer) work[1]; + mlrwrk = f2cmax(i__1,i__2); + iminwr = f2cmax(iminwr,iwork[1]); + if (lquery) { + olwdmd = (integer) zwork[2].r; +/* Computing MAX */ + i__1 = olwork, i__2 = minmn + olwdmd; + olwork = f2cmax(i__1,i__2); + } + if (wntvec || wntvcf) { + mlwmqr = f2cmax(1,*n); +/* Computing MAX */ + i__1 = mlwork, i__2 = minmn + mlwmqr; + mlwork = f2cmax(i__1,i__2); + if (lquery) { + cunmqr_("L", "N", m, n, &minmn, &f[f_offset], ldf, &zwork[1], + &z__[z_offset], ldz, &zwork[1], &c_n1, &info1); + olwmqr = (integer) zwork[1].r; +/* Computing MAX */ + i__1 = olwork, i__2 = minmn + olwmqr; + olwork = f2cmax(i__1,i__2); + } + } + if (wantq) { + mlwgqr = f2cmax(1,*n); +/* Computing MAX */ + i__1 = mlwork, i__2 = minmn + mlwgqr; + mlwork = f2cmax(i__1,i__2); + if (lquery) { + cungqr_(m, &minmn, &minmn, &f[f_offset], ldf, &zwork[1], & + zwork[1], &c_n1, &info1); + olwgqr = (integer) zwork[1].r; +/* Computing MAX */ + i__1 = olwork, i__2 = minmn + olwgqr; + olwork = f2cmax(i__1,i__2); + } + } + if (*liwork < iminwr && ! lquery) { + *info = -34; + } + if (*lwork < mlrwrk && ! lquery) { + *info = -32; + } + if (*lzwork < mlwork && ! lquery) { + *info = -30; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEDMDQ", &i__1); + return 0; + } else if (lquery) { +/* Return minimal and optimal workspace sizes */ + iwork[1] = iminwr; + zwork[1].r = (real) mlwork, zwork[1].i = 0.f; + zwork[2].r = (real) olwork, zwork[2].i = 0.f; + work[1] = (real) mlrwrk; + work[2] = (real) mlrwrk; + return 0; + } +/* ..... */ +/* Initial QR factorization that is used to represent the */ +/* snapshots as elements of lower dimensional subspace. */ +/* For large scale computation with M >>N , at this place */ +/* one can use an out of core QRF. */ + + i__1 = *lzwork - minmn; + cgeqrf_(m, n, &f[f_offset], ldf, &zwork[1], &zwork[minmn + 1], &i__1, & + info1); + +/* Define X and Y as the snapshots representations in the */ +/* orthogonal basis computed in the QR factorization. */ +/* X corresponds to the leading N-1 and Y to the trailing */ +/* N-1 snapshots. */ + i__1 = *n - 1; + claset_("L", &minmn, &i__1, &zzero, &zzero, &x[x_offset], ldx); + i__1 = *n - 1; + clacpy_("U", &minmn, &i__1, &f[f_offset], ldf, &x[x_offset], ldx); + i__1 = *n - 1; + clacpy_("A", &minmn, &i__1, &f[(f_dim1 << 1) + 1], ldf, &y[y_offset], ldy); + if (*m >= 3) { + i__1 = minmn - 2; + i__2 = *n - 2; + claset_("L", &i__1, &i__2, &zzero, &zzero, &y[y_dim1 + 3], ldy); + } + +/* Compute the DMD of the projected snapshot pairs (X,Y) */ + i__1 = *n - 1; + i__2 = *lzwork - minmn; + cgedmd_(jobs, jobvl, jobr, jobf, whtsvd, &minmn, &i__1, &x[x_offset], ldx, + &y[y_offset], ldy, nrnk, tol, k, &eigs[1], &z__[z_offset], ldz, & + res[1], &b[b_offset], ldb, &v[v_offset], ldv, &s[s_offset], lds, & + zwork[minmn + 1], &i__2, &work[1], lwork, &iwork[1], liwork, & + info1); + if (info1 == 2 || info1 == 3) { +/* Return with error code. See CGEDMD for details. */ + *info = info1; + return 0; + } else { + *info = info1; + } + +/* The Ritz vectors (Koopman modes) can be explicitly */ +/* formed or returned in factored form. */ + if (wntvec) { +/* Compute the eigenvectors explicitly. */ + if (*m > minmn) { + i__1 = *m - minmn; + claset_("A", &i__1, k, &zzero, &zzero, &z__[minmn + 1 + z_dim1], + ldz); + } + i__1 = *lzwork - minmn; + cunmqr_("L", "N", m, k, &minmn, &f[f_offset], ldf, &zwork[1], &z__[ + z_offset], ldz, &zwork[minmn + 1], &i__1, &info1); + } else if (wntvcf) { +/* Return the Ritz vectors (eigenvectors) in factored */ +/* form Z*V, where Z contains orthonormal matrix (the */ +/* product of Q from the initial QR factorization and */ +/* the SVD/POD_basis returned by CGEDMD in X) and the */ +/* second factor (the eigenvectors of the Rayleigh */ +/* quotient) is in the array V, as returned by CGEDMD. */ + clacpy_("A", n, k, &x[x_offset], ldx, &z__[z_offset], ldz); + if (*m > *n) { + i__1 = *m - *n; + claset_("A", &i__1, k, &zzero, &zzero, &z__[*n + 1 + z_dim1], ldz); + } + i__1 = *lzwork - minmn; + cunmqr_("L", "N", m, k, &minmn, &f[f_offset], ldf, &zwork[1], &z__[ + z_offset], ldz, &zwork[minmn + 1], &i__1, &info1); + } + +/* Some optional output variables: */ + +/* The upper triangular factor R in the initial QR */ +/* factorization is optionally returned in the array Y. */ +/* This is useful if this call to CGEDMDQ is to be */ +/* followed by a streaming DMD that is implemented in a */ +/* QR compressed form. */ + if (wnttrf) { +/* Return the upper triangular R in Y */ + claset_("A", &minmn, n, &zzero, &zzero, &y[y_offset], ldy); + clacpy_("U", &minmn, n, &f[f_offset], ldf, &y[y_offset], ldy); + } + +/* The orthonormal/unitary factor Q in the initial QR */ +/* factorization is optionally returned in the array F. */ +/* Same as with the triangular factor above, this is */ +/* useful in a streaming DMD. */ + if (wantq) { +/* Q overwrites F */ + i__1 = *lzwork - minmn; + cungqr_(m, &minmn, &minmn, &f[f_offset], ldf, &zwork[1], &zwork[minmn + + 1], &i__1, &info1); + } + + return 0; + +} /* cgedmdq_ */ + diff --git a/lapack-netlib/SRC/dgedmd.c b/lapack-netlib/SRC/dgedmd.c index 447b23014..66b4d5da6 100644 --- a/lapack-netlib/SRC/dgedmd.c +++ b/lapack-netlib/SRC/dgedmd.c @@ -509,3 +509,1245 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ +/* -- translated by f2c (version 20000121). + You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static integer c_n1 = -1; +static integer c__1 = 1; +static integer c__0 = 0; +static integer c__2 = 2; + +/* Subroutine */ int dgedmd_(char *jobs, char *jobz, char *jobr, char *jobf, + integer *whtsvd, integer *m, integer *n, doublereal *x, integer *ldx, + doublereal *y, integer *ldy, integer *nrnk, doublereal *tol, integer * + k, doublereal *reig, doublereal *imeig, doublereal *z__, integer *ldz, + doublereal *res, doublereal *b, integer *ldb, doublereal *w, integer + *ldw, doublereal *s, integer *lds, doublereal *work, integer *lwork, + integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer x_dim1, x_offset, y_dim1, y_offset, z_dim1, z_offset, b_dim1, + b_offset, w_dim1, w_offset, s_dim1, s_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + doublereal zero, ssum; + integer info1, info2; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + doublereal xscl1, xscl2; + integer i__, j; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal scale; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), + dgeev_(char *, char *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern logical lsame_(char *, char *); + logical badxy; + doublereal small; + char jobzl[1]; + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + logical wntex; + doublereal ab[4] /* was [2][2] */; + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int dgesdd_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *, + integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern logical disnan_(doublereal *); + extern /* Subroutine */ int dgesvd_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *), dlacpy_(char *, integer *, integer *, doublereal + *, integer *, doublereal *, integer *), xerbla_(char *, + integer *); + char t_or_n__[1]; + extern /* Subroutine */ int dgejsv_(char *, char *, char *, char *, char * + , char *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *, integer *), dlassq_(integer *, doublereal *, + integer *, doublereal *, doublereal *); + logical sccolx, sccoly; + integer lwrsdd, mwrsdd, iminwr; + logical wntref, wntvec; + doublereal rootsc; + integer lwrkev, mlwork, mwrkev, numrnk, olwork; + doublereal rdummy[2]; + integer lwrsvd, mwrsvd; + logical lquery, wntres; + char jsvopt[1]; + extern /* Subroutine */ int mecago_(); + integer mwrsvj, lwrsvq, mwrsvq; + doublereal rdummy2[2], ofl, one; + extern /* Subroutine */ int dgesvdq_(char *, char *, char *, char *, char + *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *); + +/* March 2023 */ +/* ..... */ +/* USE iso_fortran_env */ +/* INTEGER, PARAMETER :: WP = real64 */ +/* ..... */ +/* Scalar arguments */ +/* Array arguments */ +/* ............................................................ */ +/* Purpose */ +/* ======= */ +/* DGEDMD computes the Dynamic Mode Decomposition (DMD) for */ +/* a pair of data snapshot matrices. For the input matrices */ +/* X and Y such that Y = A*X with an unaccessible matrix */ +/* A, DGEDMD computes a certain number of Ritz pairs of A using */ +/* the standard Rayleigh-Ritz extraction from a subspace of */ +/* range(X) that is determined using the leading left singular */ +/* vectors of X. Optionally, DGEDMD returns the residuals */ +/* of the computed Ritz pairs, the information needed for */ +/* a refinement of the Ritz vectors, or the eigenvectors of */ +/* the Exact DMD. */ +/* For further details see the references listed */ +/* below. For more details of the implementation see [3]. */ + +/* References */ +/* ========== */ +/* [1] P. Schmid: Dynamic mode decomposition of numerical */ +/* and experimental data, */ +/* Journal of Fluid Mechanics 656, 5-28, 2010. */ +/* [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal */ +/* decompositions: analysis and enhancements, */ +/* SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. */ +/* [3] Z. Drmac: A LAPACK implementation of the Dynamic */ +/* Mode Decomposition I. Technical report. AIMDyn Inc. */ +/* and LAPACK Working Note 298. */ +/* [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. */ +/* Brunton, N. Kutz: On Dynamic Mode Decomposition: */ +/* Theory and Applications, Journal of Computational */ +/* Dynamics 1(2), 391 -421, 2014. */ + +/* ...................................................................... */ +/* Developed and supported by: */ +/* =========================== */ +/* Developed and coded by Zlatko Drmac, Faculty of Science, */ +/* University of Zagreb; drmac@math.hr */ +/* In cooperation with */ +/* AIMdyn Inc., Santa Barbara, CA. */ +/* and supported by */ +/* - DARPA SBIR project "Koopman Operator-Based Forecasting */ +/* for Nonstationary Processes from Near-Term, Limited */ +/* Observational Data" Contract No: W31P4Q-21-C-0007 */ +/* - DARPA PAI project "Physics-Informed Machine Learning */ +/* Methodologies" Contract No: HR0011-18-9-0033 */ +/* - DARPA MoDyL project "A Data-Driven, Operator-Theoretic */ +/* Framework for Space-Time Analysis of Process Dynamics" */ +/* Contract No: HR0011-16-C-0116 */ +/* Any opinions, findings and conclusions or recommendations */ +/* expressed in this material are those of the author and */ +/* do not necessarily reflect the views of the DARPA SBIR */ +/* Program Office */ +/* ============================================================ */ +/* Distribution Statement A: */ +/* Approved for Public Release, Distribution Unlimited. */ +/* Cleared by DARPA on September 29, 2022 */ +/* ============================================================ */ +/* ............................................................ */ +/* Arguments */ +/* ========= */ +/* JOBS (input) CHARACTER*1 */ +/* Determines whether the initial data snapshots are scaled */ +/* by a diagonal matrix. */ +/* 'S' :: The data snapshots matrices X and Y are multiplied */ +/* with a diagonal matrix D so that X*D has unit */ +/* nonzero columns (in the Euclidean 2-norm) */ +/* 'C' :: The snapshots are scaled as with the 'S' option. */ +/* If it is found that an i-th column of X is zero */ +/* vector and the corresponding i-th column of Y is */ +/* non-zero, then the i-th column of Y is set to */ +/* zero and a warning flag is raised. */ +/* 'Y' :: The data snapshots matrices X and Y are multiplied */ +/* by a diagonal matrix D so that Y*D has unit */ +/* nonzero columns (in the Euclidean 2-norm) */ +/* 'N' :: No data scaling. */ +/* ..... */ +/* JOBZ (input) CHARACTER*1 */ +/* Determines whether the eigenvectors (Koopman modes) will */ +/* be computed. */ +/* 'V' :: The eigenvectors (Koopman modes) will be computed */ +/* and returned in the matrix Z. */ +/* See the description of Z. */ +/* 'F' :: The eigenvectors (Koopman modes) will be returned */ +/* in factored form as the product X(:,1:K)*W, where X */ +/* contains a POD basis (leading left singular vectors */ +/* of the data matrix X) and W contains the eigenvectors */ +/* of the corresponding Rayleigh quotient. */ +/* See the descriptions of K, X, W, Z. */ +/* 'N' :: The eigenvectors are not computed. */ +/* ..... */ +/* JOBR (input) CHARACTER*1 */ +/* Determines whether to compute the residuals. */ +/* 'R' :: The residuals for the computed eigenpairs will be */ +/* computed and stored in the array RES. */ +/* See the description of RES. */ +/* For this option to be legal, JOBZ must be 'V'. */ +/* 'N' :: The residuals are not computed. */ +/* ..... */ +/* JOBF (input) CHARACTER*1 */ +/* Specifies whether to store information needed for post- */ +/* processing (e.g. computing refined Ritz vectors) */ +/* 'R' :: The matrix needed for the refinement of the Ritz */ +/* vectors is computed and stored in the array B. */ +/* See the description of B. */ +/* 'E' :: The unscaled eigenvectors of the Exact DMD are */ +/* computed and returned in the array B. See the */ +/* description of B. */ +/* 'N' :: No eigenvector refinement data is computed. */ +/* ..... */ +/* WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } */ +/* Allows for a selection of the SVD algorithm from the */ +/* LAPACK library. */ +/* 1 :: DGESVD (the QR SVD algorithm) */ +/* 2 :: DGESDD (the Divide and Conquer algorithm; if enough */ +/* workspace available, this is the fastest option) */ +/* 3 :: DGESVDQ (the preconditioned QR SVD ; this and 4 */ +/* are the most accurate options) */ +/* 4 :: DGEJSV (the preconditioned Jacobi SVD; this and 3 */ +/* are the most accurate options) */ +/* For the four methods above, a significant difference in */ +/* the accuracy of small singular values is possible if */ +/* the snapshots vary in norm so that X is severely */ +/* ill-conditioned. If small (smaller than EPS*||X||) */ +/* singular values are of interest and JOBS=='N', then */ +/* the options (3, 4) give the most accurate results, where */ +/* the option 4 is slightly better and with stronger */ +/* theoretical background. */ +/* If JOBS=='S', i.e. the columns of X will be normalized, */ +/* then all methods give nearly equally accurate results. */ +/* ..... */ +/* M (input) INTEGER, M>= 0 */ +/* The state space dimension (the row dimension of X, Y). */ +/* ..... */ +/* N (input) INTEGER, 0 <= N <= M */ +/* The number of data snapshot pairs */ +/* (the number of columns of X and Y). */ +/* ..... */ +/* X (input/output) REAL(KIND=WP) M-by-N array */ +/* > On entry, X contains the data snapshot matrix X. It is */ +/* assumed that the column norms of X are in the range of */ +/* the normalized floating point numbers. */ +/* < On exit, the leading K columns of X contain a POD basis, */ +/* i.e. the leading K left singular vectors of the input */ +/* data matrix X, U(:,1:K). All N columns of X contain all */ +/* left singular vectors of the input matrix X. */ +/* See the descriptions of K, Z and W. */ +/* ..... */ +/* LDX (input) INTEGER, LDX >= M */ +/* The leading dimension of the array X. */ +/* ..... */ +/* Y (input/workspace/output) REAL(KIND=WP) M-by-N array */ +/* > On entry, Y contains the data snapshot matrix Y */ +/* < On exit, */ +/* If JOBR == 'R', the leading K columns of Y contain */ +/* the residual vectors for the computed Ritz pairs. */ +/* See the description of RES. */ +/* If JOBR == 'N', Y contains the original input data, */ +/* scaled according to the value of JOBS. */ +/* ..... */ +/* LDY (input) INTEGER , LDY >= M */ +/* The leading dimension of the array Y. */ +/* ..... */ +/* NRNK (input) INTEGER */ +/* Determines the mode how to compute the numerical rank, */ +/* i.e. how to truncate small singular values of the input */ +/* matrix X. On input, if */ +/* NRNK = -1 :: i-th singular value sigma(i) is truncated */ +/* if sigma(i) <= TOL*sigma(1). */ +/* This option is recommended. */ +/* NRNK = -2 :: i-th singular value sigma(i) is truncated */ +/* if sigma(i) <= TOL*sigma(i-1) */ +/* This option is included for R&D purposes. */ +/* It requires highly accurate SVD, which */ +/* may not be feasible. */ + +/* The numerical rank can be enforced by using positive */ +/* value of NRNK as follows: */ +/* 0 < NRNK <= N :: at most NRNK largest singular values */ +/* will be used. If the number of the computed nonzero */ +/* singular values is less than NRNK, then only those */ +/* nonzero values will be used and the actually used */ +/* dimension is less than NRNK. The actual number of */ +/* the nonzero singular values is returned in the variable */ +/* K. See the descriptions of TOL and K. */ +/* ..... */ +/* TOL (input) REAL(KIND=WP), 0 <= TOL < 1 */ +/* The tolerance for truncating small singular values. */ +/* See the description of NRNK. */ +/* ..... */ +/* K (output) INTEGER, 0 <= K <= N */ +/* The dimension of the POD basis for the data snapshot */ +/* matrix X and the number of the computed Ritz pairs. */ +/* The value of K is determined according to the rule set */ +/* by the parameters NRNK and TOL. */ +/* See the descriptions of NRNK and TOL. */ +/* ..... */ +/* REIG (output) REAL(KIND=WP) N-by-1 array */ +/* The leading K (K<=N) entries of REIG contain */ +/* the real parts of the computed eigenvalues */ +/* REIG(1:K) + sqrt(-1)*IMEIG(1:K). */ +/* See the descriptions of K, IMEIG, and Z. */ +/* ..... */ +/* IMEIG (output) REAL(KIND=WP) N-by-1 array */ +/* The leading K (K<=N) entries of IMEIG contain */ +/* the imaginary parts of the computed eigenvalues */ +/* REIG(1:K) + sqrt(-1)*IMEIG(1:K). */ +/* The eigenvalues are determined as follows: */ +/* If IMEIG(i) == 0, then the corresponding eigenvalue is */ +/* real, LAMBDA(i) = REIG(i). */ +/* If IMEIG(i)>0, then the corresponding complex */ +/* conjugate pair of eigenvalues reads */ +/* LAMBDA(i) = REIG(i) + sqrt(-1)*IMAG(i) */ +/* LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i) */ +/* That is, complex conjugate pairs have consecutive */ +/* indices (i,i+1), with the positive imaginary part */ +/* listed first. */ +/* See the descriptions of K, REIG, and Z. */ +/* ..... */ +/* Z (workspace/output) REAL(KIND=WP) M-by-N array */ +/* If JOBZ =='V' then */ +/* Z contains real Ritz vectors as follows: */ +/* If IMEIG(i)=0, then Z(:,i) is an eigenvector of */ +/* the i-th Ritz value; ||Z(:,i)||_2=1. */ +/* If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then */ +/* [Z(:,i) Z(:,i+1)] span an invariant subspace and */ +/* the Ritz values extracted from this subspace are */ +/* REIG(i) + sqrt(-1)*IMEIG(i) and */ +/* REIG(i) - sqrt(-1)*IMEIG(i). */ +/* The corresponding eigenvectors are */ +/* Z(:,i) + sqrt(-1)*Z(:,i+1) and */ +/* Z(:,i) - sqrt(-1)*Z(:,i+1), respectively. */ +/* || Z(:,i:i+1)||_F = 1. */ +/* If JOBZ == 'F', then the above descriptions hold for */ +/* the columns of X(:,1:K)*W(1:K,1:K), where the columns */ +/* of W(1:k,1:K) are the computed eigenvectors of the */ +/* K-by-K Rayleigh quotient. The columns of W(1:K,1:K) */ +/* are similarly structured: If IMEIG(i) == 0 then */ +/* X(:,1:K)*W(:,i) is an eigenvector, and if IMEIG(i)>0 */ +/* then X(:,1:K)*W(:,i)+sqrt(-1)*X(:,1:K)*W(:,i+1) and */ +/* X(:,1:K)*W(:,i)-sqrt(-1)*X(:,1:K)*W(:,i+1) */ +/* are the eigenvectors of LAMBDA(i), LAMBDA(i+1). */ +/* See the descriptions of REIG, IMEIG, X and W. */ +/* ..... */ +/* LDZ (input) INTEGER , LDZ >= M */ +/* The leading dimension of the array Z. */ +/* ..... */ +/* RES (output) REAL(KIND=WP) N-by-1 array */ +/* RES(1:K) contains the residuals for the K computed */ +/* Ritz pairs. */ +/* If LAMBDA(i) is real, then */ +/* RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2. */ +/* If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair */ +/* then */ +/* RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F */ +/* where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ] */ +/* [-imag(LAMBDA(i)) real(LAMBDA(i)) ]. */ +/* It holds that */ +/* RES(i) = || A*ZC(:,i) - LAMBDA(i) *ZC(:,i) ||_2 */ +/* RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2 */ +/* where ZC(:,i) = Z(:,i) + sqrt(-1)*Z(:,i+1) */ +/* ZC(:,i+1) = Z(:,i) - sqrt(-1)*Z(:,i+1) */ +/* See the description of REIG, IMEIG and Z. */ +/* ..... */ +/* B (output) REAL(KIND=WP) M-by-N array. */ +/* IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can */ +/* be used for computing the refined vectors; see further */ +/* details in the provided references. */ +/* If JOBF == 'E', B(1:M,1;K) contains */ +/* A*U(:,1:K)*W(1:K,1:K), which are the vectors from the */ +/* Exact DMD, up to scaling by the inverse eigenvalues. */ +/* If JOBF =='N', then B is not referenced. */ +/* See the descriptions of X, W, K. */ +/* ..... */ +/* LDB (input) INTEGER, LDB >= M */ +/* The leading dimension of the array B. */ +/* ..... */ +/* W (workspace/output) REAL(KIND=WP) N-by-N array */ +/* On exit, W(1:K,1:K) contains the K computed */ +/* eigenvectors of the matrix Rayleigh quotient (real and */ +/* imaginary parts for each complex conjugate pair of the */ +/* eigenvalues). The Ritz vectors (returned in Z) are the */ +/* product of X (containing a POD basis for the input */ +/* matrix X) and W. See the descriptions of K, S, X and Z. */ +/* W is also used as a workspace to temporarily store the */ +/* right singular vectors of X. */ +/* ..... */ +/* LDW (input) INTEGER, LDW >= N */ +/* The leading dimension of the array W. */ +/* ..... */ +/* S (workspace/output) REAL(KIND=WP) N-by-N array */ +/* The array S(1:K,1:K) is used for the matrix Rayleigh */ +/* quotient. This content is overwritten during */ +/* the eigenvalue decomposition by DGEEV. */ +/* See the description of K. */ +/* ..... */ +/* LDS (input) INTEGER, LDS >= N */ +/* The leading dimension of the array S. */ +/* ..... */ +/* WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array */ +/* On exit, WORK(1:N) contains the singular values of */ +/* X (for JOBS=='N') or column scaled X (JOBS=='S', 'C'). */ +/* If WHTSVD==4, then WORK(N+1) and WORK(N+2) contain */ +/* scaling factor WORK(N+2)/WORK(N+1) used to scale X */ +/* and Y to avoid overflow in the SVD of X. */ +/* This may be of interest if the scaling option is off */ +/* and as many as possible smallest eigenvalues are */ +/* desired to the highest feasible accuracy. */ +/* If the call to DGEDMD is only workspace query, then */ +/* WORK(1) contains the minimal workspace length and */ +/* WORK(2) is the optimal workspace length. Hence, the */ +/* leng of work is at least 2. */ +/* See the description of LWORK. */ +/* ..... */ +/* LWORK (input) INTEGER */ +/* The minimal length of the workspace vector WORK. */ +/* LWORK is calculated as follows: */ +/* If WHTSVD == 1 :: */ +/* If JOBZ == 'V', then */ +/* LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)). */ +/* If JOBZ == 'N' then */ +/* LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)). */ +/* Here LWORK_SVD = MAX(1,3*N+M,5*N) is the minimal */ +/* workspace length of DGESVD. */ +/* If WHTSVD == 2 :: */ +/* If JOBZ == 'V', then */ +/* LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)) */ +/* If JOBZ == 'N', then */ +/* LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)) */ +/* Here LWORK_SVD = MAX(M, 5*N*N+4*N)+3*N*N is the */ +/* minimal workspace length of DGESDD. */ +/* If WHTSVD == 3 :: */ +/* If JOBZ == 'V', then */ +/* LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N)) */ +/* If JOBZ == 'N', then */ +/* LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N)) */ +/* Here LWORK_SVD = N+M+MAX(3*N+1, */ +/* MAX(1,3*N+M,5*N),MAX(1,N)) */ +/* is the minimal workspace length of DGESVDQ. */ +/* If WHTSVD == 4 :: */ +/* If JOBZ == 'V', then */ +/* LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N)) */ +/* If JOBZ == 'N', then */ +/* LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N)) */ +/* Here LWORK_SVD = MAX(7,2*M+N,6*N+2*N*N) is the */ +/* minimal workspace length of DGEJSV. */ +/* The above expressions are not simplified in order to */ +/* make the usage of WORK more transparent, and for */ +/* easier checking. In any case, LWORK >= 2. */ +/* If on entry LWORK = -1, then a workspace query is */ +/* assumed and the procedure only computes the minimal */ +/* and the optimal workspace lengths for both WORK and */ +/* IWORK. See the descriptions of WORK and IWORK. */ +/* ..... */ +/* IWORK (workspace/output) INTEGER LIWORK-by-1 array */ +/* Workspace that is required only if WHTSVD equals */ +/* 2 , 3 or 4. (See the description of WHTSVD). */ +/* If on entry LWORK =-1 or LIWORK=-1, then the */ +/* minimal length of IWORK is computed and returned in */ +/* IWORK(1). See the description of LIWORK. */ +/* ..... */ +/* LIWORK (input) INTEGER */ +/* The minimal length of the workspace vector IWORK. */ +/* If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 */ +/* If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)) */ +/* If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1) */ +/* If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N) */ +/* If on entry LIWORK = -1, then a workspace query is */ +/* assumed and the procedure only computes the minimal */ +/* and the optimal workspace lengths for both WORK and */ +/* IWORK. See the descriptions of WORK and IWORK. */ +/* ..... */ +/* INFO (output) INTEGER */ +/* -i < 0 :: On entry, the i-th argument had an */ +/* illegal value */ +/* = 0 :: Successful return. */ +/* = 1 :: Void input. Quick exit (M=0 or N=0). */ +/* = 2 :: The SVD computation of X did not converge. */ +/* Suggestion: Check the input data and/or */ +/* repeat with different WHTSVD. */ +/* = 3 :: The computation of the eigenvalues did not */ +/* converge. */ +/* = 4 :: If data scaling was requested on input and */ +/* the procedure found inconsistency in the data */ +/* such that for some column index i, */ +/* X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set */ +/* to zero if JOBS=='C'. The computation proceeds */ +/* with original or modified data and warning */ +/* flag is set with INFO=4. */ +/* ............................................................. */ +/* ............................................................. */ +/* Parameters */ +/* ~~~~~~~~~~ */ +/* Local scalars */ +/* ~~~~~~~~~~~~~ */ +/* Local arrays */ +/* ~~~~~~~~~~~~ */ +/* External functions (BLAS and LAPACK) */ +/* ~~~~~~~~~~~~~~~~~ */ +/* External subroutines (BLAS and LAPACK) */ +/* ~~~~~~~~~~~~~~~~~~~~ */ +/* Intrinsic functions */ +/* ~~~~~~~~~~~~~~~~~~~ */ +/* ............................................................ */ + /* Parameter adjustments */ + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1 * 1; + y -= y_offset; + --reig; + --imeig; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --res; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + w_dim1 = *ldw; + w_offset = 1 + w_dim1 * 1; + w -= w_offset; + s_dim1 = *lds; + s_offset = 1 + s_dim1 * 1; + s -= s_offset; + --work; + --iwork; + + /* Function Body */ + one = 1.f; + zero = 0.f; + +/* Test the input arguments */ + + wntres = lsame_(jobr, "R"); + sccolx = lsame_(jobs, "S") || lsame_(jobs, "C"); + sccoly = lsame_(jobs, "Y"); + wntvec = lsame_(jobz, "V"); + wntref = lsame_(jobf, "R"); + wntex = lsame_(jobf, "E"); + *info = 0; + lquery = *lwork == -1 || *liwork == -1; + + if (! (sccolx || sccoly || lsame_(jobs, "N"))) { + *info = -1; + } else if (! (wntvec || lsame_(jobz, "N") || lsame_( + jobz, "F"))) { + *info = -2; + } else if (! (wntres || lsame_(jobr, "N")) || + wntres && ! wntvec) { + *info = -3; + } else if (! (wntref || wntex || lsame_(jobf, "N"))) + { + *info = -4; + } else if (! (*whtsvd == 1 || *whtsvd == 2 || *whtsvd == 3 || *whtsvd == + 4)) { + *info = -5; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || *n > *m) { + *info = -7; + } else if (*ldx < *m) { + *info = -9; + } else if (*ldy < *m) { + *info = -11; + } else if (! (*nrnk == -2 || *nrnk == -1 || *nrnk >= 1 && *nrnk <= *n)) { + *info = -12; + } else if (*tol < zero || *tol >= one) { + *info = -13; + } else if (*ldz < *m) { + *info = -18; + } else if ((wntref || wntex) && *ldb < *m) { + *info = -21; + } else if (*ldw < *n) { + *info = -23; + } else if (*lds < *n) { + *info = -25; + } + + if (*info == 0) { +/* Compute the minimal and the optimal workspace */ +/* requirements. Simulate running the code and */ +/* determine minimal and optimal sizes of the */ +/* workspace at any moment of the run. */ + if (*n == 0) { +/* Quick return. All output except K is void. */ +/* INFO=1 signals the void input. */ +/* In case of a workspace query, the default */ +/* minimal workspace lengths are returned. */ + if (lquery) { + iwork[1] = 1; + work[1] = 2.; + work[2] = 2.; + } else { + *k = 0; + } + *info = 1; + return 0; + } + mlwork = f2cmax(2,*n); + olwork = f2cmax(2,*n); + iminwr = 1; +/* SELECT CASE ( WHTSVD ) */ + if (*whtsvd == 1) { +/* The following is specified as the minimal */ +/* length of WORK in the definition of DGESVD: */ +/* MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) */ +/* Computing MAX */ + i__1 = 1, i__2 = f2cmin(*m,*n) * 3 + f2cmax(*m,*n), i__1 = f2cmax(i__1, + i__2), i__2 = f2cmin(*m,*n) * 5; + mwrsvd = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = mlwork, i__2 = *n + mwrsvd; + mlwork = f2cmax(i__1,i__2); + if (lquery) { + dgesvd_("O", "S", m, n, &x[x_offset], ldx, &work[1], &b[ + b_offset], ldb, &w[w_offset], ldw, rdummy, &c_n1, & + info1); +/* Computing MAX */ + i__1 = mwrsvd, i__2 = (integer) rdummy[0]; + lwrsvd = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = olwork, i__2 = *n + lwrsvd; + olwork = f2cmax(i__1,i__2); + } + } else if (*whtsvd == 2) { +/* The following is specified as the minimal */ +/* length of WORK in the definition of DGESDD: */ +/* MWRSDD = 3*MIN(M,N)*MIN(M,N) + */ +/* MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) ) */ +/* IMINWR = 8*MIN(M,N) */ +/* Computing MAX */ + i__1 = f2cmax(*m,*n), i__2 = f2cmin(*m,*n) * 5 * f2cmin(*m,*n) + (f2cmin(*m,* + n) << 2); + mwrsdd = f2cmin(*m,*n) * 3 * f2cmin(*m,*n) + f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = mlwork, i__2 = *n + mwrsdd; + mlwork = f2cmax(i__1,i__2); + iminwr = f2cmin(*m,*n) << 3; + if (lquery) { + dgesdd_("O", m, n, &x[x_offset], ldx, &work[1], &b[b_offset], + ldb, &w[w_offset], ldw, rdummy, &c_n1, &iwork[1], & + info1); +/* Computing MAX */ + i__1 = mwrsdd, i__2 = (integer) rdummy[0]; + lwrsdd = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = olwork, i__2 = *n + lwrsdd; + olwork = f2cmax(i__1,i__2); + } + } else if (*whtsvd == 3) { +/* LWQP3 = 3*N+1 */ +/* LWORQ = MAX(N, 1) */ +/* MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) */ +/* MWRSVQ = N + MAX( LWQP3, MWRSVD, LWORQ ) + MAX(M,2) */ +/* MLWORK = N + MWRSVQ */ +/* IMINWR = M+N-1 */ + dgesvdq_("H", "P", "N", "R", "R", m, n, &x[x_offset], ldx, &work[ + 1], &z__[z_offset], ldz, &w[w_offset], ldw, &numrnk, & + iwork[1], liwork, rdummy, &c_n1, rdummy2, &c_n1, &info1); + iminwr = iwork[1]; + mwrsvq = (integer) rdummy[1]; +/* Computing MAX */ + i__1 = mlwork, i__2 = *n + mwrsvq + (integer) rdummy2[0]; + mlwork = f2cmax(i__1,i__2); + if (lquery) { +/* Computing MAX */ + i__1 = mwrsvq, i__2 = (integer) rdummy[0]; + lwrsvq = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = olwork, i__2 = *n + lwrsvq + (integer) rdummy2[0]; + olwork = f2cmax(i__1,i__2); + } + } else if (*whtsvd == 4) { + *(unsigned char *)jsvopt = 'J'; +/* MWRSVJ = MAX( 7, 2*M+N, 6*N+2*N*N ) ! for JSVOPT='V' */ +/* Computing MAX */ + i__1 = 7, i__2 = (*m << 1) + *n, i__1 = f2cmax(i__1,i__2), i__2 = (* + n << 2) + *n * *n, i__1 = f2cmax(i__1,i__2), i__2 = (*n << 1) + + *n * *n + 6; + mwrsvj = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = mlwork, i__2 = *n + mwrsvj; + mlwork = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 3, i__2 = *m + *n * 3; + iminwr = f2cmax(i__1,i__2); + if (lquery) { +/* Computing MAX */ + i__1 = olwork, i__2 = *n + mwrsvj; + olwork = f2cmax(i__1,i__2); + } +/* END SELECT */ + } + if (wntvec || wntex || lsame_(jobz, "F")) { + *(unsigned char *)jobzl = 'V'; + } else { + *(unsigned char *)jobzl = 'N'; + } +/* Workspace calculation to the DGEEV call */ + if (lsame_(jobzl, "V")) { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 2; + mwrkev = f2cmax(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = 1, i__2 = *n * 3; + mwrkev = f2cmax(i__1,i__2); + } +/* Computing MAX */ + i__1 = mlwork, i__2 = *n + mwrkev; + mlwork = f2cmax(i__1,i__2); + if (lquery) { + dgeev_("N", jobzl, n, &s[s_offset], lds, &reig[1], &imeig[1], &w[ + w_offset], ldw, &w[w_offset], ldw, rdummy, &c_n1, &info1); +/* Computing MAX */ + i__1 = mwrkev, i__2 = (integer) rdummy[0]; + lwrkev = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = olwork, i__2 = *n + lwrkev; + olwork = f2cmax(i__1,i__2); + } + + if (*liwork < iminwr && ! lquery) { + *info = -29; + } + if (*lwork < mlwork && ! lquery) { + *info = -27; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEDMD", &i__1); + return 0; + } else if (lquery) { +/* Return minimal and optimal workspace sizes */ + iwork[1] = iminwr; + work[1] = (doublereal) mlwork; + work[2] = (doublereal) olwork; + return 0; + } +/* ............................................................ */ + + ofl = dlamch_("O"); + small = dlamch_("S"); + badxy = FALSE_; + +/* <1> Optional scaling of the snapshots (columns of X, Y) */ +/* ========================================================== */ + if (sccolx) { +/* The columns of X will be normalized. */ +/* To prevent overflows, the column norms of X are */ +/* carefully computed using DLASSQ. */ + *k = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* WORK(i) = DNRM2( M, X(1,i), 1 ) */ + scale = zero; + dlassq_(m, &x[i__ * x_dim1 + 1], &c__1, &scale, &ssum); + if (disnan_(&scale) || disnan_(&ssum)) { + *k = 0; + *info = -8; + i__2 = -(*info); + xerbla_("DGEDMD", &i__2); + } + if (scale != zero && ssum != zero) { + rootsc = sqrt(ssum); + if (scale >= ofl / rootsc) { +/* Norm of X(:,i) overflows. First, X(:,i) */ +/* is scaled by */ +/* ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2. */ +/* Next, the norm of X(:,i) is stored without */ +/* overflow as WORK(i) = - SCALE * (ROOTSC/M), */ +/* the minus sign indicating the 1/M factor. */ +/* Scaling is performed without overflow, and */ +/* underflow may occur in the smallest entries */ +/* of X(:,i). The relative backward and forward */ +/* errors are small in the ell_2 norm. */ + d__1 = one / rootsc; + dlascl_("G", &c__0, &c__0, &scale, &d__1, m, &c__1, &x[ + i__ * x_dim1 + 1], m, &info2); + work[i__] = -scale * (rootsc / (doublereal) (*m)); + } else { +/* X(:,i) will be scaled to unit 2-norm */ + work[i__] = scale * rootsc; + dlascl_("G", &c__0, &c__0, &work[i__], &one, m, &c__1, &x[ + i__ * x_dim1 + 1], m, &info2); +/* X(1:M,i) = (ONE/WORK(i)) * X(1:M,i) ! INTRINSIC */ +/* LAPACK */ + } + } else { + work[i__] = zero; + ++(*k); + } + } + if (*k == *n) { +/* All columns of X are zero. Return error code -8. */ +/* (the 8th input variable had an illegal value) */ + *k = 0; + *info = -8; + i__1 = -(*info); + xerbla_("DGEDMD", &i__1); + return 0; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Now, apply the same scaling to the columns of Y. */ + if (work[i__] > zero) { + d__1 = one / work[i__]; + dscal_(m, &d__1, &y[i__ * y_dim1 + 1], &c__1); +/* Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i) ! INTRINSIC */ +/* BLAS CALL */ + } else if (work[i__] < zero) { + d__1 = -work[i__]; + d__2 = one / (doublereal) (*m); + dlascl_("G", &c__0, &c__0, &d__1, &d__2, m, &c__1, &y[i__ * + y_dim1 + 1], m, &info2); +/* LAPACK CAL */ + } else if (y[idamax_(m, &y[i__ * y_dim1 + 1], &c__1) + i__ * + y_dim1] != zero) { +/* X(:,i) is zero vector. For consistency, */ +/* Y(:,i) should also be zero. If Y(:,i) is not */ +/* zero, then the data might be inconsistent or */ +/* corrupted. If JOBS == 'C', Y(:,i) is set to */ +/* zero and a warning flag is raised. */ +/* The computation continues but the */ +/* situation will be reported in the output. */ + badxy = TRUE_; + if (lsame_(jobs, "C")) { + dscal_(m, &zero, &y[i__ * y_dim1 + 1], &c__1); + } +/* BLAS CALL */ + } + } + } + + if (sccoly) { +/* The columns of Y will be normalized. */ +/* To prevent overflows, the column norms of Y are */ +/* carefully computed using DLASSQ. */ + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* WORK(i) = DNRM2( M, Y(1,i), 1 ) */ + scale = zero; + dlassq_(m, &y[i__ * y_dim1 + 1], &c__1, &scale, &ssum); + if (disnan_(&scale) || disnan_(&ssum)) { + *k = 0; + *info = -10; + i__2 = -(*info); + xerbla_("DGEDMD", &i__2); + } + if (scale != zero && ssum != zero) { + rootsc = sqrt(ssum); + if (scale >= ofl / rootsc) { +/* Norm of Y(:,i) overflows. First, Y(:,i) */ +/* is scaled by */ +/* ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2. */ +/* Next, the norm of Y(:,i) is stored without */ +/* overflow as WORK(i) = - SCALE * (ROOTSC/M), */ +/* the minus sign indicating the 1/M factor. */ +/* Scaling is performed without overflow, and */ +/* underflow may occur in the smallest entries */ +/* of Y(:,i). The relative backward and forward */ +/* errors are small in the ell_2 norm. */ + d__1 = one / rootsc; + dlascl_("G", &c__0, &c__0, &scale, &d__1, m, &c__1, &y[ + i__ * y_dim1 + 1], m, &info2); + work[i__] = -scale * (rootsc / (doublereal) (*m)); + } else { +/* X(:,i) will be scaled to unit 2-norm */ + work[i__] = scale * rootsc; + dlascl_("G", &c__0, &c__0, &work[i__], &one, m, &c__1, &y[ + i__ * y_dim1 + 1], m, &info2); +/* Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i) ! INTRINSIC */ +/* LAPACK */ + } + } else { + work[i__] = zero; + } + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Now, apply the same scaling to the columns of X. */ + if (work[i__] > zero) { + d__1 = one / work[i__]; + dscal_(m, &d__1, &x[i__ * x_dim1 + 1], &c__1); +/* X(1:M,i) = (ONE/WORK(i)) * X(1:M,i) ! INTRINSIC */ +/* BLAS CALL */ + } else if (work[i__] < zero) { + d__1 = -work[i__]; + d__2 = one / (doublereal) (*m); + dlascl_("G", &c__0, &c__0, &d__1, &d__2, m, &c__1, &x[i__ * + x_dim1 + 1], m, &info2); +/* LAPACK CAL */ + } else if (x[idamax_(m, &x[i__ * x_dim1 + 1], &c__1) + i__ * + x_dim1] != zero) { +/* Y(:,i) is zero vector. If X(:,i) is not */ +/* zero, then a warning flag is raised. */ +/* The computation continues but the */ +/* situation will be reported in the output. */ + badxy = TRUE_; + } + } + } + +/* <2> SVD of the data snapshot matrix X. */ +/* ===================================== */ +/* The left singular vectors are stored in the array X. */ +/* The right singular vectors are in the array W. */ +/* The array W will later on contain the eigenvectors */ +/* of a Rayleigh quotient. */ + numrnk = *n; +/* SELECT CASE ( WHTSVD ) */ + if (*whtsvd == 1) { + i__1 = *lwork - *n; + dgesvd_("O", "S", m, n, &x[x_offset], ldx, &work[1], &b[b_offset], + ldb, &w[w_offset], ldw, &work[*n + 1], &i__1, &info1); +/* LAPACK CAL */ + *(unsigned char *)t_or_n__ = 'T'; + } else if (*whtsvd == 2) { + i__1 = *lwork - *n; + dgesdd_("O", m, n, &x[x_offset], ldx, &work[1], &b[b_offset], ldb, &w[ + w_offset], ldw, &work[*n + 1], &i__1, &iwork[1], &info1); +/* LAPACK CAL */ + *(unsigned char *)t_or_n__ = 'T'; + } else if (*whtsvd == 3) { + i__1 = *lwork - *n - f2cmax(2,*m); + i__2 = f2cmax(2,*m); + dgesvdq_("H", "P", "N", "R", "R", m, n, &x[x_offset], ldx, &work[1], & + z__[z_offset], ldz, &w[w_offset], ldw, &numrnk, &iwork[1], + liwork, &work[*n + f2cmax(2,*m) + 1], &i__1, &work[*n + 1], & + i__2, &info1); +/* L */ + dlacpy_("A", m, &numrnk, &z__[z_offset], ldz, &x[x_offset], ldx); +/* LAPACK C */ + *(unsigned char *)t_or_n__ = 'T'; + } else if (*whtsvd == 4) { + i__1 = *lwork - *n; + dgejsv_("F", "U", jsvopt, "N", "N", "P", m, n, &x[x_offset], ldx, & + work[1], &z__[z_offset], ldz, &w[w_offset], ldw, &work[*n + 1] + , &i__1, &iwork[1], &info1); +/* LAPACK CALL */ + dlacpy_("A", m, n, &z__[z_offset], ldz, &x[x_offset], ldx); +/* LAPACK CALL */ + *(unsigned char *)t_or_n__ = 'N'; + xscl1 = work[*n + 1]; + xscl2 = work[*n + 2]; + if (xscl1 != xscl2) { +/* This is an exceptional situation. If the */ +/* data matrices are not scaled and the */ +/* largest singular value of X overflows. */ +/* In that case DGEJSV can return the SVD */ +/* in scaled form. The scaling factor can be used */ +/* to rescale the data (X and Y). */ + dlascl_("G", &c__0, &c__0, &xscl1, &xscl2, m, n, &y[y_offset], + ldy, &info2); + } +/* END SELECT */ + } + + if (info1 > 0) { +/* The SVD selected subroutine did not converge. */ +/* Return with an error code. */ + *info = 2; + return 0; + } + + if (work[1] == zero) { +/* The largest computed singular value of (scaled) */ +/* X is zero. Return error code -8 */ +/* (the 8th input variable had an illegal value). */ + *k = 0; + *info = -8; + i__1 = -(*info); + xerbla_("DGEDMD", &i__1); + return 0; + } + +/* <3> Determine the numerical rank of the data */ +/* snapshots matrix X. This depends on the */ +/* parameters NRNK and TOL. */ +/* SELECT CASE ( NRNK ) */ + if (*nrnk == -1) { + *k = 1; + i__1 = numrnk; + for (i__ = 2; i__ <= i__1; ++i__) { + if (work[i__] <= work[1] * *tol || work[i__] <= small) { + myexit_(); + } + ++(*k); + } + } else if (*nrnk == -2) { + *k = 1; + i__1 = numrnk - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (work[i__ + 1] <= work[i__] * *tol || work[i__] <= small) { + myexit_(); + } + ++(*k); + } + } else { + *k = 1; + i__1 = *nrnk; + for (i__ = 2; i__ <= i__1; ++i__) { + if (work[i__] <= small) { + myexit_(); + } + ++(*k); + } +/* END SELECT */ + } +/* Now, U = X(1:M,1:K) is the SVD/POD basis for the */ +/* snapshot data in the input matrix X. */ +/* <4> Compute the Rayleigh quotient S = U^T * A * U. */ +/* Depending on the requested outputs, the computation */ +/* is organized to compute additional auxiliary */ +/* matrices (for the residuals and refinements). */ + +/* In all formulas below, we need V_k*Sigma_k^(-1) */ +/* where either V_k is in W(1:N,1:K), or V_k^T is in */ +/* W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)). */ + if (lsame_(t_or_n__, "N")) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = one / work[i__]; + dscal_(n, &d__1, &w[i__ * w_dim1 + 1], &c__1); +/* W(1:N,i) = (ONE/WORK(i)) * W(1:N,i) ! INTRINSIC */ +/* BLAS CALL */ + } + } else { +/* This non-unit stride access is due to the fact */ +/* that DGESVD, DGESVDQ and DGESDD return the */ +/* transposed matrix of the right singular vectors. */ +/* DO i = 1, K */ +/* CALL DSCAL( N, ONE/WORK(i), W(i,1), LDW ) ! BLAS CALL */ +/* ! W(i,1:N) = (ONE/WORK(i)) * W(i,1:N) ! INTRINSIC */ +/* END DO */ + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + work[*n + i__] = one / work[i__]; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__ + j * w_dim1] = work[*n + i__] * w[i__ + j * w_dim1]; + } + } + } + + if (wntref) { + +/* Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K))) */ +/* for computing the refined Ritz vectors */ +/* (optionally, outside DGEDMD). */ + dgemm_("N", t_or_n__, m, k, n, &one, &y[y_offset], ldy, &w[w_offset], + ldw, &zero, &z__[z_offset], ldz); +/* Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRI */ +/* Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K)) ! INTRI */ + +/* At this point Z contains */ +/* A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and */ +/* this is needed for computing the residuals. */ +/* This matrix is returned in the array B and */ +/* it can be used to compute refined Ritz vectors. */ +/* BLAS */ + dlacpy_("A", m, k, &z__[z_offset], ldz, &b[b_offset], ldb); +/* B(1:M,1:K) = Z(1:M,1:K) ! INTRINSIC */ +/* BLAS CALL */ + dgemm_("T", "N", k, k, m, &one, &x[x_offset], ldx, &z__[z_offset], + ldz, &zero, &s[s_offset], lds); +/* S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRI */ +/* At this point S = U^T * A * U is the Rayleigh quotient. */ +/* BLAS */ + } else { +/* A * U(:,1:K) is not explicitly needed and the */ +/* computation is organized differently. The Rayleigh */ +/* quotient is computed more efficiently. */ + dgemm_("T", "N", k, n, m, &one, &x[x_offset], ldx, &y[y_offset], ldy, + &zero, &z__[z_offset], ldz); +/* Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) ) ! IN */ +/* In the two DGEMM calls here, can use K for LDZ. */ +/* B */ + dgemm_("N", t_or_n__, k, k, n, &one, &z__[z_offset], ldz, &w[w_offset] + , ldw, &zero, &s[s_offset], lds); +/* S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRIN */ +/* S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K))) ! INTRIN */ +/* At this point S = U^T * A * U is the Rayleigh quotient. */ +/* If the residuals are requested, save scaled V_k into Z. */ +/* Recall that V_k or V_k^T is stored in W. */ +/* BLAS */ + if (wntres || wntex) { + if (lsame_(t_or_n__, "N")) { + dlacpy_("A", n, k, &w[w_offset], ldw, &z__[z_offset], ldz); + } else { + dlacpy_("A", k, n, &w[w_offset], ldw, &z__[z_offset], ldz); + } + } + } + +/* <5> Compute the Ritz values and (if requested) the */ +/* right eigenvectors of the Rayleigh quotient. */ + + i__1 = *lwork - *n; + dgeev_("N", jobzl, k, &s[s_offset], lds, &reig[1], &imeig[1], &w[w_offset] + , ldw, &w[w_offset], ldw, &work[*n + 1], &i__1, &info1); + +/* W(1:K,1:K) contains the eigenvectors of the Rayleigh */ +/* quotient. Even in the case of complex spectrum, all */ +/* computation is done in real arithmetic. REIG and */ +/* IMEIG are the real and the imaginary parts of the */ +/* eigenvalues, so that the spectrum is given as */ +/* REIG(:) + sqrt(-1)*IMEIG(:). Complex conjugate pairs */ +/* are listed at consecutive positions. For such a */ +/* complex conjugate pair of the eigenvalues, the */ +/* corresponding eigenvectors are also a complex */ +/* conjugate pair with the real and imaginary parts */ +/* stored column-wise in W at the corresponding */ +/* consecutive column indices. See the description of Z. */ +/* Also, see the description of DGEEV. */ +/* LAPACK C */ + if (info1 > 0) { +/* DGEEV failed to compute the eigenvalues and */ +/* eigenvectors of the Rayleigh quotient. */ + *info = 3; + return 0; + } + +/* <6> Compute the eigenvectors (if requested) and, */ +/* the residuals (if requested). */ + + if (wntvec || wntex) { + if (wntres) { + if (wntref) { +/* Here, if the refinement is requested, we have */ +/* A*U(:,1:K) already computed and stored in Z. */ +/* For the residuals, need Y = A * U(:,1;K) * W. */ + dgemm_("N", "N", m, k, k, &one, &z__[z_offset], ldz, &w[ + w_offset], ldw, &zero, &y[y_offset], ldy); +/* Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K) ! INTRINSIC */ +/* This frees Z; Y contains A * U(:,1:K) * W. */ +/* BLAS CALL */ + } else { +/* Compute S = V_k * Sigma_k^(-1) * W, where */ +/* V_k * Sigma_k^(-1) is stored in Z */ + dgemm_(t_or_n__, "N", n, k, k, &one, &z__[z_offset], ldz, &w[ + w_offset], ldw, &zero, &s[s_offset], lds); +/* Then, compute Z = Y * S = */ +/* = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = */ +/* = A * U(:,1:K) * W(1:K,1:K) */ + dgemm_("N", "N", m, k, n, &one, &y[y_offset], ldy, &s[ + s_offset], lds, &zero, &z__[z_offset], ldz); +/* Save a copy of Z into Y and free Z for holding */ +/* the Ritz vectors. */ + dlacpy_("A", m, k, &z__[z_offset], ldz, &y[y_offset], ldy); + if (wntex) { + dlacpy_("A", m, k, &z__[z_offset], ldz, &b[b_offset], ldb); + } + } + } else if (wntex) { +/* Compute S = V_k * Sigma_k^(-1) * W, where */ +/* V_k * Sigma_k^(-1) is stored in Z */ + dgemm_(t_or_n__, "N", n, k, k, &one, &z__[z_offset], ldz, &w[ + w_offset], ldw, &zero, &s[s_offset], lds); +/* Then, compute Z = Y * S = */ +/* = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = */ +/* = A * U(:,1:K) * W(1:K,1:K) */ + dgemm_("N", "N", m, k, n, &one, &y[y_offset], ldy, &s[s_offset], + lds, &zero, &b[b_offset], ldb); +/* The above call replaces the following two calls */ +/* that were used in the developing-testing phase. */ +/* CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, & */ +/* LDS, ZERO, Z, LDZ) */ +/* Save a copy of Z into B and free Z for holding */ +/* the Ritz vectors. */ +/* CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB ) */ + } + +/* Compute the real form of the Ritz vectors */ + if (wntvec) { + dgemm_("N", "N", m, k, k, &one, &x[x_offset], ldx, &w[w_offset], + ldw, &zero, &z__[z_offset], ldz); + } +/* Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K)) ! INTRINSIC */ + +/* BLAS CALL */ + if (wntres) { + i__ = 1; + while(i__ <= *k) { + if (imeig[i__] == zero) { +/* have a real eigenvalue with real eigenvector */ + d__1 = -reig[i__]; + daxpy_(m, &d__1, &z__[i__ * z_dim1 + 1], &c__1, &y[i__ * + y_dim1 + 1], &c__1); +/* Y(1:M,i) = Y(1:M,i) - REIG(i) * Z(1:M,i) ! */ + + res[i__] = dnrm2_(m, &y[i__ * y_dim1 + 1], &c__1); + + ++i__; + } else { +/* Have a complex conjugate pair */ +/* REIG(i) +- sqrt(-1)*IMEIG(i). */ +/* Since all computation is done in real */ +/* arithmetic, the formula for the residual */ +/* is recast for real representation of the */ +/* complex conjugate eigenpair. See the */ +/* description of RES. */ + ab[0] = reig[i__]; + ab[1] = -imeig[i__]; + ab[2] = imeig[i__]; + ab[3] = reig[i__]; + d__1 = -one; + dgemm_("N", "N", m, &c__2, &c__2, &d__1, &z__[i__ * + z_dim1 + 1], ldz, ab, &c__2, &one, &y[i__ * + y_dim1 + 1], ldy); +/* Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB ! INT */ +/* BL */ + res[i__] = dlange_("F", m, &c__2, &y[i__ * y_dim1 + 1], + ldy, &work[*n + 1]); +/* LA */ + res[i__ + 1] = res[i__]; + i__ += 2; + } + } + } + } + + if (*whtsvd == 4) { + work[*n + 1] = xscl1; + work[*n + 2] = xscl2; + } + +/* Successful exit. */ + if (! badxy) { + *info = 0; + } else { +/* A warning on possible data inconsistency. */ +/* This should be a rare event. */ + *info = 4; + } +/* ............................................................ */ + return 0; +/* ...... */ +} /* dgedmd_ */ + diff --git a/lapack-netlib/SRC/dgedmdq.c b/lapack-netlib/SRC/dgedmdq.c index 447b23014..a743a3156 100644 --- a/lapack-netlib/SRC/dgedmdq.c +++ b/lapack-netlib/SRC/dgedmdq.c @@ -509,3 +509,792 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ +/* -- translated by f2c (version 20000121). + You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static integer c_n1 = -1; + +/* Subroutine */ int dgedmdq_(char *jobs, char *jobz, char *jobr, char *jobq, + char *jobt, char *jobf, integer *whtsvd, integer *m, integer *n, + doublereal *f, integer *ldf, doublereal *x, integer *ldx, doublereal * + y, integer *ldy, integer *nrnk, doublereal *tol, integer *k, + doublereal *reig, doublereal *imeig, doublereal *z__, integer *ldz, + doublereal *res, doublereal *b, integer *ldb, doublereal *v, integer * + ldv, doublereal *s, integer *lds, doublereal *work, integer *lwork, + integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer f_dim1, f_offset, x_dim1, x_offset, y_dim1, y_offset, z_dim1, + z_offset, b_dim1, b_offset, v_dim1, v_offset, s_dim1, s_offset, + i__1, i__2; + + /* Local variables */ + doublereal zero; + integer info1; + extern logical lsame_(char *, char *); + char jobvl[1]; + integer minmn; + logical wantq; + integer mlwqr, olwqr; + logical wntex; + extern /* Subroutine */ int dgedmd_(char *, char *, char *, char *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, integer + *), dgeqrf_(integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dlacpy_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *), dlaset_(char *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *), xerbla_(char *, integer *); + integer mlwdmd, olwdmd; + logical sccolx, sccoly; + extern /* Subroutine */ int dorgqr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dormqr_(char *, char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + integer iminwr; + logical wntvec, wntvcf; + integer mlwgqr; + logical wntref; + integer mlwork, olwgqr, olwork; + doublereal rdummy[2]; + integer mlwmqr, olwmqr; + logical lquery, wntres, wnttrf, wntvcq; + doublereal one; + +/* March 2023 */ +/* ..... */ +/* USE iso_fortran_env */ +/* INTEGER, PARAMETER :: WP = real64 */ +/* ..... */ +/* Scalar arguments */ +/* Array arguments */ +/* ..... */ +/* Purpose */ +/* ======= */ +/* DGEDMDQ computes the Dynamic Mode Decomposition (DMD) for */ +/* a pair of data snapshot matrices, using a QR factorization */ +/* based compression of the data. For the input matrices */ +/* X and Y such that Y = A*X with an unaccessible matrix */ +/* A, DGEDMDQ computes a certain number of Ritz pairs of A using */ +/* the standard Rayleigh-Ritz extraction from a subspace of */ +/* range(X) that is determined using the leading left singular */ +/* vectors of X. Optionally, DGEDMDQ returns the residuals */ +/* of the computed Ritz pairs, the information needed for */ +/* a refinement of the Ritz vectors, or the eigenvectors of */ +/* the Exact DMD. */ +/* For further details see the references listed */ +/* below. For more details of the implementation see [3]. */ + +/* References */ +/* ========== */ +/* [1] P. Schmid: Dynamic mode decomposition of numerical */ +/* and experimental data, */ +/* Journal of Fluid Mechanics 656, 5-28, 2010. */ +/* [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal */ +/* decompositions: analysis and enhancements, */ +/* SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. */ +/* [3] Z. Drmac: A LAPACK implementation of the Dynamic */ +/* Mode Decomposition I. Technical report. AIMDyn Inc. */ +/* and LAPACK Working Note 298. */ +/* [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. */ +/* Brunton, N. Kutz: On Dynamic Mode Decomposition: */ +/* Theory and Applications, Journal of Computational */ +/* Dynamics 1(2), 391 -421, 2014. */ + +/* Developed and supported by: */ +/* =========================== */ +/* Developed and coded by Zlatko Drmac, Faculty of Science, */ +/* University of Zagreb; drmac@math.hr */ +/* In cooperation with */ +/* AIMdyn Inc., Santa Barbara, CA. */ +/* and supported by */ +/* - DARPA SBIR project "Koopman Operator-Based Forecasting */ +/* for Nonstationary Processes from Near-Term, Limited */ +/* Observational Data" Contract No: W31P4Q-21-C-0007 */ +/* - DARPA PAI project "Physics-Informed Machine Learning */ +/* Methodologies" Contract No: HR0011-18-9-0033 */ +/* - DARPA MoDyL project "A Data-Driven, Operator-Theoretic */ +/* Framework for Space-Time Analysis of Process Dynamics" */ +/* Contract No: HR0011-16-C-0116 */ +/* Any opinions, findings and conclusions or recommendations */ +/* expressed in this material are those of the author and */ +/* do not necessarily reflect the views of the DARPA SBIR */ +/* Program Office. */ +/* ============================================================ */ +/* Distribution Statement A: */ +/* Approved for Public Release, Distribution Unlimited. */ +/* Cleared by DARPA on September 29, 2022 */ +/* ============================================================ */ +/* ...................................................................... */ +/* Arguments */ +/* ========= */ +/* JOBS (input) CHARACTER*1 */ +/* Determines whether the initial data snapshots are scaled */ +/* by a diagonal matrix. The data snapshots are the columns */ +/* of F. The leading N-1 columns of F are denoted X and the */ +/* trailing N-1 columns are denoted Y. */ +/* 'S' :: The data snapshots matrices X and Y are multiplied */ +/* with a diagonal matrix D so that X*D has unit */ +/* nonzero columns (in the Euclidean 2-norm) */ +/* 'C' :: The snapshots are scaled as with the 'S' option. */ +/* If it is found that an i-th column of X is zero */ +/* vector and the corresponding i-th column of Y is */ +/* non-zero, then the i-th column of Y is set to */ +/* zero and a warning flag is raised. */ +/* 'Y' :: The data snapshots matrices X and Y are multiplied */ +/* by a diagonal matrix D so that Y*D has unit */ +/* nonzero columns (in the Euclidean 2-norm) */ +/* 'N' :: No data scaling. */ +/* ..... */ +/* JOBZ (input) CHARACTER*1 */ +/* Determines whether the eigenvectors (Koopman modes) will */ +/* be computed. */ +/* 'V' :: The eigenvectors (Koopman modes) will be computed */ +/* and returned in the matrix Z. */ +/* See the description of Z. */ +/* 'F' :: The eigenvectors (Koopman modes) will be returned */ +/* in factored form as the product Z*V, where Z */ +/* is orthonormal and V contains the eigenvectors */ +/* of the corresponding Rayleigh quotient. */ +/* See the descriptions of F, V, Z. */ +/* 'Q' :: The eigenvectors (Koopman modes) will be returned */ +/* in factored form as the product Q*Z, where Z */ +/* contains the eigenvectors of the compression of the */ +/* underlying discretized operator onto the span of */ +/* the data snapshots. See the descriptions of F, V, Z. */ +/* Q is from the initial QR factorization. */ +/* 'N' :: The eigenvectors are not computed. */ +/* ..... */ +/* JOBR (input) CHARACTER*1 */ +/* Determines whether to compute the residuals. */ +/* 'R' :: The residuals for the computed eigenpairs will */ +/* be computed and stored in the array RES. */ +/* See the description of RES. */ +/* For this option to be legal, JOBZ must be 'V'. */ +/* 'N' :: The residuals are not computed. */ +/* ..... */ +/* JOBQ (input) CHARACTER*1 */ +/* Specifies whether to explicitly compute and return the */ +/* orthogonal matrix from the QR factorization. */ +/* 'Q' :: The matrix Q of the QR factorization of the data */ +/* snapshot matrix is computed and stored in the */ +/* array F. See the description of F. */ +/* 'N' :: The matrix Q is not explicitly computed. */ +/* ..... */ +/* JOBT (input) CHARACTER*1 */ +/* Specifies whether to return the upper triangular factor */ +/* from the QR factorization. */ +/* 'R' :: The matrix R of the QR factorization of the data */ +/* snapshot matrix F is returned in the array Y. */ +/* See the description of Y and Further details. */ +/* 'N' :: The matrix R is not returned. */ +/* ..... */ +/* JOBF (input) CHARACTER*1 */ +/* Specifies whether to store information needed for post- */ +/* processing (e.g. computing refined Ritz vectors) */ +/* 'R' :: The matrix needed for the refinement of the Ritz */ +/* vectors is computed and stored in the array B. */ +/* See the description of B. */ +/* 'E' :: The unscaled eigenvectors of the Exact DMD are */ +/* computed and returned in the array B. See the */ +/* description of B. */ +/* 'N' :: No eigenvector refinement data is computed. */ +/* To be useful on exit, this option needs JOBQ='Q'. */ +/* ..... */ +/* WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } */ +/* Allows for a selection of the SVD algorithm from the */ +/* LAPACK library. */ +/* 1 :: DGESVD (the QR SVD algorithm) */ +/* 2 :: DGESDD (the Divide and Conquer algorithm; if enough */ +/* workspace available, this is the fastest option) */ +/* 3 :: DGESVDQ (the preconditioned QR SVD ; this and 4 */ +/* are the most accurate options) */ +/* 4 :: DGEJSV (the preconditioned Jacobi SVD; this and 3 */ +/* are the most accurate options) */ +/* For the four methods above, a significant difference in */ +/* the accuracy of small singular values is possible if */ +/* the snapshots vary in norm so that X is severely */ +/* ill-conditioned. If small (smaller than EPS*||X||) */ +/* singular values are of interest and JOBS=='N', then */ +/* the options (3, 4) give the most accurate results, where */ +/* the option 4 is slightly better and with stronger */ +/* theoretical background. */ +/* If JOBS=='S', i.e. the columns of X will be normalized, */ +/* then all methods give nearly equally accurate results. */ +/* ..... */ +/* M (input) INTEGER, M >= 0 */ +/* The state space dimension (the number of rows of F). */ +/* ..... */ +/* N (input) INTEGER, 0 <= N <= M */ +/* The number of data snapshots from a single trajectory, */ +/* taken at equidistant discrete times. This is the */ +/* number of columns of F. */ +/* ..... */ +/* F (input/output) REAL(KIND=WP) M-by-N array */ +/* > On entry, */ +/* the columns of F are the sequence of data snapshots */ +/* from a single trajectory, taken at equidistant discrete */ +/* times. It is assumed that the column norms of F are */ +/* in the range of the normalized floating point numbers. */ +/* < On exit, */ +/* If JOBQ == 'Q', the array F contains the orthogonal */ +/* matrix/factor of the QR factorization of the initial */ +/* data snapshots matrix F. See the description of JOBQ. */ +/* If JOBQ == 'N', the entries in F strictly below the main */ +/* diagonal contain, column-wise, the information on the */ +/* Householder vectors, as returned by DGEQRF. The */ +/* remaining information to restore the orthogonal matrix */ +/* of the initial QR factorization is stored in WORK(1:N). */ +/* See the description of WORK. */ +/* ..... */ +/* LDF (input) INTEGER, LDF >= M */ +/* The leading dimension of the array F. */ +/* ..... */ +/* X (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array */ +/* X is used as workspace to hold representations of the */ +/* leading N-1 snapshots in the orthonormal basis computed */ +/* in the QR factorization of F. */ +/* On exit, the leading K columns of X contain the leading */ +/* K left singular vectors of the above described content */ +/* of X. To lift them to the space of the left singular */ +/* vectors U(:,1:K)of the input data, pre-multiply with the */ +/* Q factor from the initial QR factorization. */ +/* See the descriptions of F, K, V and Z. */ +/* ..... */ +/* LDX (input) INTEGER, LDX >= N */ +/* The leading dimension of the array X. */ +/* ..... */ +/* Y (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array */ +/* Y is used as workspace to hold representations of the */ +/* trailing N-1 snapshots in the orthonormal basis computed */ +/* in the QR factorization of F. */ +/* On exit, */ +/* If JOBT == 'R', Y contains the MIN(M,N)-by-N upper */ +/* triangular factor from the QR factorization of the data */ +/* snapshot matrix F. */ +/* ..... */ +/* LDY (input) INTEGER , LDY >= N */ +/* The leading dimension of the array Y. */ +/* ..... */ +/* NRNK (input) INTEGER */ +/* Determines the mode how to compute the numerical rank, */ +/* i.e. how to truncate small singular values of the input */ +/* matrix X. On input, if */ +/* NRNK = -1 :: i-th singular value sigma(i) is truncated */ +/* if sigma(i) <= TOL*sigma(1) */ +/* This option is recommended. */ +/* NRNK = -2 :: i-th singular value sigma(i) is truncated */ +/* if sigma(i) <= TOL*sigma(i-1) */ +/* This option is included for R&D purposes. */ +/* It requires highly accurate SVD, which */ +/* may not be feasible. */ +/* The numerical rank can be enforced by using positive */ +/* value of NRNK as follows: */ +/* 0 < NRNK <= N-1 :: at most NRNK largest singular values */ +/* will be used. If the number of the computed nonzero */ +/* singular values is less than NRNK, then only those */ +/* nonzero values will be used and the actually used */ +/* dimension is less than NRNK. The actual number of */ +/* the nonzero singular values is returned in the variable */ +/* K. See the description of K. */ +/* ..... */ +/* TOL (input) REAL(KIND=WP), 0 <= TOL < 1 */ +/* The tolerance for truncating small singular values. */ +/* See the description of NRNK. */ +/* ..... */ +/* K (output) INTEGER, 0 <= K <= N */ +/* The dimension of the SVD/POD basis for the leading N-1 */ +/* data snapshots (columns of F) and the number of the */ +/* computed Ritz pairs. The value of K is determined */ +/* according to the rule set by the parameters NRNK and */ +/* TOL. See the descriptions of NRNK and TOL. */ +/* ..... */ +/* REIG (output) REAL(KIND=WP) (N-1)-by-1 array */ +/* The leading K (K<=N) entries of REIG contain */ +/* the real parts of the computed eigenvalues */ +/* REIG(1:K) + sqrt(-1)*IMEIG(1:K). */ +/* See the descriptions of K, IMEIG, Z. */ +/* ..... */ +/* IMEIG (output) REAL(KIND=WP) (N-1)-by-1 array */ +/* The leading K (K0, then the corresponding complex */ +/* conjugate pair of eigenvalues reads */ +/* LAMBDA(i) = REIG(i) + sqrt(-1)*IMAG(i) */ +/* LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i) */ +/* That is, complex conjugate pairs have consequtive */ +/* indices (i,i+1), with the positive imaginary part */ +/* listed first. */ +/* See the descriptions of K, REIG, Z. */ +/* ..... */ +/* Z (workspace/output) REAL(KIND=WP) M-by-(N-1) array */ +/* If JOBZ =='V' then */ +/* Z contains real Ritz vectors as follows: */ +/* If IMEIG(i)=0, then Z(:,i) is an eigenvector of */ +/* the i-th Ritz value. */ +/* If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then */ +/* [Z(:,i) Z(:,i+1)] span an invariant subspace and */ +/* the Ritz values extracted from this subspace are */ +/* REIG(i) + sqrt(-1)*IMEIG(i) and */ +/* REIG(i) - sqrt(-1)*IMEIG(i). */ +/* The corresponding eigenvectors are */ +/* Z(:,i) + sqrt(-1)*Z(:,i+1) and */ +/* Z(:,i) - sqrt(-1)*Z(:,i+1), respectively. */ +/* If JOBZ == 'F', then the above descriptions hold for */ +/* the columns of Z*V, where the columns of V are the */ +/* eigenvectors of the K-by-K Rayleigh quotient, and Z is */ +/* orthonormal. The columns of V are similarly structured: */ +/* If IMEIG(i) == 0 then Z*V(:,i) is an eigenvector, and if */ +/* IMEIG(i) > 0 then Z*V(:,i)+sqrt(-1)*Z*V(:,i+1) and */ +/* Z*V(:,i)-sqrt(-1)*Z*V(:,i+1) */ +/* are the eigenvectors of LAMBDA(i), LAMBDA(i+1). */ +/* See the descriptions of REIG, IMEIG, X and V. */ +/* ..... */ +/* LDZ (input) INTEGER , LDZ >= M */ +/* The leading dimension of the array Z. */ +/* ..... */ +/* RES (output) REAL(KIND=WP) (N-1)-by-1 array */ +/* RES(1:K) contains the residuals for the K computed */ +/* Ritz pairs. */ +/* If LAMBDA(i) is real, then */ +/* RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2. */ +/* If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair */ +/* then */ +/* RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F */ +/* where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ] */ +/* [-imag(LAMBDA(i)) real(LAMBDA(i)) ]. */ +/* It holds that */ +/* RES(i) = || A*ZC(:,i) - LAMBDA(i) *ZC(:,i) ||_2 */ +/* RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2 */ +/* where ZC(:,i) = Z(:,i) + sqrt(-1)*Z(:,i+1) */ +/* ZC(:,i+1) = Z(:,i) - sqrt(-1)*Z(:,i+1) */ +/* See the description of Z. */ +/* ..... */ +/* B (output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array. */ +/* IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can */ +/* be used for computing the refined vectors; see further */ +/* details in the provided references. */ +/* If JOBF == 'E', B(1:N,1;K) contains */ +/* A*U(:,1:K)*W(1:K,1:K), which are the vectors from the */ +/* Exact DMD, up to scaling by the inverse eigenvalues. */ +/* In both cases, the content of B can be lifted to the */ +/* original dimension of the input data by pre-multiplying */ +/* with the Q factor from the initial QR factorization. */ +/* Here A denotes a compression of the underlying operator. */ +/* See the descriptions of F and X. */ +/* If JOBF =='N', then B is not referenced. */ +/* ..... */ +/* LDB (input) INTEGER, LDB >= MIN(M,N) */ +/* The leading dimension of the array B. */ +/* ..... */ +/* V (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array */ +/* On exit, V(1:K,1:K) contains the K eigenvectors of */ +/* the Rayleigh quotient. The eigenvectors of a complex */ +/* conjugate pair of eigenvalues are returned in real form */ +/* as explained in the description of Z. The Ritz vectors */ +/* (returned in Z) are the product of X and V; see */ +/* the descriptions of X and Z. */ +/* ..... */ +/* LDV (input) INTEGER, LDV >= N-1 */ +/* The leading dimension of the array V. */ +/* ..... */ +/* S (output) REAL(KIND=WP) (N-1)-by-(N-1) array */ +/* The array S(1:K,1:K) is used for the matrix Rayleigh */ +/* quotient. This content is overwritten during */ +/* the eigenvalue decomposition by DGEEV. */ +/* See the description of K. */ +/* ..... */ +/* LDS (input) INTEGER, LDS >= N-1 */ +/* The leading dimension of the array S. */ +/* ..... */ +/* WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array */ +/* On exit, */ +/* WORK(1:MIN(M,N)) contains the scalar factors of the */ +/* elementary reflectors as returned by DGEQRF of the */ +/* M-by-N input matrix F. */ +/* WORK(MIN(M,N)+1:MIN(M,N)+N-1) contains the singular values of */ +/* the input submatrix F(1:M,1:N-1). */ +/* If the call to DGEDMDQ is only workspace query, then */ +/* WORK(1) contains the minimal workspace length and */ +/* WORK(2) is the optimal workspace length. Hence, the */ +/* length of work is at least 2. */ +/* See the description of LWORK. */ +/* ..... */ +/* LWORK (input) INTEGER */ +/* The minimal length of the workspace vector WORK. */ +/* LWORK is calculated as follows: */ +/* Let MLWQR = N (minimal workspace for DGEQRF[M,N]) */ +/* MLWDMD = minimal workspace for DGEDMD (see the */ +/* description of LWORK in DGEDMD) for */ +/* snapshots of dimensions MIN(M,N)-by-(N-1) */ +/* MLWMQR = N (minimal workspace for */ +/* DORMQR['L','N',M,N,N]) */ +/* MLWGQR = N (minimal workspace for DORGQR[M,N,N]) */ +/* Then */ +/* LWORK = MAX(N+MLWQR, N+MLWDMD) */ +/* is updated as follows: */ +/* if JOBZ == 'V' or JOBZ == 'F' THEN */ +/* LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWMQR ) */ +/* if JOBQ == 'Q' THEN */ +/* LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWGQR) */ +/* If on entry LWORK = -1, then a workspace query is */ +/* assumed and the procedure only computes the minimal */ +/* and the optimal workspace lengths for both WORK and */ +/* IWORK. See the descriptions of WORK and IWORK. */ +/* ..... */ +/* IWORK (workspace/output) INTEGER LIWORK-by-1 array */ +/* Workspace that is required only if WHTSVD equals */ +/* 2 , 3 or 4. (See the description of WHTSVD). */ +/* If on entry LWORK =-1 or LIWORK=-1, then the */ +/* minimal length of IWORK is computed and returned in */ +/* IWORK(1). See the description of LIWORK. */ +/* ..... */ +/* LIWORK (input) INTEGER */ +/* The minimal length of the workspace vector IWORK. */ +/* If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 */ +/* Let M1=MIN(M,N), N1=N-1. Then */ +/* If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1)) */ +/* If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1) */ +/* If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1) */ +/* If on entry LIWORK = -1, then a workspace query is */ +/* assumed and the procedure only computes the minimal */ +/* and the optimal workspace lengths for both WORK and */ +/* IWORK. See the descriptions of WORK and IWORK. */ +/* ..... */ +/* INFO (output) INTEGER */ +/* -i < 0 :: On entry, the i-th argument had an */ +/* illegal value */ +/* = 0 :: Successful return. */ +/* = 1 :: Void input. Quick exit (M=0 or N=0). */ +/* = 2 :: The SVD computation of X did not converge. */ +/* Suggestion: Check the input data and/or */ +/* repeat with different WHTSVD. */ +/* = 3 :: The computation of the eigenvalues did not */ +/* converge. */ +/* = 4 :: If data scaling was requested on input and */ +/* the procedure found inconsistency in the data */ +/* such that for some column index i, */ +/* X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set */ +/* to zero if JOBS=='C'. The computation proceeds */ +/* with original or modified data and warning */ +/* flag is set with INFO=4. */ +/* ............................................................. */ +/* ............................................................. */ +/* Parameters */ +/* ~~~~~~~~~~ */ + +/* Local scalars */ +/* ~~~~~~~~~~~~~ */ + +/* Local array */ +/* ~~~~~~~~~~~ */ + +/* External functions (BLAS and LAPACK) */ +/* ~~~~~~~~~~~~~~~~~ */ + +/* External subroutines (BLAS and LAPACK) */ +/* ~~~~~~~~~~~~~~~~~~~~ */ +/* External subroutines */ +/* ~~~~~~~~~~~~~~~~~~~~ */ +/* Intrinsic functions */ +/* ~~~~~~~~~~~~~~~~~~~ */ +/* .......................................................... */ + /* Parameter adjustments */ + f_dim1 = *ldf; + f_offset = 1 + f_dim1 * 1; + f -= f_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1 * 1; + y -= y_offset; + --reig; + --imeig; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --res; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + s_dim1 = *lds; + s_offset = 1 + s_dim1 * 1; + s -= s_offset; + --work; + --iwork; + + /* Function Body */ + zero = 0.f; + one = 1.f; + +/* Test the input arguments */ + wntres = lsame_(jobr, "R"); + sccolx = lsame_(jobs, "S") || lsame_(jobs, "C"); + sccoly = lsame_(jobs, "Y"); + wntvec = lsame_(jobz, "V"); + wntvcf = lsame_(jobz, "F"); + wntvcq = lsame_(jobz, "Q"); + wntref = lsame_(jobf, "R"); + wntex = lsame_(jobf, "E"); + wantq = lsame_(jobq, "Q"); + wnttrf = lsame_(jobt, "R"); + minmn = f2cmin(*m,*n); + *info = 0; + lquery = *lwork == -1 || *liwork == -1; + + if (! (sccolx || sccoly || lsame_(jobs, "N"))) { + *info = -1; + } else if (! (wntvec || wntvcf || wntvcq || lsame_(jobz, "N"))) { + *info = -2; + } else if (! (wntres || lsame_(jobr, "N")) || + wntres && lsame_(jobz, "N")) { + *info = -3; + } else if (! (wantq || lsame_(jobq, "N"))) { + *info = -4; + } else if (! (wnttrf || lsame_(jobt, "N"))) { + *info = -5; + } else if (! (wntref || wntex || lsame_(jobf, "N"))) + { + *info = -6; + } else if (! (*whtsvd == 1 || *whtsvd == 2 || *whtsvd == 3 || *whtsvd == + 4)) { + *info = -7; + } else if (*m < 0) { + *info = -8; + } else if (*n < 0 || *n > *m + 1) { + *info = -9; + } else if (*ldf < *m) { + *info = -11; + } else if (*ldx < minmn) { + *info = -13; + } else if (*ldy < minmn) { + *info = -15; + } else if (! (*nrnk == -2 || *nrnk == -1 || *nrnk >= 1 && *nrnk <= *n)) { + *info = -16; + } else if (*tol < zero || *tol >= one) { + *info = -17; + } else if (*ldz < *m) { + *info = -22; + } else if ((wntref || wntex) && *ldb < minmn) { + *info = -25; + } else if (*ldv < *n - 1) { + *info = -27; + } else if (*lds < *n - 1) { + *info = -29; + } + + if (wntvec || wntvcf || wntvcq) { + *(unsigned char *)jobvl = 'V'; + } else { + *(unsigned char *)jobvl = 'N'; + } + if (*info == 0) { +/* Compute the minimal and the optimal workspace */ +/* requirements. Simulate running the code and */ +/* determine minimal and optimal sizes of the */ +/* workspace at any moment of the run. */ + if (*n == 0 || *n == 1) { +/* All output except K is void. INFO=1 signals */ +/* the void input. In case of a workspace query, */ +/* the minimal workspace lengths are returned. */ + if (lquery) { + iwork[1] = 1; + work[1] = 2.; + work[2] = 2.; + } else { + *k = 0; + } + *info = 1; + return 0; + } + mlwqr = f2cmax(1,*n); +/* Minimal workspace length for DGEQRF. */ + mlwork = minmn + mlwqr; + if (lquery) { + dgeqrf_(m, n, &f[f_offset], ldf, &work[1], rdummy, &c_n1, &info1); + olwqr = (integer) rdummy[0]; + olwork = f2cmin(*m,*n) + olwqr; + } + i__1 = *n - 1; + dgedmd_(jobs, jobvl, jobr, jobf, whtsvd, &minmn, &i__1, &x[x_offset], + ldx, &y[y_offset], ldy, nrnk, tol, k, &reig[1], &imeig[1], & + z__[z_offset], ldz, &res[1], &b[b_offset], ldb, &v[v_offset], + ldv, &s[s_offset], lds, &work[1], &c_n1, &iwork[1], liwork, & + info1); + mlwdmd = (integer) work[1]; +/* Computing MAX */ + i__1 = mlwork, i__2 = minmn + mlwdmd; + mlwork = f2cmax(i__1,i__2); + iminwr = iwork[1]; + if (lquery) { + olwdmd = (integer) work[2]; +/* Computing MAX */ + i__1 = olwork, i__2 = minmn + olwdmd; + olwork = f2cmax(i__1,i__2); + } + if (wntvec || wntvcf) { + mlwmqr = f2cmax(1,*n); +/* Computing MAX */ + i__1 = mlwork, i__2 = minmn + *n - 1 + mlwmqr; + mlwork = f2cmax(i__1,i__2); + if (lquery) { + dormqr_("L", "N", m, n, &minmn, &f[f_offset], ldf, &work[1], & + z__[z_offset], ldz, &work[1], &c_n1, &info1); + olwmqr = (integer) work[1]; +/* Computing MAX */ + i__1 = olwork, i__2 = minmn + *n - 1 + olwmqr; + olwork = f2cmax(i__1,i__2); + } + } + if (wantq) { + mlwgqr = *n; +/* Computing MAX */ + i__1 = mlwork, i__2 = minmn + *n - 1 + mlwgqr; + mlwork = f2cmax(i__1,i__2); + if (lquery) { + dorgqr_(m, &minmn, &minmn, &f[f_offset], ldf, &work[1], &work[ + 1], &c_n1, &info1); + olwgqr = (integer) work[1]; +/* Computing MAX */ + i__1 = olwork, i__2 = minmn + *n - 1 + olwgqr; + olwork = f2cmax(i__1,i__2); + } + } + iminwr = f2cmax(1,iminwr); + mlwork = f2cmax(2,mlwork); + if (*lwork < mlwork && ! lquery) { + *info = -31; + } + if (*liwork < iminwr && ! lquery) { + *info = -33; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEDMDQ", &i__1); + return 0; + } else if (lquery) { +/* Return minimal and optimal workspace sizes */ + iwork[1] = iminwr; + work[1] = (doublereal) mlwork; + work[2] = (doublereal) olwork; + return 0; + } +/* ..... */ +/* Initial QR factorization that is used to represent the */ +/* snapshots as elements of lower dimensional subspace. */ +/* For large scale computation with M >>N , at this place */ +/* one can use an out of core QRF. */ + + i__1 = *lwork - minmn; + dgeqrf_(m, n, &f[f_offset], ldf, &work[1], &work[minmn + 1], &i__1, & + info1); + +/* Define X and Y as the snapshots representations in the */ +/* orthogonal basis computed in the QR factorization. */ +/* X corresponds to the leading N-1 and Y to the trailing */ +/* N-1 snapshots. */ + i__1 = *n - 1; + dlaset_("L", &minmn, &i__1, &zero, &zero, &x[x_offset], ldx); + i__1 = *n - 1; + dlacpy_("U", &minmn, &i__1, &f[f_offset], ldf, &x[x_offset], ldx); + i__1 = *n - 1; + dlacpy_("A", &minmn, &i__1, &f[(f_dim1 << 1) + 1], ldf, &y[y_offset], ldy); + if (*m >= 3) { + i__1 = minmn - 2; + i__2 = *n - 2; + dlaset_("L", &i__1, &i__2, &zero, &zero, &y[y_dim1 + 3], ldy); + } + +/* Compute the DMD of the projected snapshot pairs (X,Y) */ + i__1 = *n - 1; + i__2 = *lwork - minmn; + dgedmd_(jobs, jobvl, jobr, jobf, whtsvd, &minmn, &i__1, &x[x_offset], ldx, + &y[y_offset], ldy, nrnk, tol, k, &reig[1], &imeig[1], &z__[ + z_offset], ldz, &res[1], &b[b_offset], ldb, &v[v_offset], ldv, &s[ + s_offset], lds, &work[minmn + 1], &i__2, &iwork[1], liwork, & + info1); + if (info1 == 2 || info1 == 3) { +/* Return with error code. See DGEDMD for details. */ + *info = info1; + return 0; + } else { + *info = info1; + } + +/* The Ritz vectors (Koopman modes) can be explicitly */ +/* formed or returned in factored form. */ + if (wntvec) { +/* Compute the eigenvectors explicitly. */ + if (*m > minmn) { + i__1 = *m - minmn; + dlaset_("A", &i__1, k, &zero, &zero, &z__[minmn + 1 + z_dim1], + ldz); + } + i__1 = *lwork - (minmn + *n - 1); + dormqr_("L", "N", m, k, &minmn, &f[f_offset], ldf, &work[1], &z__[ + z_offset], ldz, &work[minmn + *n], &i__1, &info1); + } else if (wntvcf) { +/* Return the Ritz vectors (eigenvectors) in factored */ +/* form Z*V, where Z contains orthonormal matrix (the */ +/* product of Q from the initial QR factorization and */ +/* the SVD/POD_basis returned by DGEDMD in X) and the */ +/* second factor (the eigenvectors of the Rayleigh */ +/* quotient) is in the array V, as returned by DGEDMD. */ + dlacpy_("A", n, k, &x[x_offset], ldx, &z__[z_offset], ldz); + if (*m > *n) { + i__1 = *m - *n; + dlaset_("A", &i__1, k, &zero, &zero, &z__[*n + 1 + z_dim1], ldz); + } + i__1 = *lwork - (minmn + *n - 1); + dormqr_("L", "N", m, k, &minmn, &f[f_offset], ldf, &work[1], &z__[ + z_offset], ldz, &work[minmn + *n], &i__1, &info1); + } + +/* Some optional output variables: */ + +/* The upper triangular factor R in the initial QR */ +/* factorization is optionally returned in the array Y. */ +/* This is useful if this call to DGEDMDQ is to be */ +/* followed by a streaming DMD that is implemented in a */ +/* QR compressed form. */ + if (wnttrf) { +/* Return the upper triangular R in Y */ + dlaset_("A", &minmn, n, &zero, &zero, &y[y_offset], ldy); + dlacpy_("U", &minmn, n, &f[f_offset], ldf, &y[y_offset], ldy); + } + +/* The orthonormal/orthogonal factor Q in the initial QR */ +/* factorization is optionally returned in the array F. */ +/* Same as with the triangular factor above, this is */ +/* useful in a streaming DMD. */ + if (wantq) { +/* Q overwrites F */ + i__1 = *lwork - (minmn + *n - 1); + dorgqr_(m, &minmn, &minmn, &f[f_offset], ldf, &work[1], &work[minmn + + *n], &i__1, &info1); + } + + return 0; + +} /* dgedmdq_ */ + diff --git a/lapack-netlib/SRC/sgedmd.c b/lapack-netlib/SRC/sgedmd.c index 447b23014..c8f3a5964 100644 --- a/lapack-netlib/SRC/sgedmd.c +++ b/lapack-netlib/SRC/sgedmd.c @@ -509,3 +509,1238 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ +/* -- translated by f2c (version 20000121). + You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static integer c_n1 = -1; +static integer c__1 = 1; +static integer c__0 = 0; +static integer c__2 = 2; + +/* Subroutine */ int sgedmd_(char *jobs, char *jobz, char *jobr, char *jobf, + integer *whtsvd, integer *m, integer *n, real *x, integer *ldx, real * + y, integer *ldy, integer *nrnk, real *tol, integer *k, real *reig, + real *imeig, real *z__, integer *ldz, real *res, real *b, integer * + ldb, real *w, integer *ldw, real *s, integer *lds, real *work, + integer *lwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer x_dim1, x_offset, y_dim1, y_offset, z_dim1, z_offset, b_dim1, + b_offset, w_dim1, w_offset, s_dim1, s_offset, i__1, i__2; + real r__1, r__2; + + /* Local variables */ + real zero, ssum; + integer info1, info2; + real xscl1, xscl2; + extern real snrm2_(integer *, real *, integer *); + integer i__, j; + real scale; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + logical badxy; + real small; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *), sgeev_(char *, char *, + integer *, real *, integer *, real *, real *, real *, integer *, + real *, integer *, real *, integer *, integer *); + char jobzl[1]; + extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, + real *, integer *); + logical wntex; + real ab[4] /* was [2][2] */; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + extern /* Subroutine */ int sgesdd_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *, integer *, real *, + integer *, integer *, integer *), xerbla_(char *, integer + *); + char t_or_n__[1]; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + extern integer isamax_(integer *, real *, integer *); + logical sccolx, sccoly; + extern logical sisnan_(real *); + extern /* Subroutine */ int sgesvd_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *, real *, integer *, + real *, integer *, integer *); + integer lwrsdd, mwrsdd; + extern /* Subroutine */ int sgejsv_(char *, char *, char *, char *, char * + , char *, integer *, integer *, real *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *, integer *, + integer *), + slacpy_(char *, integer *, integer *, real *, integer *, real *, + integer *); + integer iminwr; + logical wntref, wntvec; + real rootsc; + integer lwrkev, mlwork, mwrkev, numrnk, olwork; + real rdummy[2]; + integer lwrsvd, mwrsvd; + logical lquery, wntres; + char jsvopt[1]; + extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, + real *), mecago_(); + integer mwrsvj, lwrsvq, mwrsvq; + real rdummy2[2], ofl, one; + extern /* Subroutine */ int sgesvdq_(char *, char *, char *, char *, char + *, integer *, integer *, real *, integer *, real *, real *, + integer *, real *, integer *, integer *, integer *, integer *, + real *, integer *, real *, integer *, integer *); + +/* March 2023 */ +/* ..... */ +/* USE iso_fortran_env */ +/* INTEGER, PARAMETER :: WP = real32 */ +/* ..... */ +/* Scalar arguments */ +/* Array arguments */ +/* ............................................................ */ +/* Purpose */ +/* ======= */ +/* SGEDMD computes the Dynamic Mode Decomposition (DMD) for */ +/* a pair of data snapshot matrices. For the input matrices */ +/* X and Y such that Y = A*X with an unaccessible matrix */ +/* A, SGEDMD computes a certain number of Ritz pairs of A using */ +/* the standard Rayleigh-Ritz extraction from a subspace of */ +/* range(X) that is determined using the leading left singular */ +/* vectors of X. Optionally, SGEDMD returns the residuals */ +/* of the computed Ritz pairs, the information needed for */ +/* a refinement of the Ritz vectors, or the eigenvectors of */ +/* the Exact DMD. */ +/* For further details see the references listed */ +/* below. For more details of the implementation see [3]. */ + +/* References */ +/* ========== */ +/* [1] P. Schmid: Dynamic mode decomposition of numerical */ +/* and experimental data, */ +/* Journal of Fluid Mechanics 656, 5-28, 2010. */ +/* [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal */ +/* decompositions: analysis and enhancements, */ +/* SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. */ +/* [3] Z. Drmac: A LAPACK implementation of the Dynamic */ +/* Mode Decomposition I. Technical report. AIMDyn Inc. */ +/* and LAPACK Working Note 298. */ +/* [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. */ +/* Brunton, N. Kutz: On Dynamic Mode Decomposition: */ +/* Theory and Applications, Journal of Computational */ +/* Dynamics 1(2), 391 -421, 2014. */ + +/* ...................................................................... */ +/* Developed and supported by: */ +/* =========================== */ +/* Developed and coded by Zlatko Drmac, Faculty of Science, */ +/* University of Zagreb; drmac@math.hr */ +/* In cooperation with */ +/* AIMdyn Inc., Santa Barbara, CA. */ +/* and supported by */ +/* - DARPA SBIR project "Koopman Operator-Based Forecasting */ +/* for Nonstationary Processes from Near-Term, Limited */ +/* Observational Data" Contract No: W31P4Q-21-C-0007 */ +/* - DARPA PAI project "Physics-Informed Machine Learning */ +/* Methodologies" Contract No: HR0011-18-9-0033 */ +/* - DARPA MoDyL project "A Data-Driven, Operator-Theoretic */ +/* Framework for Space-Time Analysis of Process Dynamics" */ +/* Contract No: HR0011-16-C-0116 */ +/* Any opinions, findings and conclusions or recommendations */ +/* expressed in this material are those of the author and */ +/* do not necessarily reflect the views of the DARPA SBIR */ +/* Program Office */ +/* ============================================================ */ +/* Distribution Statement A: */ +/* Approved for Public Release, Distribution Unlimited. */ +/* Cleared by DARPA on September 29, 2022 */ +/* ============================================================ */ +/* ...................................................................... */ +/* Arguments */ +/* ========= */ +/* JOBS (input) CHARACTER*1 */ +/* Determines whether the initial data snapshots are scaled */ +/* by a diagonal matrix. */ +/* 'S' :: The data snapshots matrices X and Y are multiplied */ +/* with a diagonal matrix D so that X*D has unit */ +/* nonzero columns (in the Euclidean 2-norm) */ +/* 'C' :: The snapshots are scaled as with the 'S' option. */ +/* If it is found that an i-th column of X is zero */ +/* vector and the corresponding i-th column of Y is */ +/* non-zero, then the i-th column of Y is set to */ +/* zero and a warning flag is raised. */ +/* 'Y' :: The data snapshots matrices X and Y are multiplied */ +/* by a diagonal matrix D so that Y*D has unit */ +/* nonzero columns (in the Euclidean 2-norm) */ +/* 'N' :: No data scaling. */ +/* ..... */ +/* JOBZ (input) CHARACTER*1 */ +/* Determines whether the eigenvectors (Koopman modes) will */ +/* be computed. */ +/* 'V' :: The eigenvectors (Koopman modes) will be computed */ +/* and returned in the matrix Z. */ +/* See the description of Z. */ +/* 'F' :: The eigenvectors (Koopman modes) will be returned */ +/* in factored form as the product X(:,1:K)*W, where X */ +/* contains a POD basis (leading left singular vectors */ +/* of the data matrix X) and W contains the eigenvectors */ +/* of the corresponding Rayleigh quotient. */ +/* See the descriptions of K, X, W, Z. */ +/* 'N' :: The eigenvectors are not computed. */ +/* ..... */ +/* JOBR (input) CHARACTER*1 */ +/* Determines whether to compute the residuals. */ +/* 'R' :: The residuals for the computed eigenpairs will be */ +/* computed and stored in the array RES. */ +/* See the description of RES. */ +/* For this option to be legal, JOBZ must be 'V'. */ +/* 'N' :: The residuals are not computed. */ +/* ..... */ +/* JOBF (input) CHARACTER*1 */ +/* Specifies whether to store information needed for post- */ +/* processing (e.g. computing refined Ritz vectors) */ +/* 'R' :: The matrix needed for the refinement of the Ritz */ +/* vectors is computed and stored in the array B. */ +/* See the description of B. */ +/* 'E' :: The unscaled eigenvectors of the Exact DMD are */ +/* computed and returned in the array B. See the */ +/* description of B. */ +/* 'N' :: No eigenvector refinement data is computed. */ +/* ..... */ +/* WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } */ +/* Allows for a selection of the SVD algorithm from the */ +/* LAPACK library. */ +/* 1 :: SGESVD (the QR SVD algorithm) */ +/* 2 :: SGESDD (the Divide and Conquer algorithm; if enough */ +/* workspace available, this is the fastest option) */ +/* 3 :: SGESVDQ (the preconditioned QR SVD ; this and 4 */ +/* are the most accurate options) */ +/* 4 :: SGEJSV (the preconditioned Jacobi SVD; this and 3 */ +/* are the most accurate options) */ +/* For the four methods above, a significant difference in */ +/* the accuracy of small singular values is possible if */ +/* the snapshots vary in norm so that X is severely */ +/* ill-conditioned. If small (smaller than EPS*||X||) */ +/* singular values are of interest and JOBS=='N', then */ +/* the options (3, 4) give the most accurate results, where */ +/* the option 4 is slightly better and with stronger */ +/* theoretical background. */ +/* If JOBS=='S', i.e. the columns of X will be normalized, */ +/* then all methods give nearly equally accurate results. */ +/* ..... */ +/* M (input) INTEGER, M>= 0 */ +/* The state space dimension (the row dimension of X, Y). */ +/* ..... */ +/* N (input) INTEGER, 0 <= N <= M */ +/* The number of data snapshot pairs */ +/* (the number of columns of X and Y). */ +/* ..... */ +/* X (input/output) REAL(KIND=WP) M-by-N array */ +/* > On entry, X contains the data snapshot matrix X. It is */ +/* assumed that the column norms of X are in the range of */ +/* the normalized floating point numbers. */ +/* < On exit, the leading K columns of X contain a POD basis, */ +/* i.e. the leading K left singular vectors of the input */ +/* data matrix X, U(:,1:K). All N columns of X contain all */ +/* left singular vectors of the input matrix X. */ +/* See the descriptions of K, Z and W. */ +/* ..... */ +/* LDX (input) INTEGER, LDX >= M */ +/* The leading dimension of the array X. */ +/* ..... */ +/* Y (input/workspace/output) REAL(KIND=WP) M-by-N array */ +/* > On entry, Y contains the data snapshot matrix Y */ +/* < On exit, */ +/* If JOBR == 'R', the leading K columns of Y contain */ +/* the residual vectors for the computed Ritz pairs. */ +/* See the description of RES. */ +/* If JOBR == 'N', Y contains the original input data, */ +/* scaled according to the value of JOBS. */ +/* ..... */ +/* LDY (input) INTEGER , LDY >= M */ +/* The leading dimension of the array Y. */ +/* ..... */ +/* NRNK (input) INTEGER */ +/* Determines the mode how to compute the numerical rank, */ +/* i.e. how to truncate small singular values of the input */ +/* matrix X. On input, if */ +/* NRNK = -1 :: i-th singular value sigma(i) is truncated */ +/* if sigma(i) <= TOL*sigma(1) */ +/* This option is recommended. */ +/* NRNK = -2 :: i-th singular value sigma(i) is truncated */ +/* if sigma(i) <= TOL*sigma(i-1) */ +/* This option is included for R&D purposes. */ +/* It requires highly accurate SVD, which */ +/* may not be feasible. */ +/* The numerical rank can be enforced by using positive */ +/* value of NRNK as follows: */ +/* 0 < NRNK <= N :: at most NRNK largest singular values */ +/* will be used. If the number of the computed nonzero */ +/* singular values is less than NRNK, then only those */ +/* nonzero values will be used and the actually used */ +/* dimension is less than NRNK. The actual number of */ +/* the nonzero singular values is returned in the variable */ +/* K. See the descriptions of TOL and K. */ +/* ..... */ +/* TOL (input) REAL(KIND=WP), 0 <= TOL < 1 */ +/* The tolerance for truncating small singular values. */ +/* See the description of NRNK. */ +/* ..... */ +/* K (output) INTEGER, 0 <= K <= N */ +/* The dimension of the POD basis for the data snapshot */ +/* matrix X and the number of the computed Ritz pairs. */ +/* The value of K is determined according to the rule set */ +/* by the parameters NRNK and TOL. */ +/* See the descriptions of NRNK and TOL. */ +/* ..... */ +/* REIG (output) REAL(KIND=WP) N-by-1 array */ +/* The leading K (K<=N) entries of REIG contain */ +/* the real parts of the computed eigenvalues */ +/* REIG(1:K) + sqrt(-1)*IMEIG(1:K). */ +/* See the descriptions of K, IMEIG, and Z. */ +/* ..... */ +/* IMEIG (output) REAL(KIND=WP) N-by-1 array */ +/* The leading K (K<=N) entries of IMEIG contain */ +/* the imaginary parts of the computed eigenvalues */ +/* REIG(1:K) + sqrt(-1)*IMEIG(1:K). */ +/* The eigenvalues are determined as follows: */ +/* If IMEIG(i) == 0, then the corresponding eigenvalue is */ +/* real, LAMBDA(i) = REIG(i). */ +/* If IMEIG(i)>0, then the corresponding complex */ +/* conjugate pair of eigenvalues reads */ +/* LAMBDA(i) = REIG(i) + sqrt(-1)*IMAG(i) */ +/* LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i) */ +/* That is, complex conjugate pairs have consecutive */ +/* indices (i,i+1), with the positive imaginary part */ +/* listed first. */ +/* See the descriptions of K, REIG, and Z. */ +/* ..... */ +/* Z (workspace/output) REAL(KIND=WP) M-by-N array */ +/* If JOBZ =='V' then */ +/* Z contains real Ritz vectors as follows: */ +/* If IMEIG(i)=0, then Z(:,i) is an eigenvector of */ +/* the i-th Ritz value; ||Z(:,i)||_2=1. */ +/* If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then */ +/* [Z(:,i) Z(:,i+1)] span an invariant subspace and */ +/* the Ritz values extracted from this subspace are */ +/* REIG(i) + sqrt(-1)*IMEIG(i) and */ +/* REIG(i) - sqrt(-1)*IMEIG(i). */ +/* The corresponding eigenvectors are */ +/* Z(:,i) + sqrt(-1)*Z(:,i+1) and */ +/* Z(:,i) - sqrt(-1)*Z(:,i+1), respectively. */ +/* || Z(:,i:i+1)||_F = 1. */ +/* If JOBZ == 'F', then the above descriptions hold for */ +/* the columns of X(:,1:K)*W(1:K,1:K), where the columns */ +/* of W(1:k,1:K) are the computed eigenvectors of the */ +/* K-by-K Rayleigh quotient. The columns of W(1:K,1:K) */ +/* are similarly structured: If IMEIG(i) == 0 then */ +/* X(:,1:K)*W(:,i) is an eigenvector, and if IMEIG(i)>0 */ +/* then X(:,1:K)*W(:,i)+sqrt(-1)*X(:,1:K)*W(:,i+1) and */ +/* X(:,1:K)*W(:,i)-sqrt(-1)*X(:,1:K)*W(:,i+1) */ +/* are the eigenvectors of LAMBDA(i), LAMBDA(i+1). */ +/* See the descriptions of REIG, IMEIG, X and W. */ +/* ..... */ +/* LDZ (input) INTEGER , LDZ >= M */ +/* The leading dimension of the array Z. */ +/* ..... */ +/* RES (output) REAL(KIND=WP) N-by-1 array */ +/* RES(1:K) contains the residuals for the K computed */ +/* Ritz pairs. */ +/* If LAMBDA(i) is real, then */ +/* RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2. */ +/* If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair */ +/* then */ +/* RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F */ +/* where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ] */ +/* [-imag(LAMBDA(i)) real(LAMBDA(i)) ]. */ +/* It holds that */ +/* RES(i) = || A*ZC(:,i) - LAMBDA(i) *ZC(:,i) ||_2 */ +/* RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2 */ +/* where ZC(:,i) = Z(:,i) + sqrt(-1)*Z(:,i+1) */ +/* ZC(:,i+1) = Z(:,i) - sqrt(-1)*Z(:,i+1) */ +/* See the description of REIG, IMEIG and Z. */ +/* ..... */ +/* B (output) REAL(KIND=WP) M-by-N array. */ +/* IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can */ +/* be used for computing the refined vectors; see further */ +/* details in the provided references. */ +/* If JOBF == 'E', B(1:M,1;K) contains */ +/* A*U(:,1:K)*W(1:K,1:K), which are the vectors from the */ +/* Exact DMD, up to scaling by the inverse eigenvalues. */ +/* If JOBF =='N', then B is not referenced. */ +/* See the descriptions of X, W, K. */ +/* ..... */ +/* LDB (input) INTEGER, LDB >= M */ +/* The leading dimension of the array B. */ +/* ..... */ +/* W (workspace/output) REAL(KIND=WP) N-by-N array */ +/* On exit, W(1:K,1:K) contains the K computed */ +/* eigenvectors of the matrix Rayleigh quotient (real and */ +/* imaginary parts for each complex conjugate pair of the */ +/* eigenvalues). The Ritz vectors (returned in Z) are the */ +/* product of X (containing a POD basis for the input */ +/* matrix X) and W. See the descriptions of K, S, X and Z. */ +/* W is also used as a workspace to temporarily store the */ +/* left singular vectors of X. */ +/* ..... */ +/* LDW (input) INTEGER, LDW >= N */ +/* The leading dimension of the array W. */ +/* ..... */ +/* S (workspace/output) REAL(KIND=WP) N-by-N array */ +/* The array S(1:K,1:K) is used for the matrix Rayleigh */ +/* quotient. This content is overwritten during */ +/* the eigenvalue decomposition by SGEEV. */ +/* See the description of K. */ +/* ..... */ +/* LDS (input) INTEGER, LDS >= N */ +/* The leading dimension of the array S. */ +/* ..... */ +/* WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array */ +/* On exit, WORK(1:N) contains the singular values of */ +/* X (for JOBS=='N') or column scaled X (JOBS=='S', 'C'). */ +/* If WHTSVD==4, then WORK(N+1) and WORK(N+2) contain */ +/* scaling factor WORK(N+2)/WORK(N+1) used to scale X */ +/* and Y to avoid overflow in the SVD of X. */ +/* This may be of interest if the scaling option is off */ +/* and as many as possible smallest eigenvalues are */ +/* desired to the highest feasible accuracy. */ +/* If the call to SGEDMD is only workspace query, then */ +/* WORK(1) contains the minimal workspace length and */ +/* WORK(2) is the optimal workspace length. Hence, the */ +/* length of work is at least 2. */ +/* See the description of LWORK. */ +/* ..... */ +/* LWORK (input) INTEGER */ +/* The minimal length of the workspace vector WORK. */ +/* LWORK is calculated as follows: */ +/* If WHTSVD == 1 :: */ +/* If JOBZ == 'V', then */ +/* LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)). */ +/* If JOBZ == 'N' then */ +/* LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)). */ +/* Here LWORK_SVD = MAX(1,3*N+M,5*N) is the minimal */ +/* workspace length of SGESVD. */ +/* If WHTSVD == 2 :: */ +/* If JOBZ == 'V', then */ +/* LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)) */ +/* If JOBZ == 'N', then */ +/* LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)) */ +/* Here LWORK_SVD = MAX(M, 5*N*N+4*N)+3*N*N is the */ +/* minimal workspace length of SGESDD. */ +/* If WHTSVD == 3 :: */ +/* If JOBZ == 'V', then */ +/* LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N)) */ +/* If JOBZ == 'N', then */ +/* LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N)) */ +/* Here LWORK_SVD = N+M+MAX(3*N+1, */ +/* MAX(1,3*N+M,5*N),MAX(1,N)) */ +/* is the minimal workspace length of SGESVDQ. */ +/* If WHTSVD == 4 :: */ +/* If JOBZ == 'V', then */ +/* LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N)) */ +/* If JOBZ == 'N', then */ +/* LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N)) */ +/* Here LWORK_SVD = MAX(7,2*M+N,6*N+2*N*N) is the */ +/* minimal workspace length of SGEJSV. */ +/* The above expressions are not simplified in order to */ +/* make the usage of WORK more transparent, and for */ +/* easier checking. In any case, LWORK >= 2. */ +/* If on entry LWORK = -1, then a workspace query is */ +/* assumed and the procedure only computes the minimal */ +/* and the optimal workspace lengths for both WORK and */ +/* IWORK. See the descriptions of WORK and IWORK. */ +/* ..... */ +/* IWORK (workspace/output) INTEGER LIWORK-by-1 array */ +/* Workspace that is required only if WHTSVD equals */ +/* 2 , 3 or 4. (See the description of WHTSVD). */ +/* If on entry LWORK =-1 or LIWORK=-1, then the */ +/* minimal length of IWORK is computed and returned in */ +/* IWORK(1). See the description of LIWORK. */ +/* ..... */ +/* LIWORK (input) INTEGER */ +/* The minimal length of the workspace vector IWORK. */ +/* If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 */ +/* If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)) */ +/* If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1) */ +/* If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N) */ +/* If on entry LIWORK = -1, then a workspace query is */ +/* assumed and the procedure only computes the minimal */ +/* and the optimal workspace lengths for both WORK and */ +/* IWORK. See the descriptions of WORK and IWORK. */ +/* ..... */ +/* INFO (output) INTEGER */ +/* -i < 0 :: On entry, the i-th argument had an */ +/* illegal value */ +/* = 0 :: Successful return. */ +/* = 1 :: Void input. Quick exit (M=0 or N=0). */ +/* = 2 :: The SVD computation of X did not converge. */ +/* Suggestion: Check the input data and/or */ +/* repeat with different WHTSVD. */ +/* = 3 :: The computation of the eigenvalues did not */ +/* converge. */ +/* = 4 :: If data scaling was requested on input and */ +/* the procedure found inconsistency in the data */ +/* such that for some column index i, */ +/* X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set */ +/* to zero if JOBS=='C'. The computation proceeds */ +/* with original or modified data and warning */ +/* flag is set with INFO=4. */ +/* ............................................................. */ +/* ............................................................. */ +/* Parameters */ +/* ~~~~~~~~~~ */ +/* Local scalars */ +/* ~~~~~~~~~~~~~ */ +/* Local arrays */ +/* ~~~~~~~~~~~~ */ +/* External functions (BLAS and LAPACK) */ +/* ~~~~~~~~~~~~~~~~~ */ +/* External subroutines (BLAS and LAPACK) */ +/* ~~~~~~~~~~~~~~~~~~~~ */ +/* Intrinsic functions */ +/* ~~~~~~~~~~~~~~~~~~~ */ +/* ............................................................ */ + /* Parameter adjustments */ + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1 * 1; + y -= y_offset; + --reig; + --imeig; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --res; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + w_dim1 = *ldw; + w_offset = 1 + w_dim1 * 1; + w -= w_offset; + s_dim1 = *lds; + s_offset = 1 + s_dim1 * 1; + s -= s_offset; + --work; + --iwork; + + /* Function Body */ + one = 1.f; + zero = 0.f; + +/* Test the input arguments */ + + wntres = lsame_(jobr, "R"); + sccolx = lsame_(jobs, "S") || lsame_(jobs, "C"); + sccoly = lsame_(jobs, "Y"); + wntvec = lsame_(jobz, "V"); + wntref = lsame_(jobf, "R"); + wntex = lsame_(jobf, "E"); + *info = 0; + lquery = *lwork == -1 || *liwork == -1; + + if (! (sccolx || sccoly || lsame_(jobs, "N"))) { + *info = -1; + } else if (! (wntvec || lsame_(jobz, "N") || lsame_( + jobz, "F"))) { + *info = -2; + } else if (! (wntres || lsame_(jobr, "N")) || + wntres && ! wntvec) { + *info = -3; + } else if (! (wntref || wntex || lsame_(jobf, "N"))) + { + *info = -4; + } else if (! (*whtsvd == 1 || *whtsvd == 2 || *whtsvd == 3 || *whtsvd == + 4)) { + *info = -5; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || *n > *m) { + *info = -7; + } else if (*ldx < *m) { + *info = -9; + } else if (*ldy < *m) { + *info = -11; + } else if (! (*nrnk == -2 || *nrnk == -1 || *nrnk >= 1 && *nrnk <= *n)) { + *info = -12; + } else if (*tol < zero || *tol >= one) { + *info = -13; + } else if (*ldz < *m) { + *info = -18; + } else if ((wntref || wntex) && *ldb < *m) { + *info = -21; + } else if (*ldw < *n) { + *info = -23; + } else if (*lds < *n) { + *info = -25; + } + + if (*info == 0) { +/* Compute the minimal and the optimal workspace */ +/* requirements. Simulate running the code and */ +/* determine minimal and optimal sizes of the */ +/* workspace at any moment of the run. */ + if (*n == 0) { +/* Quick return. All output except K is void. */ +/* INFO=1 signals the void input. */ +/* In case of a workspace query, the default */ +/* minimal workspace lengths are returned. */ + if (lquery) { + iwork[1] = 1; + work[1] = 2.f; + work[2] = 2.f; + } else { + *k = 0; + } + *info = 1; + return 0; + } + mlwork = f2cmax(2,*n); + olwork = f2cmax(2,*n); + iminwr = 1; +/* SELECT CASE ( WHTSVD ) */ + if (*whtsvd == 1) { +/* The following is specified as the minimal */ +/* length of WORK in the definition of SGESVD: */ +/* MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) */ +/* Computing MAX */ + i__1 = 1, i__2 = f2cmin(*m,*n) * 3 + f2cmax(*m,*n), i__1 = f2cmax(i__1, + i__2), i__2 = f2cmin(*m,*n) * 5; + mwrsvd = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = mlwork, i__2 = *n + mwrsvd; + mlwork = f2cmax(i__1,i__2); + if (lquery) { + sgesvd_("O", "S", m, n, &x[x_offset], ldx, &work[1], &b[ + b_offset], ldb, &w[w_offset], ldw, rdummy, &c_n1, & + info1); +/* Computing MAX */ + i__1 = mwrsvd, i__2 = (integer) rdummy[0]; + lwrsvd = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = olwork, i__2 = *n + lwrsvd; + olwork = f2cmax(i__1,i__2); + } + } else if (*whtsvd == 2) { +/* The following is specified as the minimal */ +/* length of WORK in the definition of SGESDD: */ +/* MWRSDD = 3*MIN(M,N)*MIN(M,N) + */ +/* MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) ) */ +/* IMINWR = 8*MIN(M,N) */ +/* Computing MAX */ + i__1 = f2cmax(*m,*n), i__2 = f2cmin(*m,*n) * 5 * f2cmin(*m,*n) + (f2cmin(*m,* + n) << 2); + mwrsdd = f2cmin(*m,*n) * 3 * f2cmin(*m,*n) + f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = mlwork, i__2 = *n + mwrsdd; + mlwork = f2cmax(i__1,i__2); + iminwr = f2cmin(*m,*n) << 3; + if (lquery) { + sgesdd_("O", m, n, &x[x_offset], ldx, &work[1], &b[b_offset], + ldb, &w[w_offset], ldw, rdummy, &c_n1, &iwork[1], & + info1); +/* Computing MAX */ + i__1 = mwrsdd, i__2 = (integer) rdummy[0]; + lwrsdd = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = olwork, i__2 = *n + lwrsdd; + olwork = f2cmax(i__1,i__2); + } + } else if (*whtsvd == 3) { +/* LWQP3 = 3*N+1 */ +/* LWORQ = MAX(N, 1) */ +/* MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) */ +/* MWRSVQ = N + MAX( LWQP3, MWRSVD, LWORQ )+ MAX(M,2) */ +/* MLWORK = N + MWRSVQ */ +/* IMINWR = M+N-1 */ + sgesvdq_("H", "P", "N", "R", "R", m, n, &x[x_offset], ldx, &work[ + 1], &z__[z_offset], ldz, &w[w_offset], ldw, &numrnk, & + iwork[1], &c_n1, rdummy, &c_n1, rdummy2, &c_n1, &info1); + iminwr = iwork[1]; + mwrsvq = (integer) rdummy[1]; +/* Computing MAX */ + i__1 = mlwork, i__2 = *n + mwrsvq + (integer) rdummy2[0]; + mlwork = f2cmax(i__1,i__2); + if (lquery) { + lwrsvq = (integer) rdummy[0]; +/* Computing MAX */ + i__1 = olwork, i__2 = *n + lwrsvq + (integer) rdummy2[0]; + olwork = f2cmax(i__1,i__2); + } + } else if (*whtsvd == 4) { + *(unsigned char *)jsvopt = 'J'; +/* MWRSVJ = MAX( 7, 2*M+N, 6*N+2*N*N )! for JSVOPT='V' */ +/* Computing MAX */ + i__1 = 7, i__2 = (*m << 1) + *n, i__1 = f2cmax(i__1,i__2), i__2 = (* + n << 2) + *n * *n, i__1 = f2cmax(i__1,i__2), i__2 = (*n << 1) + + *n * *n + 6; + mwrsvj = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = mlwork, i__2 = *n + mwrsvj; + mlwork = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 3, i__2 = *m + *n * 3; + iminwr = f2cmax(i__1,i__2); + if (lquery) { +/* Computing MAX */ + i__1 = olwork, i__2 = *n + mwrsvj; + olwork = f2cmax(i__1,i__2); + } + } +/* END SELECT */ + if (wntvec || wntex || lsame_(jobz, "F")) { + *(unsigned char *)jobzl = 'V'; + } else { + *(unsigned char *)jobzl = 'N'; + } +/* Workspace calculation to the SGEEV call */ + if (lsame_(jobzl, "V")) { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 2; + mwrkev = f2cmax(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = 1, i__2 = *n * 3; + mwrkev = f2cmax(i__1,i__2); + } +/* Computing MAX */ + i__1 = mlwork, i__2 = *n + mwrkev; + mlwork = f2cmax(i__1,i__2); + if (lquery) { + sgeev_("N", jobzl, n, &s[s_offset], lds, &reig[1], &imeig[1], &w[ + w_offset], ldw, &w[w_offset], ldw, rdummy, &c_n1, &info1); +/* Computing MAX */ + i__1 = mwrkev, i__2 = (integer) rdummy[0]; + lwrkev = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = olwork, i__2 = *n + lwrkev; + olwork = f2cmax(i__1,i__2); + } + + if (*liwork < iminwr && ! lquery) { + *info = -29; + } + if (*lwork < mlwork && ! lquery) { + *info = -27; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEDMD", &i__1); + return 0; + } else if (lquery) { +/* Return minimal and optimal workspace sizes */ + iwork[1] = iminwr; + work[1] = (real) mlwork; + work[2] = (real) olwork; + return 0; + } +/* ............................................................ */ + + ofl = slamch_("O"); + small = slamch_("S"); + badxy = FALSE_; + +/* <1> Optional scaling of the snapshots (columns of X, Y) */ +/* ========================================================== */ + if (sccolx) { +/* The columns of X will be normalized. */ +/* To prevent overflows, the column norms of X are */ +/* carefully computed using SLASSQ. */ + *k = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* WORK(i) = DNRM2( M, X(1,i), 1 ) */ + scale = zero; + slassq_(m, &x[i__ * x_dim1 + 1], &c__1, &scale, &ssum); + if (sisnan_(&scale) || sisnan_(&ssum)) { + *k = 0; + *info = -8; + i__2 = -(*info); + xerbla_("SGEDMD", &i__2); + } + if (scale != zero && ssum != zero) { + rootsc = sqrt(ssum); + if (scale >= ofl / rootsc) { +/* Norm of X(:,i) overflows. First, X(:,i) */ +/* is scaled by */ +/* ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2. */ +/* Next, the norm of X(:,i) is stored without */ +/* overflow as WORK(i) = - SCALE * (ROOTSC/M), */ +/* the minus sign indicating the 1/M factor. */ +/* Scaling is performed without overflow, and */ +/* underflow may occur in the smallest entries */ +/* of X(:,i). The relative backward and forward */ +/* errors are small in the ell_2 norm. */ + r__1 = one / rootsc; + slascl_("G", &c__0, &c__0, &scale, &r__1, m, &c__1, &x[ + i__ * x_dim1 + 1], m, &info2); + work[i__] = -scale * (rootsc / (real) (*m)); + } else { +/* X(:,i) will be scaled to unit 2-norm */ + work[i__] = scale * rootsc; + slascl_("G", &c__0, &c__0, &work[i__], &one, m, &c__1, &x[ + i__ * x_dim1 + 1], m, &info2); +/* X(1:M,i) = (ONE/WORK(i)) * X(1:M,i) ! INTRINSIC */ +/* LAPACK */ + } + } else { + work[i__] = zero; + ++(*k); + } + } + if (*k == *n) { +/* All columns of X are zero. Return error code -8. */ +/* (the 8th input variable had an illegal value) */ + *k = 0; + *info = -8; + i__1 = -(*info); + xerbla_("SGEDMD", &i__1); + return 0; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Now, apply the same scaling to the columns of Y. */ + if (work[i__] > zero) { + r__1 = one / work[i__]; + sscal_(m, &r__1, &y[i__ * y_dim1 + 1], &c__1); +/* Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i) ! INTRINSIC */ +/* BLAS CALL */ + } else if (work[i__] < zero) { + r__1 = -work[i__]; + r__2 = one / (real) (*m); + slascl_("G", &c__0, &c__0, &r__1, &r__2, m, &c__1, &y[i__ * + y_dim1 + 1], m, &info2); +/* LAPACK CA */ + } else if (y[isamax_(m, &y[i__ * y_dim1 + 1], &c__1) + i__ * + y_dim1] != zero) { +/* X(:,i) is zero vector. For consistency, */ +/* Y(:,i) should also be zero. If Y(:,i) is not */ +/* zero, then the data might be inconsistent or */ +/* corrupted. If JOBS == 'C', Y(:,i) is set to */ +/* zero and a warning flag is raised. */ +/* The computation continues but the */ +/* situation will be reported in the output. */ + badxy = TRUE_; + if (lsame_(jobs, "C")) { + sscal_(m, &zero, &y[i__ * y_dim1 + 1], &c__1); + } +/* BLAS CALL */ + } + } + } + + if (sccoly) { +/* The columns of Y will be normalized. */ +/* To prevent overflows, the column norms of Y are */ +/* carefully computed using SLASSQ. */ + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* WORK(i) = DNRM2( M, Y(1,i), 1 ) */ + scale = zero; + slassq_(m, &y[i__ * y_dim1 + 1], &c__1, &scale, &ssum); + if (sisnan_(&scale) || sisnan_(&ssum)) { + *k = 0; + *info = -10; + i__2 = -(*info); + xerbla_("SGEDMD", &i__2); + } + if (scale != zero && ssum != zero) { + rootsc = sqrt(ssum); + if (scale >= ofl / rootsc) { +/* Norm of Y(:,i) overflows. First, Y(:,i) */ +/* is scaled by */ +/* ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2. */ +/* Next, the norm of Y(:,i) is stored without */ +/* overflow as WORK(i) = - SCALE * (ROOTSC/M), */ +/* the minus sign indicating the 1/M factor. */ +/* Scaling is performed without overflow, and */ +/* underflow may occur in the smallest entries */ +/* of Y(:,i). The relative backward and forward */ +/* errors are small in the ell_2 norm. */ + r__1 = one / rootsc; + slascl_("G", &c__0, &c__0, &scale, &r__1, m, &c__1, &y[ + i__ * y_dim1 + 1], m, &info2); + work[i__] = -scale * (rootsc / (real) (*m)); + } else { +/* X(:,i) will be scaled to unit 2-norm */ + work[i__] = scale * rootsc; + slascl_("G", &c__0, &c__0, &work[i__], &one, m, &c__1, &y[ + i__ * y_dim1 + 1], m, &info2); +/* Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i) ! INTRINSIC */ +/* LAPACK */ + } + } else { + work[i__] = zero; + } + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Now, apply the same scaling to the columns of X. */ + if (work[i__] > zero) { + r__1 = one / work[i__]; + sscal_(m, &r__1, &x[i__ * x_dim1 + 1], &c__1); +/* X(1:M,i) = (ONE/WORK(i)) * X(1:M,i) ! INTRINSIC */ +/* BLAS CALL */ + } else if (work[i__] < zero) { + r__1 = -work[i__]; + r__2 = one / (real) (*m); + slascl_("G", &c__0, &c__0, &r__1, &r__2, m, &c__1, &x[i__ * + x_dim1 + 1], m, &info2); +/* LAPACK CA */ + } else if (x[isamax_(m, &x[i__ * x_dim1 + 1], &c__1) + i__ * + x_dim1] != zero) { +/* Y(:,i) is zero vector. If X(:,i) is not */ +/* zero, then a warning flag is raised. */ +/* The computation continues but the */ +/* situation will be reported in the output. */ + badxy = TRUE_; + } + } + } + +/* <2> SVD of the data snapshot matrix X. */ +/* ===================================== */ +/* The left singular vectors are stored in the array X. */ +/* The right singular vectors are in the array W. */ +/* The array W will later on contain the eigenvectors */ +/* of a Rayleigh quotient. */ + numrnk = *n; +/* SELECT CASE ( WHTSVD ) */ + if (*whtsvd == 1) { + i__1 = *lwork - *n; + sgesvd_("O", "S", m, n, &x[x_offset], ldx, &work[1], &b[b_offset], + ldb, &w[w_offset], ldw, &work[*n + 1], &i__1, &info1); +/* LAPACK CAL */ + *(unsigned char *)t_or_n__ = 'T'; + } else if (*whtsvd == 2) { + i__1 = *lwork - *n; + sgesdd_("O", m, n, &x[x_offset], ldx, &work[1], &b[b_offset], ldb, &w[ + w_offset], ldw, &work[*n + 1], &i__1, &iwork[1], &info1); +/* LAPACK CAL */ + *(unsigned char *)t_or_n__ = 'T'; + } else if (*whtsvd == 3) { + i__1 = *lwork - *n - f2cmax(2,*m); + i__2 = f2cmax(2,*m); + sgesvdq_("H", "P", "N", "R", "R", m, n, &x[x_offset], ldx, &work[1], & + z__[z_offset], ldz, &w[w_offset], ldw, &numrnk, &iwork[1], + liwork, &work[*n + f2cmax(2,*m) + 1], &i__1, &work[*n + 1], & + i__2, &info1); + + slacpy_("A", m, &numrnk, &z__[z_offset], ldz, &x[x_offset], ldx); +/* LAPACK C */ + *(unsigned char *)t_or_n__ = 'T'; + } else if (*whtsvd == 4) { + i__1 = *lwork - *n; + sgejsv_("F", "U", jsvopt, "N", "N", "P", m, n, &x[x_offset], ldx, & + work[1], &z__[z_offset], ldz, &w[w_offset], ldw, &work[*n + 1] + , &i__1, &iwork[1], &info1); +/* LAPACK CALL */ + slacpy_("A", m, n, &z__[z_offset], ldz, &x[x_offset], ldx); +/* LAPACK CALL */ + *(unsigned char *)t_or_n__ = 'N'; + xscl1 = work[*n + 1]; + xscl2 = work[*n + 2]; + if (xscl1 != xscl2) { +/* This is an exceptional situation. If the */ +/* data matrices are not scaled and the */ +/* largest singular value of X overflows. */ +/* In that case SGEJSV can return the SVD */ +/* in scaled form. The scaling factor can be used */ +/* to rescale the data (X and Y). */ + slascl_("G", &c__0, &c__0, &xscl1, &xscl2, m, n, &y[y_offset], + ldy, &info2); + } +/* END SELECT */ + } + + if (info1 > 0) { +/* The SVD selected subroutine did not converge. */ +/* Return with an error code. */ + *info = 2; + return 0; + } + + if (work[1] == zero) { +/* The largest computed singular value of (scaled) */ +/* X is zero. Return error code -8 */ +/* (the 8th input variable had an illegal value). */ + *k = 0; + *info = -8; + i__1 = -(*info); + xerbla_("SGEDMD", &i__1); + return 0; + } + +/* <3> Determine the numerical rank of the data */ +/* snapshots matrix X. This depends on the */ +/* parameters NRNK and TOL. */ +/* SELECT CASE ( NRNK ) */ + if (*nrnk == -1) { + *k = 1; + i__1 = numrnk; + for (i__ = 2; i__ <= i__1; ++i__) { + if (work[i__] <= work[1] * *tol || work[i__] <= small) { + myexit_(); + } + ++(*k); + } + } else if (*nrnk == -2) { + *k = 1; + i__1 = numrnk - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (work[i__ + 1] <= work[i__] * *tol || work[i__] <= small) { + myexit_(); + } + ++(*k); + } + } else { + *k = 1; + i__1 = *nrnk; + for (i__ = 2; i__ <= i__1; ++i__) { + if (work[i__] <= small) { + myexit_(); + } + ++(*k); + } +/* END SELECT */ + } +/* Now, U = X(1:M,1:K) is the SVD/POD basis for the */ +/* snapshot data in the input matrix X. */ +/* <4> Compute the Rayleigh quotient S = U^T * A * U. */ +/* Depending on the requested outputs, the computation */ +/* is organized to compute additional auxiliary */ +/* matrices (for the residuals and refinements). */ + +/* In all formulas below, we need V_k*Sigma_k^(-1) */ +/* where either V_k is in W(1:N,1:K), or V_k^T is in */ +/* W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)). */ + if (lsame_(t_or_n__, "N")) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + r__1 = one / work[i__]; + sscal_(n, &r__1, &w[i__ * w_dim1 + 1], &c__1); +/* W(1:N,i) = (ONE/WORK(i)) * W(1:N,i) ! INTRINSIC */ +/* BLAS CALL */ + } + } else { +/* This non-unit stride access is due to the fact */ +/* that SGESVD, SGESVDQ and SGESDD return the */ +/* transposed matrix of the right singular vectors. */ +/* DO i = 1, K */ +/* CALL SSCAL( N, ONE/WORK(i), W(i,1), LDW ) ! BLAS CALL */ +/* ! W(i,1:N) = (ONE/WORK(i)) * W(i,1:N) ! INTRINSIC */ +/* END DO */ + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + work[*n + i__] = one / work[i__]; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__ + j * w_dim1] = work[*n + i__] * w[i__ + j * w_dim1]; + } + } + } + + if (wntref) { + +/* Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K))) */ +/* for computing the refined Ritz vectors */ +/* (optionally, outside SGEDMD). */ + sgemm_("N", t_or_n__, m, k, n, &one, &y[y_offset], ldy, &w[w_offset], + ldw, &zero, &z__[z_offset], ldz); +/* Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRI */ +/* Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K)) ! INTRI */ + +/* At this point Z contains */ +/* A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and */ +/* this is needed for computing the residuals. */ +/* This matrix is returned in the array B and */ +/* it can be used to compute refined Ritz vectors. */ +/* BLAS */ + slacpy_("A", m, k, &z__[z_offset], ldz, &b[b_offset], ldb); +/* B(1:M,1:K) = Z(1:M,1:K) ! INTRINSIC */ +/* BLAS CALL */ + sgemm_("T", "N", k, k, m, &one, &x[x_offset], ldx, &z__[z_offset], + ldz, &zero, &s[s_offset], lds); +/* S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRI */ +/* At this point S = U^T * A * U is the Rayleigh quotient. */ +/* BLAS */ + } else { +/* A * U(:,1:K) is not explicitly needed and the */ +/* computation is organized differently. The Rayleigh */ +/* quotient is computed more efficiently. */ + sgemm_("T", "N", k, n, m, &one, &x[x_offset], ldx, &y[y_offset], ldy, + &zero, &z__[z_offset], ldz); +/* Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) ) ! IN */ +/* In the two SGEMM calls here, can use K for LDZ */ +/* B */ + sgemm_("N", t_or_n__, k, k, n, &one, &z__[z_offset], ldz, &w[w_offset] + , ldw, &zero, &s[s_offset], lds); +/* S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRIN */ +/* S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K))) ! INTRIN */ +/* At this point S = U^T * A * U is the Rayleigh quotient. */ +/* If the residuals are requested, save scaled V_k into Z. */ +/* Recall that V_k or V_k^T is stored in W. */ +/* BLAS */ + if (wntres || wntex) { + if (lsame_(t_or_n__, "N")) { + slacpy_("A", n, k, &w[w_offset], ldw, &z__[z_offset], ldz); + } else { + slacpy_("A", k, n, &w[w_offset], ldw, &z__[z_offset], ldz); + } + } + } + +/* <5> Compute the Ritz values and (if requested) the */ +/* right eigenvectors of the Rayleigh quotient. */ + + i__1 = *lwork - *n; + sgeev_("N", jobzl, k, &s[s_offset], lds, &reig[1], &imeig[1], &w[w_offset] + , ldw, &w[w_offset], ldw, &work[*n + 1], &i__1, &info1); + +/* W(1:K,1:K) contains the eigenvectors of the Rayleigh */ +/* quotient. Even in the case of complex spectrum, all */ +/* computation is done in real arithmetic. REIG and */ +/* IMEIG are the real and the imaginary parts of the */ +/* eigenvalues, so that the spectrum is given as */ +/* REIG(:) + sqrt(-1)*IMEIG(:). Complex conjugate pairs */ +/* are listed at consecutive positions. For such a */ +/* complex conjugate pair of the eigenvalues, the */ +/* corresponding eigenvectors are also a complex */ +/* conjugate pair with the real and imaginary parts */ +/* stored column-wise in W at the corresponding */ +/* consecutive column indices. See the description of Z. */ +/* Also, see the description of SGEEV. */ +/* LAPACK C */ + if (info1 > 0) { +/* SGEEV failed to compute the eigenvalues and */ +/* eigenvectors of the Rayleigh quotient. */ + *info = 3; + return 0; + } + +/* <6> Compute the eigenvectors (if requested) and, */ +/* the residuals (if requested). */ + + if (wntvec || wntex) { + if (wntres) { + if (wntref) { +/* Here, if the refinement is requested, we have */ +/* A*U(:,1:K) already computed and stored in Z. */ +/* For the residuals, need Y = A * U(:,1;K) * W. */ + sgemm_("N", "N", m, k, k, &one, &z__[z_offset], ldz, &w[ + w_offset], ldw, &zero, &y[y_offset], ldy); +/* Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K) ! INTRINSIC */ +/* This frees Z; Y contains A * U(:,1:K) * W. */ +/* BLAS CALL */ + } else { +/* Compute S = V_k * Sigma_k^(-1) * W, where */ +/* V_k * Sigma_k^(-1) is stored in Z */ + sgemm_(t_or_n__, "N", n, k, k, &one, &z__[z_offset], ldz, &w[ + w_offset], ldw, &zero, &s[s_offset], lds); +/* Then, compute Z = Y * S = */ +/* = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = */ +/* = A * U(:,1:K) * W(1:K,1:K) */ + sgemm_("N", "N", m, k, n, &one, &y[y_offset], ldy, &s[ + s_offset], lds, &zero, &z__[z_offset], ldz); +/* Save a copy of Z into Y and free Z for holding */ +/* the Ritz vectors. */ + slacpy_("A", m, k, &z__[z_offset], ldz, &y[y_offset], ldy); + if (wntex) { + slacpy_("A", m, k, &z__[z_offset], ldz, &b[b_offset], ldb); + } + } + } else if (wntex) { +/* Compute S = V_k * Sigma_k^(-1) * W, where */ +/* V_k * Sigma_k^(-1) is stored in Z */ + sgemm_(t_or_n__, "N", n, k, k, &one, &z__[z_offset], ldz, &w[ + w_offset], ldw, &zero, &s[s_offset], lds); +/* Then, compute Z = Y * S = */ +/* = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = */ +/* = A * U(:,1:K) * W(1:K,1:K) */ + sgemm_("N", "N", m, k, n, &one, &y[y_offset], ldy, &s[s_offset], + lds, &zero, &b[b_offset], ldb); +/* The above call replaces the following two calls */ +/* that were used in the developing-testing phase. */ +/* CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, & */ +/* LDS, ZERO, Z, LDZ) */ +/* Save a copy of Z into B and free Z for holding */ +/* the Ritz vectors. */ +/* CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB ) */ + } + +/* Compute the real form of the Ritz vectors */ + if (wntvec) { + sgemm_("N", "N", m, k, k, &one, &x[x_offset], ldx, &w[w_offset], + ldw, &zero, &z__[z_offset], ldz); + } +/* Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K)) ! INTRINSIC */ + +/* BLAS CALL */ + if (wntres) { + i__ = 1; + while(i__ <= *k) { + if (imeig[i__] == zero) { +/* have a real eigenvalue with real eigenvector */ + r__1 = -reig[i__]; + saxpy_(m, &r__1, &z__[i__ * z_dim1 + 1], &c__1, &y[i__ * + y_dim1 + 1], &c__1); +/* Y(1:M,i) = Y(1:M,i) - REIG(i) * Z(1:M,i) ! */ + + res[i__] = snrm2_(m, &y[i__ * y_dim1 + 1], &c__1); + ++i__; + } else { +/* Have a complex conjugate pair */ +/* REIG(i) +- sqrt(-1)*IMEIG(i). */ +/* Since all computation is done in real */ +/* arithmetic, the formula for the residual */ +/* is recast for real representation of the */ +/* complex conjugate eigenpair. See the */ +/* description of RES. */ + ab[0] = reig[i__]; + ab[1] = -imeig[i__]; + ab[2] = imeig[i__]; + ab[3] = reig[i__]; + r__1 = -one; + sgemm_("N", "N", m, &c__2, &c__2, &r__1, &z__[i__ * + z_dim1 + 1], ldz, ab, &c__2, &one, &y[i__ * + y_dim1 + 1], ldy); +/* Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB ! INT */ +/* BL */ + res[i__] = slange_("F", m, &c__2, &y[i__ * y_dim1 + 1], + ldy, &work[*n + 1]); +/* LA */ + res[i__ + 1] = res[i__]; + i__ += 2; + } + } + } + } + + if (*whtsvd == 4) { + work[*n + 1] = xscl1; + work[*n + 2] = xscl2; + } + +/* Successful exit. */ + if (! badxy) { + *info = 0; + } else { +/* A warning on possible data inconsistency. */ +/* This should be a rare event. */ + *info = 4; + } +/* ............................................................ */ + return 0; +/* ...... */ +} /* sgedmd_ */ + diff --git a/lapack-netlib/SRC/sgedmdq.c b/lapack-netlib/SRC/sgedmdq.c index 447b23014..0adf3bda3 100644 --- a/lapack-netlib/SRC/sgedmdq.c +++ b/lapack-netlib/SRC/sgedmdq.c @@ -509,3 +509,788 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ +/* -- translated by f2c (version 20000121). + You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static integer c_n1 = -1; + +/* Subroutine */ int sgedmdq_(char *jobs, char *jobz, char *jobr, char *jobq, + char *jobt, char *jobf, integer *whtsvd, integer *m, integer *n, real + *f, integer *ldf, real *x, integer *ldx, real *y, integer *ldy, + integer *nrnk, real *tol, integer *k, real *reig, real *imeig, real * + z__, integer *ldz, real *res, real *b, integer *ldb, real *v, integer + *ldv, real *s, integer *lds, real *work, integer *lwork, integer * + iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer f_dim1, f_offset, x_dim1, x_offset, y_dim1, y_offset, z_dim1, + z_offset, b_dim1, b_offset, v_dim1, v_offset, s_dim1, s_offset, + i__1, i__2; + + /* Local variables */ + real zero; + integer info1; + extern logical lsame_(char *, char *); + char jobvl[1]; + integer minmn; + logical wantq; + integer mlwqr, olwqr; + logical wntex; + extern /* Subroutine */ int sgedmd_(char *, char *, char *, char *, + integer *, integer *, integer *, real *, integer *, real *, + integer *, integer *, real *, integer *, real *, real *, real *, + integer *, real *, real *, integer *, real *, integer *, real *, + integer *, real *, integer *, integer *, integer *, integer *), xerbla_(char *, integer *); + integer mlwdmd, olwdmd; + extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *); + logical sccolx, sccoly; + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *), slaset_(char *, integer *, + integer *, real *, real *, real *, integer *); + integer iminwr; + logical wntvec, wntvcf; + integer mlwgqr; + logical wntref; + integer mlwork, olwgqr, olwork; + real rdummy[2]; + integer mlwmqr, olwmqr; + logical lquery, wntres, wnttrf, wntvcq; + extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), sormqr_(char + *, char *, integer *, integer *, integer *, real *, integer *, + real *, real *, integer *, real *, integer *, integer *); + real one; + +/* March 2023 */ +/* ..... */ +/* USE iso_fortran_env */ +/* INTEGER, PARAMETER :: WP = real32 */ +/* ..... */ +/* Scalar arguments */ +/* Array arguments */ +/* ..... */ +/* Purpose */ +/* ======= */ +/* SGEDMDQ computes the Dynamic Mode Decomposition (DMD) for */ +/* a pair of data snapshot matrices, using a QR factorization */ +/* based compression of the data. For the input matrices */ +/* X and Y such that Y = A*X with an unaccessible matrix */ +/* A, SGEDMDQ computes a certain number of Ritz pairs of A using */ +/* the standard Rayleigh-Ritz extraction from a subspace of */ +/* range(X) that is determined using the leading left singular */ +/* vectors of X. Optionally, SGEDMDQ returns the residuals */ +/* of the computed Ritz pairs, the information needed for */ +/* a refinement of the Ritz vectors, or the eigenvectors of */ +/* the Exact DMD. */ +/* For further details see the references listed */ +/* below. For more details of the implementation see [3]. */ + +/* References */ +/* ========== */ +/* [1] P. Schmid: Dynamic mode decomposition of numerical */ +/* and experimental data, */ +/* Journal of Fluid Mechanics 656, 5-28, 2010. */ +/* [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal */ +/* decompositions: analysis and enhancements, */ +/* SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. */ +/* [3] Z. Drmac: A LAPACK implementation of the Dynamic */ +/* Mode Decomposition I. Technical report. AIMDyn Inc. */ +/* and LAPACK Working Note 298. */ +/* [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. */ +/* Brunton, N. Kutz: On Dynamic Mode Decomposition: */ +/* Theory and Applications, Journal of Computational */ +/* Dynamics 1(2), 391 -421, 2014. */ + +/* Developed and supported by: */ +/* =========================== */ +/* Developed and coded by Zlatko Drmac, Faculty of Science, */ +/* University of Zagreb; drmac@math.hr */ +/* In cooperation with */ +/* AIMdyn Inc., Santa Barbara, CA. */ +/* and supported by */ +/* - DARPA SBIR project "Koopman Operator-Based Forecasting */ +/* for Nonstationary Processes from Near-Term, Limited */ +/* Observational Data" Contract No: W31P4Q-21-C-0007 */ +/* - DARPA PAI project "Physics-Informed Machine Learning */ +/* Methodologies" Contract No: HR0011-18-9-0033 */ +/* - DARPA MoDyL project "A Data-Driven, Operator-Theoretic */ +/* Framework for Space-Time Analysis of Process Dynamics" */ +/* Contract No: HR0011-16-C-0116 */ +/* Any opinions, findings and conclusions or recommendations */ +/* expressed in this material are those of the author and */ +/* do not necessarily reflect the views of the DARPA SBIR */ +/* Program Office. */ +/* ============================================================ */ +/* Distribution Statement A: */ +/* Approved for Public Release, Distribution Unlimited. */ +/* Cleared by DARPA on September 29, 2022 */ +/* ============================================================ */ +/* ...................................................................... */ +/* Arguments */ +/* ========= */ +/* JOBS (input) CHARACTER*1 */ +/* Determines whether the initial data snapshots are scaled */ +/* by a diagonal matrix. The data snapshots are the columns */ +/* of F. The leading N-1 columns of F are denoted X and the */ +/* trailing N-1 columns are denoted Y. */ +/* 'S' :: The data snapshots matrices X and Y are multiplied */ +/* with a diagonal matrix D so that X*D has unit */ +/* nonzero columns (in the Euclidean 2-norm) */ +/* 'C' :: The snapshots are scaled as with the 'S' option. */ +/* If it is found that an i-th column of X is zero */ +/* vector and the corresponding i-th column of Y is */ +/* non-zero, then the i-th column of Y is set to */ +/* zero and a warning flag is raised. */ +/* 'Y' :: The data snapshots matrices X and Y are multiplied */ +/* by a diagonal matrix D so that Y*D has unit */ +/* nonzero columns (in the Euclidean 2-norm) */ +/* 'N' :: No data scaling. */ +/* ..... */ +/* JOBZ (input) CHARACTER*1 */ +/* Determines whether the eigenvectors (Koopman modes) will */ +/* be computed. */ +/* 'V' :: The eigenvectors (Koopman modes) will be computed */ +/* and returned in the matrix Z. */ +/* See the description of Z. */ +/* 'F' :: The eigenvectors (Koopman modes) will be returned */ +/* in factored form as the product Z*V, where Z */ +/* is orthonormal and V contains the eigenvectors */ +/* of the corresponding Rayleigh quotient. */ +/* See the descriptions of F, V, Z. */ +/* 'Q' :: The eigenvectors (Koopman modes) will be returned */ +/* in factored form as the product Q*Z, where Z */ +/* contains the eigenvectors of the compression of the */ +/* underlying discretized operator onto the span of */ +/* the data snapshots. See the descriptions of F, V, Z. */ +/* Q is from the initial QR factorization. */ +/* 'N' :: The eigenvectors are not computed. */ +/* ..... */ +/* JOBR (input) CHARACTER*1 */ +/* Determines whether to compute the residuals. */ +/* 'R' :: The residuals for the computed eigenpairs will */ +/* be computed and stored in the array RES. */ +/* See the description of RES. */ +/* For this option to be legal, JOBZ must be 'V'. */ +/* 'N' :: The residuals are not computed. */ +/* ..... */ +/* JOBQ (input) CHARACTER*1 */ +/* Specifies whether to explicitly compute and return the */ +/* orthogonal matrix from the QR factorization. */ +/* 'Q' :: The matrix Q of the QR factorization of the data */ +/* snapshot matrix is computed and stored in the */ +/* array F. See the description of F. */ +/* 'N' :: The matrix Q is not explicitly computed. */ +/* ..... */ +/* JOBT (input) CHARACTER*1 */ +/* Specifies whether to return the upper triangular factor */ +/* from the QR factorization. */ +/* 'R' :: The matrix R of the QR factorization of the data */ +/* snapshot matrix F is returned in the array Y. */ +/* See the description of Y and Further details. */ +/* 'N' :: The matrix R is not returned. */ +/* ..... */ +/* JOBF (input) CHARACTER*1 */ +/* Specifies whether to store information needed for post- */ +/* processing (e.g. computing refined Ritz vectors) */ +/* 'R' :: The matrix needed for the refinement of the Ritz */ +/* vectors is computed and stored in the array B. */ +/* See the description of B. */ +/* 'E' :: The unscaled eigenvectors of the Exact DMD are */ +/* computed and returned in the array B. See the */ +/* description of B. */ +/* 'N' :: No eigenvector refinement data is computed. */ +/* To be useful on exit, this option needs JOBQ='Q'. */ +/* ..... */ +/* WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } */ +/* Allows for a selection of the SVD algorithm from the */ +/* LAPACK library. */ +/* 1 :: SGESVD (the QR SVD algorithm) */ +/* 2 :: SGESDD (the Divide and Conquer algorithm; if enough */ +/* workspace available, this is the fastest option) */ +/* 3 :: SGESVDQ (the preconditioned QR SVD ; this and 4 */ +/* are the most accurate options) */ +/* 4 :: SGEJSV (the preconditioned Jacobi SVD; this and 3 */ +/* are the most accurate options) */ +/* For the four methods above, a significant difference in */ +/* the accuracy of small singular values is possible if */ +/* the snapshots vary in norm so that X is severely */ +/* ill-conditioned. If small (smaller than EPS*||X||) */ +/* singular values are of interest and JOBS=='N', then */ +/* the options (3, 4) give the most accurate results, where */ +/* the option 4 is slightly better and with stronger */ +/* theoretical background. */ +/* If JOBS=='S', i.e. the columns of X will be normalized, */ +/* then all methods give nearly equally accurate results. */ +/* ..... */ +/* M (input) INTEGER, M >= 0 */ +/* The state space dimension (the number of rows of F) */ +/* ..... */ +/* N (input) INTEGER, 0 <= N <= M */ +/* The number of data snapshots from a single trajectory, */ +/* taken at equidistant discrete times. This is the */ +/* number of columns of F. */ +/* ..... */ +/* F (input/output) REAL(KIND=WP) M-by-N array */ +/* > On entry, */ +/* the columns of F are the sequence of data snapshots */ +/* from a single trajectory, taken at equidistant discrete */ +/* times. It is assumed that the column norms of F are */ +/* in the range of the normalized floating point numbers. */ +/* < On exit, */ +/* If JOBQ == 'Q', the array F contains the orthogonal */ +/* matrix/factor of the QR factorization of the initial */ +/* data snapshots matrix F. See the description of JOBQ. */ +/* If JOBQ == 'N', the entries in F strictly below the main */ +/* diagonal contain, column-wise, the information on the */ +/* Householder vectors, as returned by SGEQRF. The */ +/* remaining information to restore the orthogonal matrix */ +/* of the initial QR factorization is stored in WORK(1:N). */ +/* See the description of WORK. */ +/* ..... */ +/* LDF (input) INTEGER, LDF >= M */ +/* The leading dimension of the array F. */ +/* ..... */ +/* X (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array */ +/* X is used as workspace to hold representations of the */ +/* leading N-1 snapshots in the orthonormal basis computed */ +/* in the QR factorization of F. */ +/* On exit, the leading K columns of X contain the leading */ +/* K left singular vectors of the above described content */ +/* of X. To lift them to the space of the left singular */ +/* vectors U(:,1:K)of the input data, pre-multiply with the */ +/* Q factor from the initial QR factorization. */ +/* See the descriptions of F, K, V and Z. */ +/* ..... */ +/* LDX (input) INTEGER, LDX >= N */ +/* The leading dimension of the array X */ +/* ..... */ +/* Y (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array */ +/* Y is used as workspace to hold representations of the */ +/* trailing N-1 snapshots in the orthonormal basis computed */ +/* in the QR factorization of F. */ +/* On exit, */ +/* If JOBT == 'R', Y contains the MIN(M,N)-by-N upper */ +/* triangular factor from the QR factorization of the data */ +/* snapshot matrix F. */ +/* ..... */ +/* LDY (input) INTEGER , LDY >= N */ +/* The leading dimension of the array Y */ +/* ..... */ +/* NRNK (input) INTEGER */ +/* Determines the mode how to compute the numerical rank, */ +/* i.e. how to truncate small singular values of the input */ +/* matrix X. On input, if */ +/* NRNK = -1 :: i-th singular value sigma(i) is truncated */ +/* if sigma(i) <= TOL*sigma(1) */ +/* This option is recommended. */ +/* NRNK = -2 :: i-th singular value sigma(i) is truncated */ +/* if sigma(i) <= TOL*sigma(i-1) */ +/* This option is included for R&D purposes. */ +/* It requires highly accurate SVD, which */ +/* may not be feasible. */ +/* The numerical rank can be enforced by using positive */ +/* value of NRNK as follows: */ +/* 0 < NRNK <= N-1 :: at most NRNK largest singular values */ +/* will be used. If the number of the computed nonzero */ +/* singular values is less than NRNK, then only those */ +/* nonzero values will be used and the actually used */ +/* dimension is less than NRNK. The actual number of */ +/* the nonzero singular values is returned in the variable */ +/* K. See the description of K. */ +/* ..... */ +/* TOL (input) REAL(KIND=WP), 0 <= TOL < 1 */ +/* The tolerance for truncating small singular values. */ +/* See the description of NRNK. */ +/* ..... */ +/* K (output) INTEGER, 0 <= K <= N */ +/* The dimension of the SVD/POD basis for the leading N-1 */ +/* data snapshots (columns of F) and the number of the */ +/* computed Ritz pairs. The value of K is determined */ +/* according to the rule set by the parameters NRNK and */ +/* TOL. See the descriptions of NRNK and TOL. */ +/* ..... */ +/* REIG (output) REAL(KIND=WP) (N-1)-by-1 array */ +/* The leading K (K<=N) entries of REIG contain */ +/* the real parts of the computed eigenvalues */ +/* REIG(1:K) + sqrt(-1)*IMEIG(1:K). */ +/* See the descriptions of K, IMEIG, Z. */ +/* ..... */ +/* IMEIG (output) REAL(KIND=WP) (N-1)-by-1 array */ +/* The leading K (K0, then the corresponding complex */ +/* conjugate pair of eigenvalues reads */ +/* LAMBDA(i) = REIG(i) + sqrt(-1)*IMAG(i) */ +/* LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i) */ +/* That is, complex conjugate pairs have consecutive */ +/* indices (i,i+1), with the positive imaginary part */ +/* listed first. */ +/* See the descriptions of K, REIG, Z. */ +/* ..... */ +/* Z (workspace/output) REAL(KIND=WP) M-by-(N-1) array */ +/* If JOBZ =='V' then */ +/* Z contains real Ritz vectors as follows: */ +/* If IMEIG(i)=0, then Z(:,i) is an eigenvector of */ +/* the i-th Ritz value. */ +/* If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then */ +/* [Z(:,i) Z(:,i+1)] span an invariant subspace and */ +/* the Ritz values extracted from this subspace are */ +/* REIG(i) + sqrt(-1)*IMEIG(i) and */ +/* REIG(i) - sqrt(-1)*IMEIG(i). */ +/* The corresponding eigenvectors are */ +/* Z(:,i) + sqrt(-1)*Z(:,i+1) and */ +/* Z(:,i) - sqrt(-1)*Z(:,i+1), respectively. */ +/* If JOBZ == 'F', then the above descriptions hold for */ +/* the columns of Z*V, where the columns of V are the */ +/* eigenvectors of the K-by-K Rayleigh quotient, and Z is */ +/* orthonormal. The columns of V are similarly structured: */ +/* If IMEIG(i) == 0 then Z*V(:,i) is an eigenvector, and if */ +/* IMEIG(i) > 0 then Z*V(:,i)+sqrt(-1)*Z*V(:,i+1) and */ +/* Z*V(:,i)-sqrt(-1)*Z*V(:,i+1) */ +/* are the eigenvectors of LAMBDA(i), LAMBDA(i+1). */ +/* See the descriptions of REIG, IMEIG, X and V. */ +/* ..... */ +/* LDZ (input) INTEGER , LDZ >= M */ +/* The leading dimension of the array Z. */ +/* ..... */ +/* RES (output) REAL(KIND=WP) (N-1)-by-1 array */ +/* RES(1:K) contains the residuals for the K computed */ +/* Ritz pairs. */ +/* If LAMBDA(i) is real, then */ +/* RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2. */ +/* If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair */ +/* then */ +/* RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F */ +/* where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ] */ +/* [-imag(LAMBDA(i)) real(LAMBDA(i)) ]. */ +/* It holds that */ +/* RES(i) = || A*ZC(:,i) - LAMBDA(i) *ZC(:,i) ||_2 */ +/* RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2 */ +/* where ZC(:,i) = Z(:,i) + sqrt(-1)*Z(:,i+1) */ +/* ZC(:,i+1) = Z(:,i) - sqrt(-1)*Z(:,i+1) */ +/* See the description of Z. */ +/* ..... */ +/* B (output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array. */ +/* IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can */ +/* be used for computing the refined vectors; see further */ +/* details in the provided references. */ +/* If JOBF == 'E', B(1:N,1;K) contains */ +/* A*U(:,1:K)*W(1:K,1:K), which are the vectors from the */ +/* Exact DMD, up to scaling by the inverse eigenvalues. */ +/* In both cases, the content of B can be lifted to the */ +/* original dimension of the input data by pre-multiplying */ +/* with the Q factor from the initial QR factorization. */ +/* Here A denotes a compression of the underlying operator. */ +/* See the descriptions of F and X. */ +/* If JOBF =='N', then B is not referenced. */ +/* ..... */ +/* LDB (input) INTEGER, LDB >= MIN(M,N) */ +/* The leading dimension of the array B. */ +/* ..... */ +/* V (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array */ +/* On exit, V(1:K,1:K) contains the K eigenvectors of */ +/* the Rayleigh quotient. The eigenvectors of a complex */ +/* conjugate pair of eigenvalues are returned in real form */ +/* as explained in the description of Z. The Ritz vectors */ +/* (returned in Z) are the product of X and V; see */ +/* the descriptions of X and Z. */ +/* ..... */ +/* LDV (input) INTEGER, LDV >= N-1 */ +/* The leading dimension of the array V. */ +/* ..... */ +/* S (output) REAL(KIND=WP) (N-1)-by-(N-1) array */ +/* The array S(1:K,1:K) is used for the matrix Rayleigh */ +/* quotient. This content is overwritten during */ +/* the eigenvalue decomposition by SGEEV. */ +/* See the description of K. */ +/* ..... */ +/* LDS (input) INTEGER, LDS >= N-1 */ +/* The leading dimension of the array S. */ +/* ..... */ +/* WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array */ +/* On exit, */ +/* WORK(1:MIN(M,N)) contains the scalar factors of the */ +/* elementary reflectors as returned by SGEQRF of the */ +/* M-by-N input matrix F. */ +/* WORK(MIN(M,N)+1:MIN(M,N)+N-1) contains the singular values of */ +/* the input submatrix F(1:M,1:N-1). */ +/* If the call to SGEDMDQ is only workspace query, then */ +/* WORK(1) contains the minimal workspace length and */ +/* WORK(2) is the optimal workspace length. Hence, the */ +/* length of work is at least 2. */ +/* See the description of LWORK. */ +/* ..... */ +/* LWORK (input) INTEGER */ +/* The minimal length of the workspace vector WORK. */ +/* LWORK is calculated as follows: */ +/* Let MLWQR = N (minimal workspace for SGEQRF[M,N]) */ +/* MLWDMD = minimal workspace for SGEDMD (see the */ +/* description of LWORK in SGEDMD) for */ +/* snapshots of dimensions MIN(M,N)-by-(N-1) */ +/* MLWMQR = N (minimal workspace for */ +/* SORMQR['L','N',M,N,N]) */ +/* MLWGQR = N (minimal workspace for SORGQR[M,N,N]) */ +/* Then */ +/* LWORK = MAX(N+MLWQR, N+MLWDMD) */ +/* is updated as follows: */ +/* if JOBZ == 'V' or JOBZ == 'F' THEN */ +/* LWORK = MAX( LWORK,MIN(M,N)+N-1 +MLWMQR ) */ +/* if JOBQ == 'Q' THEN */ +/* LWORK = MAX( LWORK,MIN(M,N)+N-1+MLWGQR) */ +/* If on entry LWORK = -1, then a workspace query is */ +/* assumed and the procedure only computes the minimal */ +/* and the optimal workspace lengths for both WORK and */ +/* IWORK. See the descriptions of WORK and IWORK. */ +/* ..... */ +/* IWORK (workspace/output) INTEGER LIWORK-by-1 array */ +/* Workspace that is required only if WHTSVD equals */ +/* 2 , 3 or 4. (See the description of WHTSVD). */ +/* If on entry LWORK =-1 or LIWORK=-1, then the */ +/* minimal length of IWORK is computed and returned in */ +/* IWORK(1). See the description of LIWORK. */ +/* ..... */ +/* LIWORK (input) INTEGER */ +/* The minimal length of the workspace vector IWORK. */ +/* If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 */ +/* Let M1=MIN(M,N), N1=N-1. Then */ +/* If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1)) */ +/* If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1) */ +/* If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1) */ +/* If on entry LIWORK = -1, then a worskpace query is */ +/* assumed and the procedure only computes the minimal */ +/* and the optimal workspace lengths for both WORK and */ +/* IWORK. See the descriptions of WORK and IWORK. */ +/* ..... */ +/* INFO (output) INTEGER */ +/* -i < 0 :: On entry, the i-th argument had an */ +/* illegal value */ +/* = 0 :: Successful return. */ +/* = 1 :: Void input. Quick exit (M=0 or N=0). */ +/* = 2 :: The SVD computation of X did not converge. */ +/* Suggestion: Check the input data and/or */ +/* repeat with different WHTSVD. */ +/* = 3 :: The computation of the eigenvalues did not */ +/* converge. */ +/* = 4 :: If data scaling was requested on input and */ +/* the procedure found inconsistency in the data */ +/* such that for some column index i, */ +/* X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set */ +/* to zero if JOBS=='C'. The computation proceeds */ +/* with original or modified data and warning */ +/* flag is set with INFO=4. */ +/* ............................................................. */ +/* ............................................................. */ +/* Parameters */ +/* ~~~~~~~~~~ */ + +/* Local scalars */ +/* ~~~~~~~~~~~~~ */ + +/* Local array */ +/* ~~~~~~~~~~~ */ + +/* External functions (BLAS and LAPACK) */ +/* ~~~~~~~~~~~~~~~~~ */ + +/* External subroutines (BLAS and LAPACK) */ +/* ~~~~~~~~~~~~~~~~~~~~ */ +/* External subroutines */ +/* ~~~~~~~~~~~~~~~~~~~~ */ +/* Intrinsic functions */ +/* ~~~~~~~~~~~~~~~~~~~ */ + /* Parameter adjustments */ + f_dim1 = *ldf; + f_offset = 1 + f_dim1 * 1; + f -= f_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1 * 1; + y -= y_offset; + --reig; + --imeig; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --res; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + s_dim1 = *lds; + s_offset = 1 + s_dim1 * 1; + s -= s_offset; + --work; + --iwork; + + /* Function Body */ + one = 1.f; + zero = 0.f; +/* .......................................................... */ + +/* Test the input arguments */ + wntres = lsame_(jobr, "R"); + sccolx = lsame_(jobs, "S") || lsame_(jobs, "C"); + sccoly = lsame_(jobs, "Y"); + wntvec = lsame_(jobz, "V"); + wntvcf = lsame_(jobz, "F"); + wntvcq = lsame_(jobz, "Q"); + wntref = lsame_(jobf, "R"); + wntex = lsame_(jobf, "E"); + wantq = lsame_(jobq, "Q"); + wnttrf = lsame_(jobt, "R"); + minmn = f2cmin(*m,*n); + *info = 0; + lquery = *lwork == -1 || *liwork == -1; + + if (! (sccolx || sccoly || lsame_(jobs, "N"))) { + *info = -1; + } else if (! (wntvec || wntvcf || wntvcq || lsame_(jobz, "N"))) { + *info = -2; + } else if (! (wntres || lsame_(jobr, "N")) || + wntres && lsame_(jobz, "N")) { + *info = -3; + } else if (! (wantq || lsame_(jobq, "N"))) { + *info = -4; + } else if (! (wnttrf || lsame_(jobt, "N"))) { + *info = -5; + } else if (! (wntref || wntex || lsame_(jobf, "N"))) + { + *info = -6; + } else if (! (*whtsvd == 1 || *whtsvd == 2 || *whtsvd == 3 || *whtsvd == + 4)) { + *info = -7; + } else if (*m < 0) { + *info = -8; + } else if (*n < 0 || *n > *m + 1) { + *info = -9; + } else if (*ldf < *m) { + *info = -11; + } else if (*ldx < minmn) { + *info = -13; + } else if (*ldy < minmn) { + *info = -15; + } else if (! (*nrnk == -2 || *nrnk == -1 || *nrnk >= 1 && *nrnk <= *n)) { + *info = -16; + } else if (*tol < zero || *tol >= one) { + *info = -17; + } else if (*ldz < *m) { + *info = -22; + } else if ((wntref || wntex) && *ldb < minmn) { + *info = -25; + } else if (*ldv < *n - 1) { + *info = -27; + } else if (*lds < *n - 1) { + *info = -29; + } + + if (wntvec || wntvcf) { + *(unsigned char *)jobvl = 'V'; + } else { + *(unsigned char *)jobvl = 'N'; + } + if (*info == 0) { +/* Compute the minimal and the optimal workspace */ +/* requirements. Simulate running the code and */ +/* determine minimal and optimal sizes of the */ +/* workspace at any moment of the run. */ + if (*n == 0 || *n == 1) { +/* All output except K is void. INFO=1 signals */ +/* the void input. In case of a workspace query, */ +/* the minimal workspace lengths are returned. */ + if (lquery) { + iwork[1] = 1; + work[1] = 2.f; + work[2] = 2.f; + } else { + *k = 0; + } + *info = 1; + return 0; + } + mlwqr = f2cmax(1,*n); +/* Minimal workspace length for SGEQRF. */ + mlwork = f2cmin(*m,*n) + mlwqr; + if (lquery) { + sgeqrf_(m, n, &f[f_offset], ldf, &work[1], rdummy, &c_n1, &info1); + olwqr = (integer) rdummy[0]; + olwork = f2cmin(*m,*n) + olwqr; + } + i__1 = *n - 1; + sgedmd_(jobs, jobvl, jobr, jobf, whtsvd, &minmn, &i__1, &x[x_offset], + ldx, &y[y_offset], ldy, nrnk, tol, k, &reig[1], &imeig[1], & + z__[z_offset], ldz, &res[1], &b[b_offset], ldb, &v[v_offset], + ldv, &s[s_offset], lds, &work[1], &c_n1, &iwork[1], liwork, & + info1); + mlwdmd = (integer) work[1]; +/* Computing MAX */ + i__1 = mlwork, i__2 = minmn + mlwdmd; + mlwork = f2cmax(i__1,i__2); + iminwr = iwork[1]; + if (lquery) { + olwdmd = (integer) work[2]; +/* Computing MAX */ + i__1 = olwork, i__2 = minmn + olwdmd; + olwork = f2cmax(i__1,i__2); + } + if (wntvec || wntvcf) { + mlwmqr = f2cmax(1,*n); +/* Computing MAX */ + i__1 = mlwork, i__2 = minmn + *n - 1 + mlwmqr; + mlwork = f2cmax(i__1,i__2); + if (lquery) { + sormqr_("L", "N", m, n, &minmn, &f[f_offset], ldf, &work[1], & + z__[z_offset], ldz, &work[1], &c_n1, &info1); + olwmqr = (integer) work[1]; +/* Computing MAX */ + i__1 = olwork, i__2 = minmn + *n - 1 + olwmqr; + olwork = f2cmax(i__1,i__2); + } + } + if (wantq) { + mlwgqr = *n; +/* Computing MAX */ + i__1 = mlwork, i__2 = minmn + *n - 1 + mlwgqr; + mlwork = f2cmax(i__1,i__2); + if (lquery) { + sorgqr_(m, &minmn, &minmn, &f[f_offset], ldf, &work[1], &work[ + 1], &c_n1, &info1); + olwgqr = (integer) work[1]; +/* Computing MAX */ + i__1 = olwork, i__2 = minmn + *n - 1 + olwgqr; + olwork = f2cmax(i__1,i__2); + } + } + iminwr = f2cmax(1,iminwr); + mlwork = f2cmax(2,mlwork); + if (*lwork < mlwork && ! lquery) { + *info = -31; + } + if (*liwork < iminwr && ! lquery) { + *info = -33; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEDMDQ", &i__1); + return 0; + } else if (lquery) { +/* Return minimal and optimal workspace sizes */ + iwork[1] = iminwr; + work[1] = (real) mlwork; + work[2] = (real) olwork; + return 0; + } +/* ..... */ +/* Initial QR factorization that is used to represent the */ +/* snapshots as elements of lower dimensional subspace. */ +/* For large scale computation with M >>N , at this place */ +/* one can use an out of core QRF. */ + + i__1 = *lwork - minmn; + sgeqrf_(m, n, &f[f_offset], ldf, &work[1], &work[minmn + 1], &i__1, & + info1); + +/* Define X and Y as the snapshots representations in the */ +/* orthogonal basis computed in the QR factorization. */ +/* X corresponds to the leading N-1 and Y to the trailing */ +/* N-1 snapshots. */ + i__1 = *n - 1; + slaset_("L", &minmn, &i__1, &zero, &zero, &x[x_offset], ldx); + i__1 = *n - 1; + slacpy_("U", &minmn, &i__1, &f[f_offset], ldf, &x[x_offset], ldx); + i__1 = *n - 1; + slacpy_("A", &minmn, &i__1, &f[(f_dim1 << 1) + 1], ldf, &y[y_offset], ldy); + if (*m >= 3) { + i__1 = minmn - 2; + i__2 = *n - 2; + slaset_("L", &i__1, &i__2, &zero, &zero, &y[y_dim1 + 3], ldy); + } + +/* Compute the DMD of the projected snapshot pairs (X,Y) */ + i__1 = *n - 1; + i__2 = *lwork - minmn; + sgedmd_(jobs, jobvl, jobr, jobf, whtsvd, &minmn, &i__1, &x[x_offset], ldx, + &y[y_offset], ldy, nrnk, tol, k, &reig[1], &imeig[1], &z__[ + z_offset], ldz, &res[1], &b[b_offset], ldb, &v[v_offset], ldv, &s[ + s_offset], lds, &work[minmn + 1], &i__2, &iwork[1], liwork, & + info1); + if (info1 == 2 || info1 == 3) { +/* Return with error code. */ + *info = info1; + return 0; + } else { + *info = info1; + } + +/* The Ritz vectors (Koopman modes) can be explicitly */ +/* formed or returned in factored form. */ + if (wntvec) { +/* Compute the eigenvectors explicitly. */ + if (*m > minmn) { + i__1 = *m - minmn; + slaset_("A", &i__1, k, &zero, &zero, &z__[minmn + 1 + z_dim1], + ldz); + } + i__1 = *lwork - (minmn + *n - 1); + sormqr_("L", "N", m, k, &minmn, &f[f_offset], ldf, &work[1], &z__[ + z_offset], ldz, &work[minmn + *n], &i__1, &info1); + } else if (wntvcf) { +/* Return the Ritz vectors (eigenvectors) in factored */ +/* form Z*V, where Z contains orthonormal matrix (the */ +/* product of Q from the initial QR factorization and */ +/* the SVD/POD_basis returned by SGEDMD in X) and the */ +/* second factor (the eigenvectors of the Rayleigh */ +/* quotient) is in the array V, as returned by SGEDMD. */ + slacpy_("A", n, k, &x[x_offset], ldx, &z__[z_offset], ldz); + if (*m > *n) { + i__1 = *m - *n; + slaset_("A", &i__1, k, &zero, &zero, &z__[*n + 1 + z_dim1], ldz); + } + i__1 = *lwork - (minmn + *n - 1); + sormqr_("L", "N", m, k, &minmn, &f[f_offset], ldf, &work[1], &z__[ + z_offset], ldz, &work[minmn + *n], &i__1, &info1); + } + +/* Some optional output variables: */ + +/* The upper triangular factor in the initial QR */ +/* factorization is optionally returned in the array Y. */ +/* This is useful if this call to SGEDMDQ is to be */ +/* followed by a streaming DMD that is implemented in a */ +/* QR compressed form. */ + if (wnttrf) { +/* Return the upper triangular R in Y */ + slaset_("A", &minmn, n, &zero, &zero, &y[y_offset], ldy); + slacpy_("U", &minmn, n, &f[f_offset], ldf, &y[y_offset], ldy); + } + +/* The orthonormal/orthogonal factor in the initial QR */ +/* factorization is optionally returned in the array F. */ +/* Same as with the triangular factor above, this is */ +/* useful in a streaming DMD. */ + if (wantq) { +/* Q overwrites F */ + i__1 = *lwork - (minmn + *n - 1); + sorgqr_(m, &minmn, &minmn, &f[f_offset], ldf, &work[1], &work[minmn + + *n], &i__1, &info1); + } + + return 0; + +} /* sgedmdq_ */ + diff --git a/lapack-netlib/SRC/zgedmd.c b/lapack-netlib/SRC/zgedmd.c index 447b23014..c1b39ba3e 100644 --- a/lapack-netlib/SRC/zgedmd.c +++ b/lapack-netlib/SRC/zgedmd.c @@ -509,3 +509,1168 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ +/* -- translated by f2c (version 20000121). + You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static integer c_n1 = -1; +static integer c__1 = 1; +static integer c__0 = 0; + +/* Subroutine */ int zgedmd_(char *jobs, char *jobz, char *jobr, char *jobf, + integer *whtsvd, integer *m, integer *n, doublecomplex *x, integer * + ldx, doublecomplex *y, integer *ldy, integer *nrnk, doublereal *tol, + integer *k, doublecomplex *eigs, doublecomplex *z__, integer *ldz, + doublereal *res, doublecomplex *b, integer *ldb, doublecomplex *w, + integer *ldw, doublecomplex *s, integer *lds, doublecomplex *zwork, + integer *lzwork, doublereal *rwork, integer *lrwork, integer *iwork, + integer *liwork, integer *info) +{ + /* System generated locals */ + integer x_dim1, x_offset, y_dim1, y_offset, z_dim1, z_offset, b_dim1, + b_offset, w_dim1, w_offset, s_dim1, s_offset, i__1, i__2, i__3, + i__4, i__5; + doublereal d__1, d__2; + doublecomplex z__1, z__2; + + /* Local variables */ + doublecomplex zone; + doublereal zero, ssum; + integer info1, info2; + doublereal xscl1, xscl2; + integer i__, j; + doublereal scale; + extern logical lsame_(char *, char *); + logical badxy; + doublereal small; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + char jobzl[1]; + extern /* Subroutine */ int zgeev_(char *, char *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, integer *); + logical wntex; + doublecomplex zzero; + extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *); + extern doublereal dznrm2_(integer *, doublecomplex *, integer *), dlamch_( + char *); + extern logical disnan_(doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *); + char t_or_n__[1]; + extern /* Subroutine */ int zdscal_(integer *, doublereal *, + doublecomplex *, integer *), zgesdd_(char *, integer *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, integer *, integer *), zlascl_(char *, + integer *, integer *, doublereal *, doublereal *, integer *, + integer *, doublecomplex *, integer *, integer *); + extern integer izamax_(integer *, doublecomplex *, integer *); + logical sccolx, sccoly; + integer lwrsdd, mwrsdd; + extern /* Subroutine */ int zgesvd_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, integer *), zlacpy_(char *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *); + integer iminwr; + logical wntref, wntvec; + doublereal rootsc; + integer lwrkev, mlwork, mwrkev, numrnk, olwork, lwrsvd, mwrsvd, mlrwrk; + logical lquery, wntres; + char jsvopt[1]; + integer lwrsvj, mwrsvj; + doublereal rdummy[2]; + extern /* Subroutine */ int zgejsv_(char *, char *, char *, char *, char * + , char *, integer *, integer *, doublecomplex *, integer *, + doublereal *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, doublereal *, integer *, + integer *, integer *), zlassq_(integer *, doublecomplex *, integer *, + doublereal *, doublereal *), mecago_(); + integer lwrsvq, mwrsvq; + doublereal ofl, one; + extern /* Subroutine */ int zgesvdq_(char *, char *, char *, char *, char + *, integer *, integer *, doublecomplex *, integer *, doublereal *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + , integer *, integer *, doublecomplex *, integer *, doublereal *, + integer *, integer *); + +/* March 2023 */ +/* ..... */ +/* USE iso_fortran_env */ +/* INTEGER, PARAMETER :: WP = real64 */ +/* ..... */ +/* Scalar arguments */ +/* Array arguments */ +/* ............................................................ */ +/* Purpose */ +/* ======= */ +/* ZGEDMD computes the Dynamic Mode Decomposition (DMD) for */ +/* a pair of data snapshot matrices. For the input matrices */ +/* X and Y such that Y = A*X with an unaccessible matrix */ +/* A, ZGEDMD computes a certain number of Ritz pairs of A using */ +/* the standard Rayleigh-Ritz extraction from a subspace of */ +/* range(X) that is determined using the leading left singular */ +/* vectors of X. Optionally, ZGEDMD returns the residuals */ +/* of the computed Ritz pairs, the information needed for */ +/* a refinement of the Ritz vectors, or the eigenvectors of */ +/* the Exact DMD. */ +/* For further details see the references listed */ +/* below. For more details of the implementation see [3]. */ + +/* References */ +/* ========== */ +/* [1] P. Schmid: Dynamic mode decomposition of numerical */ +/* and experimental data, */ +/* Journal of Fluid Mechanics 656, 5-28, 2010. */ +/* [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal */ +/* decompositions: analysis and enhancements, */ +/* SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. */ +/* [3] Z. Drmac: A LAPACK implementation of the Dynamic */ +/* Mode Decomposition I. Technical report. AIMDyn Inc. */ +/* and LAPACK Working Note 298. */ +/* [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. */ +/* Brunton, N. Kutz: On Dynamic Mode Decomposition: */ +/* Theory and Applications, Journal of Computational */ +/* Dynamics 1(2), 391 -421, 2014. */ + +/* ...................................................................... */ +/* Developed and supported by: */ +/* =========================== */ +/* Developed and coded by Zlatko Drmac, Faculty of Science, */ +/* University of Zagreb; drmac@math.hr */ +/* In cooperation with */ +/* AIMdyn Inc., Santa Barbara, CA. */ +/* and supported by */ +/* - DARPA SBIR project "Koopman Operator-Based Forecasting */ +/* for Nonstationary Processes from Near-Term, Limited */ +/* Observational Data" Contract No: W31P4Q-21-C-0007 */ +/* - DARPA PAI project "Physics-Informed Machine Learning */ +/* Methodologies" Contract No: HR0011-18-9-0033 */ +/* - DARPA MoDyL project "A Data-Driven, Operator-Theoretic */ +/* Framework for Space-Time Analysis of Process Dynamics" */ +/* Contract No: HR0011-16-C-0116 */ +/* Any opinions, findings and conclusions or recommendations */ +/* expressed in this material are those of the author and */ +/* do not necessarily reflect the views of the DARPA SBIR */ +/* Program Office */ +/* ============================================================ */ +/* Distribution Statement A: */ +/* Approved for Public Release, Distribution Unlimited. */ +/* Cleared by DARPA on September 29, 2022 */ +/* ============================================================ */ +/* ............................................................ */ +/* Arguments */ +/* ========= */ +/* JOBS (input) CHARACTER*1 */ +/* Determines whether the initial data snapshots are scaled */ +/* by a diagonal matrix. */ +/* 'S' :: The data snapshots matrices X and Y are multiplied */ +/* with a diagonal matrix D so that X*D has unit */ +/* nonzero columns (in the Euclidean 2-norm) */ +/* 'C' :: The snapshots are scaled as with the 'S' option. */ +/* If it is found that an i-th column of X is zero */ +/* vector and the corresponding i-th column of Y is */ +/* non-zero, then the i-th column of Y is set to */ +/* zero and a warning flag is raised. */ +/* 'Y' :: The data snapshots matrices X and Y are multiplied */ +/* by a diagonal matrix D so that Y*D has unit */ +/* nonzero columns (in the Euclidean 2-norm) */ +/* 'N' :: No data scaling. */ +/* ..... */ +/* JOBZ (input) CHARACTER*1 */ +/* Determines whether the eigenvectors (Koopman modes) will */ +/* be computed. */ +/* 'V' :: The eigenvectors (Koopman modes) will be computed */ +/* and returned in the matrix Z. */ +/* See the description of Z. */ +/* 'F' :: The eigenvectors (Koopman modes) will be returned */ +/* in factored form as the product X(:,1:K)*W, where X */ +/* contains a POD basis (leading left singular vectors */ +/* of the data matrix X) and W contains the eigenvectors */ +/* of the corresponding Rayleigh quotient. */ +/* See the descriptions of K, X, W, Z. */ +/* 'N' :: The eigenvectors are not computed. */ +/* ..... */ +/* JOBR (input) CHARACTER*1 */ +/* Determines whether to compute the residuals. */ +/* 'R' :: The residuals for the computed eigenpairs will be */ +/* computed and stored in the array RES. */ +/* See the description of RES. */ +/* For this option to be legal, JOBZ must be 'V'. */ +/* 'N' :: The residuals are not computed. */ +/* ..... */ +/* JOBF (input) CHARACTER*1 */ +/* Specifies whether to store information needed for post- */ +/* processing (e.g. computing refined Ritz vectors) */ +/* 'R' :: The matrix needed for the refinement of the Ritz */ +/* vectors is computed and stored in the array B. */ +/* See the description of B. */ +/* 'E' :: The unscaled eigenvectors of the Exact DMD are */ +/* computed and returned in the array B. See the */ +/* description of B. */ +/* 'N' :: No eigenvector refinement data is computed. */ +/* ..... */ +/* WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } */ +/* Allows for a selection of the SVD algorithm from the */ +/* LAPACK library. */ +/* 1 :: ZGESVD (the QR SVD algorithm) */ +/* 2 :: ZGESDD (the Divide and Conquer algorithm; if enough */ +/* workspace available, this is the fastest option) */ +/* 3 :: ZGESVDQ (the preconditioned QR SVD ; this and 4 */ +/* are the most accurate options) */ +/* 4 :: ZGEJSV (the preconditioned Jacobi SVD; this and 3 */ +/* are the most accurate options) */ +/* For the four methods above, a significant difference in */ +/* the accuracy of small singular values is possible if */ +/* the snapshots vary in norm so that X is severely */ +/* ill-conditioned. If small (smaller than EPS*||X||) */ +/* singular values are of interest and JOBS=='N', then */ +/* the options (3, 4) give the most accurate results, where */ +/* the option 4 is slightly better and with stronger */ +/* theoretical background. */ +/* If JOBS=='S', i.e. the columns of X will be normalized, */ +/* then all methods give nearly equally accurate results. */ +/* ..... */ +/* M (input) INTEGER, M>= 0 */ +/* The state space dimension (the row dimension of X, Y). */ +/* ..... */ +/* N (input) INTEGER, 0 <= N <= M */ +/* The number of data snapshot pairs */ +/* (the number of columns of X and Y). */ +/* ..... */ +/* X (input/output) COMPLEX(KIND=WP) M-by-N array */ +/* > On entry, X contains the data snapshot matrix X. It is */ +/* assumed that the column norms of X are in the range of */ +/* the normalized floating point numbers. */ +/* < On exit, the leading K columns of X contain a POD basis, */ +/* i.e. the leading K left singular vectors of the input */ +/* data matrix X, U(:,1:K). All N columns of X contain all */ +/* left singular vectors of the input matrix X. */ +/* See the descriptions of K, Z and W. */ +/* ..... */ +/* LDX (input) INTEGER, LDX >= M */ +/* The leading dimension of the array X. */ +/* ..... */ +/* Y (input/workspace/output) COMPLEX(KIND=WP) M-by-N array */ +/* > On entry, Y contains the data snapshot matrix Y */ +/* < On exit, */ +/* If JOBR == 'R', the leading K columns of Y contain */ +/* the residual vectors for the computed Ritz pairs. */ +/* See the description of RES. */ +/* If JOBR == 'N', Y contains the original input data, */ +/* scaled according to the value of JOBS. */ +/* ..... */ +/* LDY (input) INTEGER , LDY >= M */ +/* The leading dimension of the array Y. */ +/* ..... */ +/* NRNK (input) INTEGER */ +/* Determines the mode how to compute the numerical rank, */ +/* i.e. how to truncate small singular values of the input */ +/* matrix X. On input, if */ +/* NRNK = -1 :: i-th singular value sigma(i) is truncated */ +/* if sigma(i) <= TOL*sigma(1) */ +/* This option is recommended. */ +/* NRNK = -2 :: i-th singular value sigma(i) is truncated */ +/* if sigma(i) <= TOL*sigma(i-1) */ +/* This option is included for R&D purposes. */ +/* It requires highly accurate SVD, which */ +/* may not be feasible. */ +/* The numerical rank can be enforced by using positive */ +/* value of NRNK as follows: */ +/* 0 < NRNK <= N :: at most NRNK largest singular values */ +/* will be used. If the number of the computed nonzero */ +/* singular values is less than NRNK, then only those */ +/* nonzero values will be used and the actually used */ +/* dimension is less than NRNK. The actual number of */ +/* the nonzero singular values is returned in the variable */ +/* K. See the descriptions of TOL and K. */ +/* ..... */ +/* TOL (input) REAL(KIND=WP), 0 <= TOL < 1 */ +/* The tolerance for truncating small singular values. */ +/* See the description of NRNK. */ +/* ..... */ +/* K (output) INTEGER, 0 <= K <= N */ +/* The dimension of the POD basis for the data snapshot */ +/* matrix X and the number of the computed Ritz pairs. */ +/* The value of K is determined according to the rule set */ +/* by the parameters NRNK and TOL. */ +/* See the descriptions of NRNK and TOL. */ +/* ..... */ +/* EIGS (output) COMPLEX(KIND=WP) N-by-1 array */ +/* The leading K (K<=N) entries of EIGS contain */ +/* the computed eigenvalues (Ritz values). */ +/* See the descriptions of K, and Z. */ +/* ..... */ +/* Z (workspace/output) COMPLEX(KIND=WP) M-by-N array */ +/* If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i) */ +/* is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1. */ +/* If JOBZ == 'F', then the Z(:,i)'s are given implicitly as */ +/* the columns of X(:,1:K)*W(1:K,1:K), i.e. X(:,1:K)*W(:,i) */ +/* is an eigenvector corresponding to EIGS(i). The columns */ +/* of W(1:k,1:K) are the computed eigenvectors of the */ +/* K-by-K Rayleigh quotient. */ +/* See the descriptions of EIGS, X and W. */ +/* ..... */ +/* LDZ (input) INTEGER , LDZ >= M */ +/* The leading dimension of the array Z. */ +/* ..... */ +/* RES (output) REAL(KIND=WP) N-by-1 array */ +/* RES(1:K) contains the residuals for the K computed */ +/* Ritz pairs, */ +/* RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. */ +/* See the description of EIGS and Z. */ +/* ..... */ +/* B (output) COMPLEX(KIND=WP) M-by-N array. */ +/* IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can */ +/* be used for computing the refined vectors; see further */ +/* details in the provided references. */ +/* If JOBF == 'E', B(1:M,1:K) contains */ +/* A*U(:,1:K)*W(1:K,1:K), which are the vectors from the */ +/* Exact DMD, up to scaling by the inverse eigenvalues. */ +/* If JOBF =='N', then B is not referenced. */ +/* See the descriptions of X, W, K. */ +/* ..... */ +/* LDB (input) INTEGER, LDB >= M */ +/* The leading dimension of the array B. */ +/* ..... */ +/* W (workspace/output) COMPLEX(KIND=WP) N-by-N array */ +/* On exit, W(1:K,1:K) contains the K computed */ +/* eigenvectors of the matrix Rayleigh quotient. */ +/* The Ritz vectors (returned in Z) are the */ +/* product of X (containing a POD basis for the input */ +/* matrix X) and W. See the descriptions of K, S, X and Z. */ +/* W is also used as a workspace to temporarily store the */ +/* right singular vectors of X. */ +/* ..... */ +/* LDW (input) INTEGER, LDW >= N */ +/* The leading dimension of the array W. */ +/* ..... */ +/* S (workspace/output) COMPLEX(KIND=WP) N-by-N array */ +/* The array S(1:K,1:K) is used for the matrix Rayleigh */ +/* quotient. This content is overwritten during */ +/* the eigenvalue decomposition by ZGEEV. */ +/* See the description of K. */ +/* ..... */ +/* LDS (input) INTEGER, LDS >= N */ +/* The leading dimension of the array S. */ +/* ..... */ +/* ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array */ +/* ZWORK is used as complex workspace in the complex SVD, as */ +/* specified by WHTSVD (1,2, 3 or 4) and for ZGEEV for computing */ +/* the eigenvalues of a Rayleigh quotient. */ +/* If the call to ZGEDMD is only workspace query, then */ +/* ZWORK(1) contains the minimal complex workspace length and */ +/* ZWORK(2) is the optimal complex workspace length. */ +/* Hence, the length of work is at least 2. */ +/* See the description of LZWORK. */ +/* ..... */ +/* LZWORK (input) INTEGER */ +/* The minimal length of the workspace vector ZWORK. */ +/* LZWORK is calculated as MAX(LZWORK_SVD, LZWORK_ZGEEV), */ +/* where LZWORK_ZGEEV = MAX( 1, 2*N ) and the minimal */ +/* LZWORK_SVD is calculated as follows */ +/* If WHTSVD == 1 :: ZGESVD :: */ +/* LZWORK_SVD = MAX(1,2*MIN(M,N)+MAX(M,N)) */ +/* If WHTSVD == 2 :: ZGESDD :: */ +/* LZWORK_SVD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N) */ +/* If WHTSVD == 3 :: ZGESVDQ :: */ +/* LZWORK_SVD = obtainable by a query */ +/* If WHTSVD == 4 :: ZGEJSV :: */ +/* LZWORK_SVD = obtainable by a query */ +/* If on entry LZWORK = -1, then a workspace query is */ +/* assumed and the procedure only computes the minimal */ +/* and the optimal workspace lengths and returns them in */ +/* LZWORK(1) and LZWORK(2), respectively. */ +/* ..... */ +/* RWORK (workspace/output) REAL(KIND=WP) LRWORK-by-1 array */ +/* On exit, RWORK(1:N) contains the singular values of */ +/* X (for JOBS=='N') or column scaled X (JOBS=='S', 'C'). */ +/* If WHTSVD==4, then RWORK(N+1) and RWORK(N+2) contain */ +/* scaling factor RWORK(N+2)/RWORK(N+1) used to scale X */ +/* and Y to avoid overflow in the SVD of X. */ +/* This may be of interest if the scaling option is off */ +/* and as many as possible smallest eigenvalues are */ +/* desired to the highest feasible accuracy. */ +/* If the call to ZGEDMD is only workspace query, then */ +/* RWORK(1) contains the minimal workspace length. */ +/* See the description of LRWORK. */ +/* ..... */ +/* LRWORK (input) INTEGER */ +/* The minimal length of the workspace vector RWORK. */ +/* LRWORK is calculated as follows: */ +/* LRWORK = MAX(1, N+LRWORK_SVD,N+LRWORK_ZGEEV), where */ +/* LRWORK_ZGEEV = MAX(1,2*N) and RWORK_SVD is the real workspace */ +/* for the SVD subroutine determined by the input parameter */ +/* WHTSVD. */ +/* If WHTSVD == 1 :: ZGESVD :: */ +/* LRWORK_SVD = 5*MIN(M,N) */ +/* If WHTSVD == 2 :: ZGESDD :: */ +/* LRWORK_SVD = MAX(5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), */ +/* 2*MAX(M,N)*MIN(M,N)+2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) ) */ +/* If WHTSVD == 3 :: ZGESVDQ :: */ +/* LRWORK_SVD = obtainable by a query */ +/* If WHTSVD == 4 :: ZGEJSV :: */ +/* LRWORK_SVD = obtainable by a query */ +/* If on entry LRWORK = -1, then a workspace query is */ +/* assumed and the procedure only computes the minimal */ +/* real workspace length and returns it in RWORK(1). */ +/* ..... */ +/* IWORK (workspace/output) INTEGER LIWORK-by-1 array */ +/* Workspace that is required only if WHTSVD equals */ +/* 2 , 3 or 4. (See the description of WHTSVD). */ +/* If on entry LWORK =-1 or LIWORK=-1, then the */ +/* minimal length of IWORK is computed and returned in */ +/* IWORK(1). See the description of LIWORK. */ +/* ..... */ +/* LIWORK (input) INTEGER */ +/* The minimal length of the workspace vector IWORK. */ +/* If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 */ +/* If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)) */ +/* If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1) */ +/* If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N) */ +/* If on entry LIWORK = -1, then a workspace query is */ +/* assumed and the procedure only computes the minimal */ +/* and the optimal workspace lengths for ZWORK, RWORK and */ +/* IWORK. See the descriptions of ZWORK, RWORK and IWORK. */ +/* ..... */ +/* INFO (output) INTEGER */ +/* -i < 0 :: On entry, the i-th argument had an */ +/* illegal value */ +/* = 0 :: Successful return. */ +/* = 1 :: Void input. Quick exit (M=0 or N=0). */ +/* = 2 :: The SVD computation of X did not converge. */ +/* Suggestion: Check the input data and/or */ +/* repeat with different WHTSVD. */ +/* = 3 :: The computation of the eigenvalues did not */ +/* converge. */ +/* = 4 :: If data scaling was requested on input and */ +/* the procedure found inconsistency in the data */ +/* such that for some column index i, */ +/* X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set */ +/* to zero if JOBS=='C'. The computation proceeds */ +/* with original or modified data and warning */ +/* flag is set with INFO=4. */ +/* ............................................................. */ +/* ............................................................. */ +/* Parameters */ +/* ~~~~~~~~~~ */ +/* Local scalars */ +/* ~~~~~~~~~~~~~ */ + +/* Local arrays */ +/* ~~~~~~~~~~~~ */ +/* External functions (BLAS and LAPACK) */ +/* ~~~~~~~~~~~~~~~~~ */ +/* External subroutines (BLAS and LAPACK) */ +/* ~~~~~~~~~~~~~~~~~~~~ */ +/* Intrinsic functions */ +/* ~~~~~~~~~~~~~~~~~~~ */ +/* ............................................................ */ + /* Parameter adjustments */ + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1 * 1; + y -= y_offset; + --eigs; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --res; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + w_dim1 = *ldw; + w_offset = 1 + w_dim1 * 1; + w -= w_offset; + s_dim1 = *lds; + s_offset = 1 + s_dim1 * 1; + s -= s_offset; + --zwork; + --rwork; + --iwork; + + /* Function Body */ + zero = 0.f; + one = 1.f; + zzero.r = 0.f, zzero.i = 0.f; + zone.r = 1.f, zone.i = 0.f; + +/* Test the input arguments */ + + wntres = lsame_(jobr, "R"); + sccolx = lsame_(jobs, "S") || lsame_(jobs, "C"); + sccoly = lsame_(jobs, "Y"); + wntvec = lsame_(jobz, "V"); + wntref = lsame_(jobf, "R"); + wntex = lsame_(jobf, "E"); + *info = 0; + lquery = *lzwork == -1 || *liwork == -1 || *lrwork == -1; + + if (! (sccolx || sccoly || lsame_(jobs, "N"))) { + *info = -1; + } else if (! (wntvec || lsame_(jobz, "N") || lsame_( + jobz, "F"))) { + *info = -2; + } else if (! (wntres || lsame_(jobr, "N")) || + wntres && ! wntvec) { + *info = -3; + } else if (! (wntref || wntex || lsame_(jobf, "N"))) + { + *info = -4; + } else if (! (*whtsvd == 1 || *whtsvd == 2 || *whtsvd == 3 || *whtsvd == + 4)) { + *info = -5; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || *n > *m) { + *info = -7; + } else if (*ldx < *m) { + *info = -9; + } else if (*ldy < *m) { + *info = -11; + } else if (! (*nrnk == -2 || *nrnk == -1 || *nrnk >= 1 && *nrnk <= *n)) { + *info = -12; + } else if (*tol < zero || *tol >= one) { + *info = -13; + } else if (*ldz < *m) { + *info = -17; + } else if ((wntref || wntex) && *ldb < *m) { + *info = -20; + } else if (*ldw < *n) { + *info = -22; + } else if (*lds < *n) { + *info = -24; + } + + if (*info == 0) { +/* Compute the minimal and the optimal workspace */ +/* requirements. Simulate running the code and */ +/* determine minimal and optimal sizes of the */ +/* workspace at any moment of the run. */ + if (*n == 0) { +/* Quick return. All output except K is void. */ +/* INFO=1 signals the void input. */ +/* In case of a workspace query, the default */ +/* minimal workspace lengths are returned. */ + if (lquery) { + iwork[1] = 1; + rwork[1] = 1.; + zwork[1].r = 2., zwork[1].i = 0.; + zwork[2].r = 2., zwork[2].i = 0.; + } else { + *k = 0; + } + *info = 1; + return 0; + } + iminwr = 1; + mlrwrk = f2cmax(1,*n); + mlwork = 2; + olwork = 2; +/* SELECT CASE ( WHTSVD ) */ + if (*whtsvd == 1) { +/* The following is specified as the minimal */ +/* length of WORK in the definition of ZGESVD: */ +/* MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N)) */ +/* Computing MAX */ + i__1 = 1, i__2 = (f2cmin(*m,*n) << 1) + f2cmax(*m,*n); + mwrsvd = f2cmax(i__1,i__2); + mlwork = f2cmax(mlwork,mwrsvd); +/* Computing MAX */ + i__1 = mlrwrk, i__2 = *n + f2cmin(*m,*n) * 5; + mlrwrk = f2cmax(i__1,i__2); + if (lquery) { + zgesvd_("O", "S", m, n, &x[x_offset], ldx, &rwork[1], &b[ + b_offset], ldb, &w[w_offset], ldw, &zwork[1], &c_n1, + rdummy, &info1); + lwrsvd = (integer) zwork[1].r; + olwork = f2cmax(olwork,lwrsvd); + } + } else if (*whtsvd == 2) { +/* The following is specified as the minimal */ +/* length of WORK in the definition of ZGESDD: */ +/* MWRSDD = 2*f2cmin(M,N)*f2cmin(M,N)+2*f2cmin(M,N)+f2cmax(M,N). */ +/* RWORK length: 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N) */ +/* In LAPACK 3.10.1 RWORK is defined differently. */ +/* Below we take f2cmax over the two versions. */ +/* IMINWR = 8*MIN(M,N) */ + mwrsdd = (f2cmin(*m,*n) << 1) * f2cmin(*m,*n) + (f2cmin(*m,*n) << 1) + f2cmax( + *m,*n); + mlwork = f2cmax(mlwork,mwrsdd); + iminwr = f2cmin(*m,*n) << 3; +/* Computing MAX */ +/* Computing MAX */ + i__3 = f2cmin(*m,*n) * 5 * f2cmin(*m,*n) + f2cmin(*m,*n) * 7, i__4 = f2cmin(* + m,*n) * 5 * f2cmin(*m,*n) + f2cmin(*m,*n) * 5, i__3 = f2cmax(i__3, + i__4), i__4 = (f2cmax(*m,*n) << 1) * f2cmin(*m,*n) + (f2cmin(*m,*n) + << 1) * f2cmin(*m,*n) + f2cmin(*m,*n); + i__1 = mlrwrk, i__2 = *n + f2cmax(i__3,i__4); + mlrwrk = f2cmax(i__1,i__2); + if (lquery) { + zgesdd_("O", m, n, &x[x_offset], ldx, &rwork[1], &b[b_offset], + ldb, &w[w_offset], ldw, &zwork[1], &c_n1, rdummy, & + iwork[1], &info1); +/* Computing MAX */ + i__1 = mwrsdd, i__2 = (integer) zwork[1].r; + lwrsdd = f2cmax(i__1,i__2); +/* Possible bug in ZGESDD optimal workspace size. */ + olwork = f2cmax(olwork,lwrsdd); + } + } else if (*whtsvd == 3) { + zgesvdq_("H", "P", "N", "R", "R", m, n, &x[x_offset], ldx, &rwork[ + 1], &z__[z_offset], ldz, &w[w_offset], ldw, &numrnk, & + iwork[1], &c_n1, &zwork[1], &c_n1, rdummy, &c_n1, &info1); + iminwr = iwork[1]; + mwrsvq = (integer) zwork[2].r; + mlwork = f2cmax(mlwork,mwrsvq); +/* Computing MAX */ + i__1 = mlrwrk, i__2 = *n + (integer) rdummy[0]; + mlrwrk = f2cmax(i__1,i__2); + if (lquery) { + lwrsvq = (integer) zwork[1].r; + olwork = f2cmax(olwork,lwrsvq); + } + } else if (*whtsvd == 4) { + *(unsigned char *)jsvopt = 'J'; + zgejsv_("F", "U", jsvopt, "R", "N", "P", m, n, &x[x_offset], ldx, + &rwork[1], &z__[z_offset], ldz, &w[w_offset], ldw, &zwork[ + 1], &c_n1, rdummy, &c_n1, &iwork[1], &info1); + iminwr = iwork[1]; + mwrsvj = (integer) zwork[2].r; + mlwork = f2cmax(mlwork,mwrsvj); +/* Computing MAX */ +/* Computing MAX */ + i__3 = 7, i__4 = (integer) rdummy[0]; + i__1 = mlrwrk, i__2 = *n + f2cmax(i__3,i__4); + mlrwrk = f2cmax(i__1,i__2); + if (lquery) { + lwrsvj = (integer) zwork[1].r; + olwork = f2cmax(olwork,lwrsvj); + } +/* END SELECT */ + } + if (wntvec || wntex || lsame_(jobz, "F")) { + *(unsigned char *)jobzl = 'V'; + } else { + *(unsigned char *)jobzl = 'N'; + } +/* Workspace calculation to the ZGEEV call */ +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + mwrkev = f2cmax(i__1,i__2); + mlwork = f2cmax(mlwork,mwrkev); +/* Computing MAX */ + i__1 = mlrwrk, i__2 = *n + (*n << 1); + mlrwrk = f2cmax(i__1,i__2); + if (lquery) { + zgeev_("N", jobzl, n, &s[s_offset], lds, &eigs[1], &w[w_offset], + ldw, &w[w_offset], ldw, &zwork[1], &c_n1, &rwork[1], & + info1); + lwrkev = (integer) zwork[1].r; + olwork = f2cmax(olwork,lwrkev); + } + + if (*liwork < iminwr && ! lquery) { + *info = -30; + } + if (*lrwork < mlrwrk && ! lquery) { + *info = -28; + } + if (*lzwork < mlwork && ! lquery) { + *info = -26; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGEDMD", &i__1); + return 0; + } else if (lquery) { +/* Return minimal and optimal workspace sizes */ + iwork[1] = iminwr; + rwork[1] = (doublereal) mlrwrk; + zwork[1].r = (doublereal) mlwork, zwork[1].i = 0.; + zwork[2].r = (doublereal) olwork, zwork[2].i = 0.; + return 0; + } +/* ............................................................ */ + + ofl = dlamch_("O"); + small = dlamch_("S"); + badxy = FALSE_; + +/* <1> Optional scaling of the snapshots (columns of X, Y) */ +/* ========================================================== */ + if (sccolx) { +/* The columns of X will be normalized. */ +/* To prevent overflows, the column norms of X are */ +/* carefully computed using ZLASSQ. */ + *k = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* WORK(i) = DZNRM2( M, X(1,i), 1 ) */ + scale = zero; + zlassq_(m, &x[i__ * x_dim1 + 1], &c__1, &scale, &ssum); + if (disnan_(&scale) || disnan_(&ssum)) { + *k = 0; + *info = -8; + i__2 = -(*info); + xerbla_("ZGEDMD", &i__2); + } + if (scale != zero && ssum != zero) { + rootsc = sqrt(ssum); + if (scale >= ofl / rootsc) { +/* Norm of X(:,i) overflows. First, X(:,i) */ +/* is scaled by */ +/* ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2. */ +/* Next, the norm of X(:,i) is stored without */ +/* overflow as RWORK(i) = - SCALE * (ROOTSC/M), */ +/* the minus sign indicating the 1/M factor. */ +/* Scaling is performed without overflow, and */ +/* underflow may occur in the smallest entries */ +/* of X(:,i). The relative backward and forward */ +/* errors are small in the ell_2 norm. */ + d__1 = one / rootsc; + zlascl_("G", &c__0, &c__0, &scale, &d__1, m, &c__1, &x[ + i__ * x_dim1 + 1], ldx, &info2); + rwork[i__] = -scale * (rootsc / (doublereal) (*m)); + } else { +/* X(:,i) will be scaled to unit 2-norm */ + rwork[i__] = scale * rootsc; + zlascl_("G", &c__0, &c__0, &rwork[i__], &one, m, &c__1, & + x[i__ * x_dim1 + 1], ldx, &info2); +/* X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i) ! INTRINSIC */ +/* LAPACK CALL */ + } + } else { + rwork[i__] = zero; + ++(*k); + } + } + if (*k == *n) { +/* All columns of X are zero. Return error code -8. */ +/* (the 8th input variable had an illegal value) */ + *k = 0; + *info = -8; + i__1 = -(*info); + xerbla_("ZGEDMD", &i__1); + return 0; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Now, apply the same scaling to the columns of Y. */ + if (rwork[i__] > zero) { + d__1 = one / rwork[i__]; + zdscal_(m, &d__1, &y[i__ * y_dim1 + 1], &c__1); +/* Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i) ! INTRINSIC */ +/* BLAS CALL */ + } else if (rwork[i__] < zero) { + d__1 = -rwork[i__]; + d__2 = one / (doublereal) (*m); + zlascl_("G", &c__0, &c__0, &d__1, &d__2, m, &c__1, &y[i__ * + y_dim1 + 1], ldy, &info2); +/* LAPACK C */ + } else if (z_abs(&y[izamax_(m, &y[i__ * y_dim1 + 1], &c__1) + i__ + * y_dim1]) != zero) { +/* X(:,i) is zero vector. For consistency, */ +/* Y(:,i) should also be zero. If Y(:,i) is not */ +/* zero, then the data might be inconsistent or */ +/* corrupted. If JOBS == 'C', Y(:,i) is set to */ +/* zero and a warning flag is raised. */ +/* The computation continues but the */ +/* situation will be reported in the output. */ + badxy = TRUE_; + if (lsame_(jobs, "C")) { + zdscal_(m, &zero, &y[i__ * y_dim1 + 1], &c__1); + } +/* BLAS CALL */ + } + } + } + + if (sccoly) { +/* The columns of Y will be normalized. */ +/* To prevent overflows, the column norms of Y are */ +/* carefully computed using ZLASSQ. */ + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* RWORK(i) = DZNRM2( M, Y(1,i), 1 ) */ + scale = zero; + zlassq_(m, &y[i__ * y_dim1 + 1], &c__1, &scale, &ssum); + if (disnan_(&scale) || disnan_(&ssum)) { + *k = 0; + *info = -10; + i__2 = -(*info); + xerbla_("ZGEDMD", &i__2); + } + if (scale != zero && ssum != zero) { + rootsc = sqrt(ssum); + if (scale >= ofl / rootsc) { +/* Norm of Y(:,i) overflows. First, Y(:,i) */ +/* is scaled by */ +/* ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2. */ +/* Next, the norm of Y(:,i) is stored without */ +/* overflow as RWORK(i) = - SCALE * (ROOTSC/M), */ +/* the minus sign indicating the 1/M factor. */ +/* Scaling is performed without overflow, and */ +/* underflow may occur in the smallest entries */ +/* of Y(:,i). The relative backward and forward */ +/* errors are small in the ell_2 norm. */ + d__1 = one / rootsc; + zlascl_("G", &c__0, &c__0, &scale, &d__1, m, &c__1, &y[ + i__ * y_dim1 + 1], ldy, &info2); + rwork[i__] = -scale * (rootsc / (doublereal) (*m)); + } else { +/* Y(:,i) will be scaled to unit 2-norm */ + rwork[i__] = scale * rootsc; + zlascl_("G", &c__0, &c__0, &rwork[i__], &one, m, &c__1, & + y[i__ * y_dim1 + 1], ldy, &info2); +/* Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i) ! INTRINSIC */ +/* LAPAC */ + } + } else { + rwork[i__] = zero; + } + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Now, apply the same scaling to the columns of X. */ + if (rwork[i__] > zero) { + d__1 = one / rwork[i__]; + zdscal_(m, &d__1, &x[i__ * x_dim1 + 1], &c__1); +/* X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i) ! INTRINSIC */ +/* BLAS CALL */ + } else if (rwork[i__] < zero) { + d__1 = -rwork[i__]; + d__2 = one / (doublereal) (*m); + zlascl_("G", &c__0, &c__0, &d__1, &d__2, m, &c__1, &x[i__ * + x_dim1 + 1], ldx, &info2); +/* LAPACK C */ + } else if (z_abs(&x[izamax_(m, &x[i__ * x_dim1 + 1], &c__1) + i__ + * x_dim1]) != zero) { +/* Y(:,i) is zero vector. If X(:,i) is not */ +/* zero, then a warning flag is raised. */ +/* The computation continues but the */ +/* situation will be reported in the output. */ + badxy = TRUE_; + } + } + } + +/* <2> SVD of the data snapshot matrix X. */ +/* ===================================== */ +/* The left singular vectors are stored in the array X. */ +/* The right singular vectors are in the array W. */ +/* The array W will later on contain the eigenvectors */ +/* of a Rayleigh quotient. */ + numrnk = *n; +/* SELECT CASE ( WHTSVD ) */ + if (*whtsvd == 1) { + zgesvd_("O", "S", m, n, &x[x_offset], ldx, &rwork[1], &b[b_offset], + ldb, &w[w_offset], ldw, &zwork[1], lzwork, &rwork[*n + 1], & + info1); +/* LA */ + *(unsigned char *)t_or_n__ = 'C'; + } else if (*whtsvd == 2) { + zgesdd_("O", m, n, &x[x_offset], ldx, &rwork[1], &b[b_offset], ldb, & + w[w_offset], ldw, &zwork[1], lzwork, &rwork[*n + 1], &iwork[1] + , &info1); +/* LAP */ + *(unsigned char *)t_or_n__ = 'C'; + } else if (*whtsvd == 3) { + i__1 = *lrwork - *n; + zgesvdq_("H", "P", "N", "R", "R", m, n, &x[x_offset], ldx, &rwork[1], + &z__[z_offset], ldz, &w[w_offset], ldw, &numrnk, &iwork[1], + liwork, &zwork[1], lzwork, &rwork[*n + 1], &i__1, &info1); +/* LAPACK CA */ + zlacpy_("A", m, &numrnk, &z__[z_offset], ldz, &x[x_offset], ldx); +/* LAPACK C */ + *(unsigned char *)t_or_n__ = 'C'; + } else if (*whtsvd == 4) { + i__1 = *lrwork - *n; + zgejsv_("F", "U", jsvopt, "R", "N", "P", m, n, &x[x_offset], ldx, & + rwork[1], &z__[z_offset], ldz, &w[w_offset], ldw, &zwork[1], + lzwork, &rwork[*n + 1], &i__1, &iwork[1], &info1); + zlacpy_("A", m, n, &z__[z_offset], ldz, &x[x_offset], ldx); +/* LAPACK CALL */ + *(unsigned char *)t_or_n__ = 'N'; + xscl1 = rwork[*n + 1]; + xscl2 = rwork[*n + 2]; + if (xscl1 != xscl2) { +/* This is an exceptional situation. If the */ +/* data matrices are not scaled and the */ +/* largest singular value of X overflows. */ +/* In that case ZGEJSV can return the SVD */ +/* in scaled form. The scaling factor can be used */ +/* to rescale the data (X and Y). */ + zlascl_("G", &c__0, &c__0, &xscl1, &xscl2, m, n, &y[y_offset], + ldy, &info2); + } +/* END SELECT */ + } + + if (info1 > 0) { +/* The SVD selected subroutine did not converge. */ +/* Return with an error code. */ + *info = 2; + return 0; + } + + if (rwork[1] == zero) { +/* The largest computed singular value of (scaled) */ +/* X is zero. Return error code -8 */ +/* (the 8th input variable had an illegal value). */ + *k = 0; + *info = -8; + i__1 = -(*info); + xerbla_("ZGEDMD", &i__1); + return 0; + } + +/* <3> Determine the numerical rank of the data */ +/* snapshots matrix X. This depends on the */ +/* parameters NRNK and TOL. */ +/* SELECT CASE ( NRNK ) */ + if (*nrnk == -1) { + *k = 1; + i__1 = numrnk; + for (i__ = 2; i__ <= i__1; ++i__) { + if (rwork[i__] <= rwork[1] * *tol || rwork[i__] <= small) { + myexit_(); + } + ++(*k); + } + } else if (*nrnk == -2) { + *k = 1; + i__1 = numrnk - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (rwork[i__ + 1] <= rwork[i__] * *tol || rwork[i__] <= small) { + myexit_(); + } + ++(*k); + } + } else { + *k = 1; + i__1 = *nrnk; + for (i__ = 2; i__ <= i__1; ++i__) { + if (rwork[i__] <= small) { + myexit_(); + } + ++(*k); + } +/* END SELECT */ + } +/* Now, U = X(1:M,1:K) is the SVD/POD basis for the */ +/* snapshot data in the input matrix X. */ +/* <4> Compute the Rayleigh quotient S = U^H * A * U. */ +/* Depending on the requested outputs, the computation */ +/* is organized to compute additional auxiliary */ +/* matrices (for the residuals and refinements). */ + +/* In all formulas below, we need V_k*Sigma_k^(-1) */ +/* where either V_k is in W(1:N,1:K), or V_k^H is in */ +/* W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)). */ + if (lsame_(t_or_n__, "N")) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = one / rwork[i__]; + zdscal_(n, &d__1, &w[i__ * w_dim1 + 1], &c__1); +/* W(1:N,i) = (ONE/RWORK(i)) * W(1:N,i) ! INTRINSIC */ +/* BLAS CALL */ + } + } else { +/* This non-unit stride access is due to the fact */ +/* that ZGESVD, ZGESVDQ and ZGESDD return the */ +/* adjoint matrix of the right singular vectors. */ +/* DO i = 1, K */ +/* CALL ZDSCAL( N, ONE/RWORK(i), W(i,1), LDW ) ! BLAS CALL */ +/* ! W(i,1:N) = (ONE/RWORK(i)) * W(i,1:N) ! INTRINSIC */ +/* END DO */ + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + rwork[*n + i__] = one / rwork[i__]; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * w_dim1; + i__4 = *n + i__; + z__2.r = rwork[i__4], z__2.i = zero; + i__5 = i__ + j * w_dim1; + z__1.r = z__2.r * w[i__5].r - z__2.i * w[i__5].i, z__1.i = + z__2.r * w[i__5].i + z__2.i * w[i__5].r; + w[i__3].r = z__1.r, w[i__3].i = z__1.i; + } + } + } + + if (wntref) { + +/* Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K))) */ +/* for computing the refined Ritz vectors */ +/* (optionally, outside ZGEDMD). */ + zgemm_("N", t_or_n__, m, k, n, &zone, &y[y_offset], ldy, &w[w_offset], + ldw, &zzero, &z__[z_offset], ldz); +/* Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! */ +/* Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K)) ! */ + +/* At this point Z contains */ +/* A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and */ +/* this is needed for computing the residuals. */ +/* This matrix is returned in the array B and */ +/* it can be used to compute refined Ritz vectors. */ +/* BLA */ + zlacpy_("A", m, k, &z__[z_offset], ldz, &b[b_offset], ldb); +/* B(1:M,1:K) = Z(1:M,1:K) ! INTRINSIC */ +/* BLAS CALL */ + zgemm_("C", "N", k, k, m, &zone, &x[x_offset], ldx, &z__[z_offset], + ldz, &zzero, &s[s_offset], lds); +/* S(1:K,1:K) = MATMUL(TRANSPOSE(CONJG(X(1:M,1:K))),Z(1:M,1:K)) */ +/* At this point S = U^H * A * U is the Rayleigh quotient. */ +/* BLA */ + } else { +/* A * U(:,1:K) is not explicitly needed and the */ +/* computation is organized differently. The Rayleigh */ +/* quotient is computed more efficiently. */ + zgemm_("C", "N", k, n, m, &zone, &x[x_offset], ldx, &y[y_offset], ldy, + &zzero, &z__[z_offset], ldz); +/* Z(1:K,1:N) = MATMUL( TRANSPOSE(CONJG(X(1:M,1:K))), Y(1:M,1:N) */ + + zgemm_("N", t_or_n__, k, k, n, &zone, &z__[z_offset], ldz, &w[ + w_offset], ldw, &zzero, &s[s_offset], lds); +/* S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! */ +/* S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K))) ! */ +/* At this point S = U^H * A * U is the Rayleigh quotient. */ +/* If the residuals are requested, save scaled V_k into Z. */ +/* Recall that V_k or V_k^H is stored in W. */ +/* BLAS */ + if (wntres || wntex) { + if (lsame_(t_or_n__, "N")) { + zlacpy_("A", n, k, &w[w_offset], ldw, &z__[z_offset], ldz); + } else { + zlacpy_("A", k, n, &w[w_offset], ldw, &z__[z_offset], ldz); + } + } + } + +/* <5> Compute the Ritz values and (if requested) the */ +/* right eigenvectors of the Rayleigh quotient. */ + + zgeev_("N", jobzl, k, &s[s_offset], lds, &eigs[1], &w[w_offset], ldw, &w[ + w_offset], ldw, &zwork[1], lzwork, &rwork[*n + 1], &info1); + +/* W(1:K,1:K) contains the eigenvectors of the Rayleigh */ +/* quotient. See the description of Z. */ +/* Also, see the description of ZGEEV. */ +/* LAPACK CALL */ + if (info1 > 0) { +/* ZGEEV failed to compute the eigenvalues and */ +/* eigenvectors of the Rayleigh quotient. */ + *info = 3; + return 0; + } + +/* <6> Compute the eigenvectors (if requested) and, */ +/* the residuals (if requested). */ + + if (wntvec || wntex) { + if (wntres) { + if (wntref) { +/* Here, if the refinement is requested, we have */ +/* A*U(:,1:K) already computed and stored in Z. */ +/* For the residuals, need Y = A * U(:,1;K) * W. */ + zgemm_("N", "N", m, k, k, &zone, &z__[z_offset], ldz, &w[ + w_offset], ldw, &zzero, &y[y_offset], ldy); +/* Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K) ! INTRINSIC */ +/* This frees Z; Y contains A * U(:,1:K) * W. */ +/* BLAS CALL */ + } else { +/* Compute S = V_k * Sigma_k^(-1) * W, where */ +/* V_k * Sigma_k^(-1) (or its adjoint) is stored in Z */ + zgemm_(t_or_n__, "N", n, k, k, &zone, &z__[z_offset], ldz, &w[ + w_offset], ldw, &zzero, &s[s_offset], lds); +/* Then, compute Z = Y * S = */ +/* = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = */ +/* = A * U(:,1:K) * W(1:K,1:K) */ + zgemm_("N", "N", m, k, n, &zone, &y[y_offset], ldy, &s[ + s_offset], lds, &zzero, &z__[z_offset], ldz); +/* Save a copy of Z into Y and free Z for holding */ +/* the Ritz vectors. */ + zlacpy_("A", m, k, &z__[z_offset], ldz, &y[y_offset], ldy); + if (wntex) { + zlacpy_("A", m, k, &z__[z_offset], ldz, &b[b_offset], ldb); + } + } + } else if (wntex) { +/* Compute S = V_k * Sigma_k^(-1) * W, where */ +/* V_k * Sigma_k^(-1) is stored in Z */ + zgemm_(t_or_n__, "N", n, k, k, &zone, &z__[z_offset], ldz, &w[ + w_offset], ldw, &zzero, &s[s_offset], lds); +/* Then, compute Z = Y * S = */ +/* = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = */ +/* = A * U(:,1:K) * W(1:K,1:K) */ + zgemm_("N", "N", m, k, n, &zone, &y[y_offset], ldy, &s[s_offset], + lds, &zzero, &b[b_offset], ldb); +/* The above call replaces the following two calls */ +/* that were used in the developing-testing phase. */ +/* CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, & */ +/* LDS, ZZERO, Z, LDZ) */ +/* Save a copy of Z into B and free Z for holding */ +/* the Ritz vectors. */ +/* CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB ) */ + } + +/* Compute the Ritz vectors */ + if (wntvec) { + zgemm_("N", "N", m, k, k, &zone, &x[x_offset], ldx, &w[w_offset], + ldw, &zzero, &z__[z_offset], ldz); + } +/* Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K)) ! INTRINSIC */ + +/* BLAS CALL */ + if (wntres) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + z__1.r = -eigs[i__2].r, z__1.i = -eigs[i__2].i; + zaxpy_(m, &z__1, &z__[i__ * z_dim1 + 1], &c__1, &y[i__ * + y_dim1 + 1], &c__1); +/* Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i) ! INTR */ +/* BLAS */ + res[i__] = dznrm2_(m, &y[i__ * y_dim1 + 1], &c__1); +/* BLAS */ + } + } + } + + if (*whtsvd == 4) { + rwork[*n + 1] = xscl1; + rwork[*n + 2] = xscl2; + } + +/* Successful exit. */ + if (! badxy) { + *info = 0; + } else { +/* A warning on possible data inconsistency. */ +/* This should be a rare event. */ + *info = 4; + } +/* ............................................................ */ + return 0; +/* ...... */ +} /* zgedmd_ */ + diff --git a/lapack-netlib/SRC/zgedmdq.c b/lapack-netlib/SRC/zgedmdq.c index 447b23014..1815f0814 100644 --- a/lapack-netlib/SRC/zgedmdq.c +++ b/lapack-netlib/SRC/zgedmdq.c @@ -509,3 +509,785 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ +/* -- translated by f2c (version 20000121). + You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static integer c_n1 = -1; + +/* Subroutine */ int zgedmdq_(char *jobs, char *jobz, char *jobr, char *jobq, + char *jobt, char *jobf, integer *whtsvd, integer *m, integer *n, + doublecomplex *f, integer *ldf, doublecomplex *x, integer *ldx, + doublecomplex *y, integer *ldy, integer *nrnk, doublereal *tol, + integer *k, doublecomplex *eigs, doublecomplex *z__, integer *ldz, + doublereal *res, doublecomplex *b, integer *ldb, doublecomplex *v, + integer *ldv, doublecomplex *s, integer *lds, doublecomplex *zwork, + integer *lzwork, doublereal *work, integer *lwork, integer *iwork, + integer *liwork, integer *info) +{ + /* System generated locals */ + integer f_dim1, f_offset, x_dim1, x_offset, y_dim1, y_offset, z_dim1, + z_offset, b_dim1, b_offset, v_dim1, v_offset, s_dim1, s_offset, + i__1, i__2; + + /* Local variables */ + doublereal zero; + integer info1; + extern logical lsame_(char *, char *); + char jobvl[1]; + integer minmn; + logical wantq; + integer mlwqr, olwqr; + logical wntex; + doublecomplex zzero; + extern /* Subroutine */ int zgedmd_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, doublereal *, integer *, + doublecomplex *, doublecomplex *, integer *, doublereal *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, integer *, integer *, integer *, integer *), xerbla_(char *, integer *); + integer mlwdmd, olwdmd; + logical sccolx, sccoly; + extern /* Subroutine */ int zgeqrf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ), zlacpy_(char *, integer *, integer *, doublecomplex *, integer + *, doublecomplex *, integer *), zlaset_(char *, integer *, + integer *, doublecomplex *, doublecomplex *, doublecomplex *, + integer *); + integer iminwr; + logical wntvec, wntvcf; + integer mlwgqr; + logical wntref; + integer mlwork, olwgqr, olwork, mlrwrk, mlwmqr, olwmqr; + logical lquery, wntres, wnttrf, wntvcq; + extern /* Subroutine */ int zungqr_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *), zunmqr_(char *, char *, integer *, integer + *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + doublereal one; + +/* March 2023 */ +/* ..... */ +/* USE iso_fortran_env */ +/* INTEGER, PARAMETER :: WP = real64 */ +/* ..... */ +/* Scalar arguments */ +/* Array arguments */ +/* ..... */ +/* Purpose */ +/* ======= */ +/* ZGEDMDQ computes the Dynamic Mode Decomposition (DMD) for */ +/* a pair of data snapshot matrices, using a QR factorization */ +/* based compression of the data. For the input matrices */ +/* X and Y such that Y = A*X with an unaccessible matrix */ +/* A, ZGEDMDQ computes a certain number of Ritz pairs of A using */ +/* the standard Rayleigh-Ritz extraction from a subspace of */ +/* range(X) that is determined using the leading left singular */ +/* vectors of X. Optionally, ZGEDMDQ returns the residuals */ +/* of the computed Ritz pairs, the information needed for */ +/* a refinement of the Ritz vectors, or the eigenvectors of */ +/* the Exact DMD. */ +/* For further details see the references listed */ +/* below. For more details of the implementation see [3]. */ + +/* References */ +/* ========== */ +/* [1] P. Schmid: Dynamic mode decomposition of numerical */ +/* and experimental data, */ +/* Journal of Fluid Mechanics 656, 5-28, 2010. */ +/* [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal */ +/* decompositions: analysis and enhancements, */ +/* SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. */ +/* [3] Z. Drmac: A LAPACK implementation of the Dynamic */ +/* Mode Decomposition I. Technical report. AIMDyn Inc. */ +/* and LAPACK Working Note 298. */ +/* [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. */ +/* Brunton, N. Kutz: On Dynamic Mode Decomposition: */ +/* Theory and Applications, Journal of Computational */ +/* Dynamics 1(2), 391 -421, 2014. */ + +/* Developed and supported by: */ +/* =========================== */ +/* Developed and coded by Zlatko Drmac, Faculty of Science, */ +/* University of Zagreb; drmac@math.hr */ +/* In cooperation with */ +/* AIMdyn Inc., Santa Barbara, CA. */ +/* and supported by */ +/* - DARPA SBIR project "Koopman Operator-Based Forecasting */ +/* for Nonstationary Processes from Near-Term, Limited */ +/* Observational Data" Contract No: W31P4Q-21-C-0007 */ +/* - DARPA PAI project "Physics-Informed Machine Learning */ +/* Methodologies" Contract No: HR0011-18-9-0033 */ +/* - DARPA MoDyL project "A Data-Driven, Operator-Theoretic */ +/* Framework for Space-Time Analysis of Process Dynamics" */ +/* Contract No: HR0011-16-C-0116 */ +/* Any opinions, findings and conclusions or recommendations */ +/* expressed in this material are those of the author and */ +/* do not necessarily reflect the views of the DARPA SBIR */ +/* Program Office. */ +/* ============================================================ */ +/* Distribution Statement A: */ +/* Approved for Public Release, Distribution Unlimited. */ +/* Cleared by DARPA on September 29, 2022 */ +/* ============================================================ */ +/* ...................................................................... */ +/* Arguments */ +/* ========= */ +/* JOBS (input) CHARACTER*1 */ +/* Determines whether the initial data snapshots are scaled */ +/* by a diagonal matrix. The data snapshots are the columns */ +/* of F. The leading N-1 columns of F are denoted X and the */ +/* trailing N-1 columns are denoted Y. */ +/* 'S' :: The data snapshots matrices X and Y are multiplied */ +/* with a diagonal matrix D so that X*D has unit */ +/* nonzero columns (in the Euclidean 2-norm) */ +/* 'C' :: The snapshots are scaled as with the 'S' option. */ +/* If it is found that an i-th column of X is zero */ +/* vector and the corresponding i-th column of Y is */ +/* non-zero, then the i-th column of Y is set to */ +/* zero and a warning flag is raised. */ +/* 'Y' :: The data snapshots matrices X and Y are multiplied */ +/* by a diagonal matrix D so that Y*D has unit */ +/* nonzero columns (in the Euclidean 2-norm) */ +/* 'N' :: No data scaling. */ +/* ..... */ +/* JOBZ (input) CHARACTER*1 */ +/* Determines whether the eigenvectors (Koopman modes) will */ +/* be computed. */ +/* 'V' :: The eigenvectors (Koopman modes) will be computed */ +/* and returned in the matrix Z. */ +/* See the description of Z. */ +/* 'F' :: The eigenvectors (Koopman modes) will be returned */ +/* in factored form as the product Z*V, where Z */ +/* is orthonormal and V contains the eigenvectors */ +/* of the corresponding Rayleigh quotient. */ +/* See the descriptions of F, V, Z. */ +/* 'Q' :: The eigenvectors (Koopman modes) will be returned */ +/* in factored form as the product Q*Z, where Z */ +/* contains the eigenvectors of the compression of the */ +/* underlying discretized operator onto the span of */ +/* the data snapshots. See the descriptions of F, V, Z. */ +/* Q is from the initial QR factorization. */ +/* 'N' :: The eigenvectors are not computed. */ +/* ..... */ +/* JOBR (input) CHARACTER*1 */ +/* Determines whether to compute the residuals. */ +/* 'R' :: The residuals for the computed eigenpairs will */ +/* be computed and stored in the array RES. */ +/* See the description of RES. */ +/* For this option to be legal, JOBZ must be 'V'. */ +/* 'N' :: The residuals are not computed. */ +/* ..... */ +/* JOBQ (input) CHARACTER*1 */ +/* Specifies whether to explicitly compute and return the */ +/* unitary matrix from the QR factorization. */ +/* 'Q' :: The matrix Q of the QR factorization of the data */ +/* snapshot matrix is computed and stored in the */ +/* array F. See the description of F. */ +/* 'N' :: The matrix Q is not explicitly computed. */ +/* ..... */ +/* JOBT (input) CHARACTER*1 */ +/* Specifies whether to return the upper triangular factor */ +/* from the QR factorization. */ +/* 'R' :: The matrix R of the QR factorization of the data */ +/* snapshot matrix F is returned in the array Y. */ +/* See the description of Y and Further details. */ +/* 'N' :: The matrix R is not returned. */ +/* ..... */ +/* JOBF (input) CHARACTER*1 */ +/* Specifies whether to store information needed for post- */ +/* processing (e.g. computing refined Ritz vectors) */ +/* 'R' :: The matrix needed for the refinement of the Ritz */ +/* vectors is computed and stored in the array B. */ +/* See the description of B. */ +/* 'E' :: The unscaled eigenvectors of the Exact DMD are */ +/* computed and returned in the array B. See the */ +/* description of B. */ +/* 'N' :: No eigenvector refinement data is computed. */ +/* To be useful on exit, this option needs JOBQ='Q'. */ +/* ..... */ +/* WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } */ +/* Allows for a selection of the SVD algorithm from the */ +/* LAPACK library. */ +/* 1 :: ZGESVD (the QR SVD algorithm) */ +/* 2 :: ZGESDD (the Divide and Conquer algorithm; if enough */ +/* workspace available, this is the fastest option) */ +/* 3 :: ZGESVDQ (the preconditioned QR SVD ; this and 4 */ +/* are the most accurate options) */ +/* 4 :: ZGEJSV (the preconditioned Jacobi SVD; this and 3 */ +/* are the most accurate options) */ +/* For the four methods above, a significant difference in */ +/* the accuracy of small singular values is possible if */ +/* the snapshots vary in norm so that X is severely */ +/* ill-conditioned. If small (smaller than EPS*||X||) */ +/* singular values are of interest and JOBS=='N', then */ +/* the options (3, 4) give the most accurate results, where */ +/* the option 4 is slightly better and with stronger */ +/* theoretical background. */ +/* If JOBS=='S', i.e. the columns of X will be normalized, */ +/* then all methods give nearly equally accurate results. */ +/* ..... */ +/* M (input) INTEGER, M >= 0 */ +/* The state space dimension (the number of rows of F). */ +/* ..... */ +/* N (input) INTEGER, 0 <= N <= M */ +/* The number of data snapshots from a single trajectory, */ +/* taken at equidistant discrete times. This is the */ +/* number of columns of F. */ +/* ..... */ +/* F (input/output) COMPLEX(KIND=WP) M-by-N array */ +/* > On entry, */ +/* the columns of F are the sequence of data snapshots */ +/* from a single trajectory, taken at equidistant discrete */ +/* times. It is assumed that the column norms of F are */ +/* in the range of the normalized floating point numbers. */ +/* < On exit, */ +/* If JOBQ == 'Q', the array F contains the orthogonal */ +/* matrix/factor of the QR factorization of the initial */ +/* data snapshots matrix F. See the description of JOBQ. */ +/* If JOBQ == 'N', the entries in F strictly below the main */ +/* diagonal contain, column-wise, the information on the */ +/* Householder vectors, as returned by ZGEQRF. The */ +/* remaining information to restore the orthogonal matrix */ +/* of the initial QR factorization is stored in ZWORK(1:MIN(M,N)). */ +/* See the description of ZWORK. */ +/* ..... */ +/* LDF (input) INTEGER, LDF >= M */ +/* The leading dimension of the array F. */ +/* ..... */ +/* X (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array */ +/* X is used as workspace to hold representations of the */ +/* leading N-1 snapshots in the orthonormal basis computed */ +/* in the QR factorization of F. */ +/* On exit, the leading K columns of X contain the leading */ +/* K left singular vectors of the above described content */ +/* of X. To lift them to the space of the left singular */ +/* vectors U(:,1:K) of the input data, pre-multiply with the */ +/* Q factor from the initial QR factorization. */ +/* See the descriptions of F, K, V and Z. */ +/* ..... */ +/* LDX (input) INTEGER, LDX >= N */ +/* The leading dimension of the array X. */ +/* ..... */ +/* Y (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N) array */ +/* Y is used as workspace to hold representations of the */ +/* trailing N-1 snapshots in the orthonormal basis computed */ +/* in the QR factorization of F. */ +/* On exit, */ +/* If JOBT == 'R', Y contains the MIN(M,N)-by-N upper */ +/* triangular factor from the QR factorization of the data */ +/* snapshot matrix F. */ +/* ..... */ +/* LDY (input) INTEGER , LDY >= N */ +/* The leading dimension of the array Y. */ +/* ..... */ +/* NRNK (input) INTEGER */ +/* Determines the mode how to compute the numerical rank, */ +/* i.e. how to truncate small singular values of the input */ +/* matrix X. On input, if */ +/* NRNK = -1 :: i-th singular value sigma(i) is truncated */ +/* if sigma(i) <= TOL*sigma(1) */ +/* This option is recommended. */ +/* NRNK = -2 :: i-th singular value sigma(i) is truncated */ +/* if sigma(i) <= TOL*sigma(i-1) */ +/* This option is included for R&D purposes. */ +/* It requires highly accurate SVD, which */ +/* may not be feasible. */ +/* The numerical rank can be enforced by using positive */ +/* value of NRNK as follows: */ +/* 0 < NRNK <= N-1 :: at most NRNK largest singular values */ +/* will be used. If the number of the computed nonzero */ +/* singular values is less than NRNK, then only those */ +/* nonzero values will be used and the actually used */ +/* dimension is less than NRNK. The actual number of */ +/* the nonzero singular values is returned in the variable */ +/* K. See the description of K. */ +/* ..... */ +/* TOL (input) REAL(KIND=WP), 0 <= TOL < 1 */ +/* The tolerance for truncating small singular values. */ +/* See the description of NRNK. */ +/* ..... */ +/* K (output) INTEGER, 0 <= K <= N */ +/* The dimension of the SVD/POD basis for the leading N-1 */ +/* data snapshots (columns of F) and the number of the */ +/* computed Ritz pairs. The value of K is determined */ +/* according to the rule set by the parameters NRNK and */ +/* TOL. See the descriptions of NRNK and TOL. */ +/* ..... */ +/* EIGS (output) COMPLEX(KIND=WP) (N-1)-by-1 array */ +/* The leading K (K<=N-1) entries of EIGS contain */ +/* the computed eigenvalues (Ritz values). */ +/* See the descriptions of K, and Z. */ +/* ..... */ +/* Z (workspace/output) COMPLEX(KIND=WP) M-by-(N-1) array */ +/* If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i) */ +/* is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1. */ +/* If JOBZ == 'F', then the Z(:,i)'s are given implicitly as */ +/* Z*V, where Z contains orthonormal matrix (the product of */ +/* Q from the initial QR factorization and the SVD/POD_basis */ +/* returned by ZGEDMD in X) and the second factor (the */ +/* eigenvectors of the Rayleigh quotient) is in the array V, */ +/* as returned by ZGEDMD. That is, X(:,1:K)*V(:,i) */ +/* is an eigenvector corresponding to EIGS(i). The columns */ +/* of V(1:K,1:K) are the computed eigenvectors of the */ +/* K-by-K Rayleigh quotient. */ +/* See the descriptions of EIGS, X and V. */ +/* ..... */ +/* LDZ (input) INTEGER , LDZ >= M */ +/* The leading dimension of the array Z. */ +/* ..... */ +/* RES (output) REAL(KIND=WP) (N-1)-by-1 array */ +/* RES(1:K) contains the residuals for the K computed */ +/* Ritz pairs, */ +/* RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. */ +/* See the description of EIGS and Z. */ +/* ..... */ +/* B (output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array. */ +/* IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can */ +/* be used for computing the refined vectors; see further */ +/* details in the provided references. */ +/* If JOBF == 'E', B(1:N,1;K) contains */ +/* A*U(:,1:K)*W(1:K,1:K), which are the vectors from the */ +/* Exact DMD, up to scaling by the inverse eigenvalues. */ +/* In both cases, the content of B can be lifted to the */ +/* original dimension of the input data by pre-multiplying */ +/* with the Q factor from the initial QR factorization. */ +/* Here A denotes a compression of the underlying operator. */ +/* See the descriptions of F and X. */ +/* If JOBF =='N', then B is not referenced. */ +/* ..... */ +/* LDB (input) INTEGER, LDB >= MIN(M,N) */ +/* The leading dimension of the array B. */ +/* ..... */ +/* V (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array */ +/* On exit, V(1:K,1:K) V contains the K eigenvectors of */ +/* the Rayleigh quotient. The Ritz vectors */ +/* (returned in Z) are the product of Q from the initial QR */ +/* factorization (see the description of F) X (see the */ +/* description of X) and V. */ +/* ..... */ +/* LDV (input) INTEGER, LDV >= N-1 */ +/* The leading dimension of the array V. */ +/* ..... */ +/* S (output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array */ +/* The array S(1:K,1:K) is used for the matrix Rayleigh */ +/* quotient. This content is overwritten during */ +/* the eigenvalue decomposition by ZGEEV. */ +/* See the description of K. */ +/* ..... */ +/* LDS (input) INTEGER, LDS >= N-1 */ +/* The leading dimension of the array S. */ +/* ..... */ +/* ZWORK (workspace/output) COMPLEX(KIND=WP) LWORK-by-1 array */ +/* On exit, */ +/* ZWORK(1:MIN(M,N)) contains the scalar factors of the */ +/* elementary reflectors as returned by ZGEQRF of the */ +/* M-by-N input matrix F. */ +/* If the call to ZGEDMDQ is only workspace query, then */ +/* ZWORK(1) contains the minimal complex workspace length and */ +/* ZWORK(2) is the optimal complex workspace length. */ +/* Hence, the length of work is at least 2. */ +/* See the description of LZWORK. */ +/* ..... */ +/* LZWORK (input) INTEGER */ +/* The minimal length of the workspace vector ZWORK. */ +/* LZWORK is calculated as follows: */ +/* Let MLWQR = N (minimal workspace for ZGEQRF[M,N]) */ +/* MLWDMD = minimal workspace for ZGEDMD (see the */ +/* description of LWORK in ZGEDMD) */ +/* MLWMQR = N (minimal workspace for */ +/* ZUNMQR['L','N',M,N,N]) */ +/* MLWGQR = N (minimal workspace for ZUNGQR[M,N,N]) */ +/* MINMN = MIN(M,N) */ +/* Then */ +/* LZWORK = MAX(2, MIN(M,N)+MLWQR, MINMN+MLWDMD) */ +/* is further updated as follows: */ +/* if JOBZ == 'V' or JOBZ == 'F' THEN */ +/* LZWORK = MAX(LZWORK, MINMN+MLWMQR) */ +/* if JOBQ == 'Q' THEN */ +/* LZWORK = MAX(ZLWORK, MINMN+MLWGQR) */ + +/* ..... */ +/* WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array */ +/* On exit, */ +/* WORK(1:N-1) contains the singular values of */ +/* the input submatrix F(1:M,1:N-1). */ +/* If the call to ZGEDMDQ is only workspace query, then */ +/* WORK(1) contains the minimal workspace length and */ +/* WORK(2) is the optimal workspace length. hence, the */ +/* length of work is at least 2. */ +/* See the description of LWORK. */ +/* ..... */ +/* LWORK (input) INTEGER */ +/* The minimal length of the workspace vector WORK. */ +/* LWORK is the same as in ZGEDMD, because in ZGEDMDQ */ +/* only ZGEDMD requires real workspace for snapshots */ +/* of dimensions MIN(M,N)-by-(N-1). */ +/* If on entry LWORK = -1, then a workspace query is */ +/* assumed and the procedure only computes the minimal */ +/* and the optimal workspace length for WORK. */ +/* ..... */ +/* IWORK (workspace/output) INTEGER LIWORK-by-1 array */ +/* Workspace that is required only if WHTSVD equals */ +/* 2 , 3 or 4. (See the description of WHTSVD). */ +/* If on entry LWORK =-1 or LIWORK=-1, then the */ +/* minimal length of IWORK is computed and returned in */ +/* IWORK(1). See the description of LIWORK. */ +/* ..... */ +/* LIWORK (input) INTEGER */ +/* The minimal length of the workspace vector IWORK. */ +/* If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 */ +/* Let M1=MIN(M,N), N1=N-1. Then */ +/* If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1)) */ +/* If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1) */ +/* If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1) */ +/* If on entry LIWORK = -1, then a workspace query is */ +/* assumed and the procedure only computes the minimal */ +/* and the optimal workspace lengths for both WORK and */ +/* IWORK. See the descriptions of WORK and IWORK. */ +/* ..... */ +/* INFO (output) INTEGER */ +/* -i < 0 :: On entry, the i-th argument had an */ +/* illegal value */ +/* = 0 :: Successful return. */ +/* = 1 :: Void input. Quick exit (M=0 or N=0). */ +/* = 2 :: The SVD computation of X did not converge. */ +/* Suggestion: Check the input data and/or */ +/* repeat with different WHTSVD. */ +/* = 3 :: The computation of the eigenvalues did not */ +/* converge. */ +/* = 4 :: If data scaling was requested on input and */ +/* the procedure found inconsistency in the data */ +/* such that for some column index i, */ +/* X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set */ +/* to zero if JOBS=='C'. The computation proceeds */ +/* with original or modified data and warning */ +/* flag is set with INFO=4. */ +/* ............................................................. */ +/* ............................................................. */ +/* Parameters */ +/* ~~~~~~~~~~ */ +/* COMPLEX(KIND=WP), PARAMETER :: ZONE = ( 1.0_WP, 0.0_WP ) */ + +/* Local scalars */ +/* ~~~~~~~~~~~~~ */ + +/* External functions (BLAS and LAPACK) */ +/* ~~~~~~~~~~~~~~~~~ */ + +/* External subroutines (BLAS and LAPACK) */ +/* ~~~~~~~~~~~~~~~~~~~~ */ +/* External subroutines */ +/* ~~~~~~~~~~~~~~~~~~~~ */ +/* Intrinsic functions */ +/* ~~~~~~~~~~~~~~~~~~~ */ +/* .......................................................... */ + /* Parameter adjustments */ + f_dim1 = *ldf; + f_offset = 1 + f_dim1 * 1; + f -= f_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1 * 1; + y -= y_offset; + --eigs; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --res; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + s_dim1 = *lds; + s_offset = 1 + s_dim1 * 1; + s -= s_offset; + --zwork; + --work; + --iwork; + + /* Function Body */ + one = 1.f; + zero = 0.f; + zzero.r = 0.f, zzero.i = 0.f; + +/* Test the input arguments */ + wntres = lsame_(jobr, "R"); + sccolx = lsame_(jobs, "S") || lsame_(jobs, "C"); + sccoly = lsame_(jobs, "Y"); + wntvec = lsame_(jobz, "V"); + wntvcf = lsame_(jobz, "F"); + wntvcq = lsame_(jobz, "Q"); + wntref = lsame_(jobf, "R"); + wntex = lsame_(jobf, "E"); + wantq = lsame_(jobq, "Q"); + wnttrf = lsame_(jobt, "R"); + minmn = f2cmin(*m,*n); + *info = 0; + lquery = *lzwork == -1 || *lwork == -1 || *liwork == -1; + + if (! (sccolx || sccoly || lsame_(jobs, "N"))) { + *info = -1; + } else if (! (wntvec || wntvcf || wntvcq || lsame_(jobz, "N"))) { + *info = -2; + } else if (! (wntres || lsame_(jobr, "N")) || + wntres && lsame_(jobz, "N")) { + *info = -3; + } else if (! (wantq || lsame_(jobq, "N"))) { + *info = -4; + } else if (! (wnttrf || lsame_(jobt, "N"))) { + *info = -5; + } else if (! (wntref || wntex || lsame_(jobf, "N"))) + { + *info = -6; + } else if (! (*whtsvd == 1 || *whtsvd == 2 || *whtsvd == 3 || *whtsvd == + 4)) { + *info = -7; + } else if (*m < 0) { + *info = -8; + } else if (*n < 0 || *n > *m + 1) { + *info = -9; + } else if (*ldf < *m) { + *info = -11; + } else if (*ldx < minmn) { + *info = -13; + } else if (*ldy < minmn) { + *info = -15; + } else if (! (*nrnk == -2 || *nrnk == -1 || *nrnk >= 1 && *nrnk <= *n)) { + *info = -16; + } else if (*tol < zero || *tol >= one) { + *info = -17; + } else if (*ldz < *m) { + *info = -21; + } else if ((wntref || wntex) && *ldb < minmn) { + *info = -24; + } else if (*ldv < *n - 1) { + *info = -26; + } else if (*lds < *n - 1) { + *info = -28; + } + + if (wntvec || wntvcf || wntvcq) { + *(unsigned char *)jobvl = 'V'; + } else { + *(unsigned char *)jobvl = 'N'; + } + if (*info == 0) { +/* Compute the minimal and the optimal workspace */ +/* requirements. Simulate running the code and */ +/* determine minimal and optimal sizes of the */ +/* workspace at any moment of the run. */ + if (*n == 0 || *n == 1) { +/* All output except K is void. INFO=1 signals */ +/* the void input. In case of a workspace query, */ +/* the minimal workspace lengths are returned. */ + if (lquery) { + iwork[1] = 1; + zwork[1].r = 2., zwork[1].i = 0.; + zwork[2].r = 2., zwork[2].i = 0.; + work[1] = 2.; + work[2] = 2.; + } else { + *k = 0; + } + *info = 1; + return 0; + } + mlrwrk = 2; + mlwork = 2; + olwork = 2; + iminwr = 1; + mlwqr = f2cmax(1,*n); +/* Minimal workspace length for ZGEQRF. */ +/* Computing MAX */ + i__1 = mlwork, i__2 = minmn + mlwqr; + mlwork = f2cmax(i__1,i__2); + if (lquery) { + zgeqrf_(m, n, &f[f_offset], ldf, &zwork[1], &zwork[1], &c_n1, & + info1); + olwqr = (integer) zwork[1].r; +/* Computing MAX */ + i__1 = olwork, i__2 = minmn + olwqr; + olwork = f2cmax(i__1,i__2); + } + i__1 = *n - 1; + zgedmd_(jobs, jobvl, jobr, jobf, whtsvd, &minmn, &i__1, &x[x_offset], + ldx, &y[y_offset], ldy, nrnk, tol, k, &eigs[1], &z__[z_offset] + , ldz, &res[1], &b[b_offset], ldb, &v[v_offset], ldv, &s[ + s_offset], lds, &zwork[1], &c_n1, &work[1], &c_n1, &iwork[1], + &c_n1, &info1); + mlwdmd = (integer) zwork[1].r; +/* Computing MAX */ + i__1 = mlwork, i__2 = minmn + mlwdmd; + mlwork = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = mlrwrk, i__2 = (integer) work[1]; + mlrwrk = f2cmax(i__1,i__2); + iminwr = f2cmax(iminwr,iwork[1]); + if (lquery) { + olwdmd = (integer) zwork[2].r; +/* Computing MAX */ + i__1 = olwork, i__2 = minmn + olwdmd; + olwork = f2cmax(i__1,i__2); + } + if (wntvec || wntvcf) { + mlwmqr = f2cmax(1,*n); +/* Computing MAX */ + i__1 = mlwork, i__2 = minmn + mlwmqr; + mlwork = f2cmax(i__1,i__2); + if (lquery) { + zunmqr_("L", "N", m, n, &minmn, &f[f_offset], ldf, &zwork[1], + &z__[z_offset], ldz, &zwork[1], &c_n1, &info1); + olwmqr = (integer) zwork[1].r; +/* Computing MAX */ + i__1 = olwork, i__2 = minmn + olwmqr; + olwork = f2cmax(i__1,i__2); + } + } + if (wantq) { + mlwgqr = f2cmax(1,*n); +/* Computing MAX */ + i__1 = mlwork, i__2 = minmn + mlwgqr; + mlwork = f2cmax(i__1,i__2); + if (lquery) { + zungqr_(m, &minmn, &minmn, &f[f_offset], ldf, &zwork[1], & + zwork[1], &c_n1, &info1); + olwgqr = (integer) zwork[1].r; +/* Computing MAX */ + i__1 = olwork, i__2 = minmn + olwgqr; + olwork = f2cmax(i__1,i__2); + } + } + if (*liwork < iminwr && ! lquery) { + *info = -34; + } + if (*lwork < mlrwrk && ! lquery) { + *info = -32; + } + if (*lzwork < mlwork && ! lquery) { + *info = -30; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGEDMDQ", &i__1); + return 0; + } else if (lquery) { +/* Return minimal and optimal workspace sizes */ + iwork[1] = iminwr; + zwork[1].r = (doublereal) mlwork, zwork[1].i = 0.; + zwork[2].r = (doublereal) olwork, zwork[2].i = 0.; + work[1] = (doublereal) mlrwrk; + work[2] = (doublereal) mlrwrk; + return 0; + } +/* ..... */ +/* Initial QR factorization that is used to represent the */ +/* snapshots as elements of lower dimensional subspace. */ +/* For large scale computation with M >> N, at this place */ +/* one can use an out of core QRF. */ + + i__1 = *lzwork - minmn; + zgeqrf_(m, n, &f[f_offset], ldf, &zwork[1], &zwork[minmn + 1], &i__1, & + info1); + +/* Define X and Y as the snapshots representations in the */ +/* orthogonal basis computed in the QR factorization. */ +/* X corresponds to the leading N-1 and Y to the trailing */ +/* N-1 snapshots. */ + i__1 = *n - 1; + zlaset_("L", &minmn, &i__1, &zzero, &zzero, &x[x_offset], ldx); + i__1 = *n - 1; + zlacpy_("U", &minmn, &i__1, &f[f_offset], ldf, &x[x_offset], ldx); + i__1 = *n - 1; + zlacpy_("A", &minmn, &i__1, &f[(f_dim1 << 1) + 1], ldf, &y[y_offset], ldy); + if (*m >= 3) { + i__1 = minmn - 2; + i__2 = *n - 2; + zlaset_("L", &i__1, &i__2, &zzero, &zzero, &y[y_dim1 + 3], ldy); + } + +/* Compute the DMD of the projected snapshot pairs (X,Y) */ + i__1 = *n - 1; + i__2 = *lzwork - minmn; + zgedmd_(jobs, jobvl, jobr, jobf, whtsvd, &minmn, &i__1, &x[x_offset], ldx, + &y[y_offset], ldy, nrnk, tol, k, &eigs[1], &z__[z_offset], ldz, & + res[1], &b[b_offset], ldb, &v[v_offset], ldv, &s[s_offset], lds, & + zwork[minmn + 1], &i__2, &work[1], lwork, &iwork[1], liwork, & + info1); + if (info1 == 2 || info1 == 3) { +/* Return with error code. See ZGEDMD for details. */ + *info = info1; + return 0; + } else { + *info = info1; + } + +/* The Ritz vectors (Koopman modes) can be explicitly */ +/* formed or returned in factored form. */ + if (wntvec) { +/* Compute the eigenvectors explicitly. */ + if (*m > minmn) { + i__1 = *m - minmn; + zlaset_("A", &i__1, k, &zzero, &zzero, &z__[minmn + 1 + z_dim1], + ldz); + } + i__1 = *lzwork - minmn; + zunmqr_("L", "N", m, k, &minmn, &f[f_offset], ldf, &zwork[1], &z__[ + z_offset], ldz, &zwork[minmn + 1], &i__1, &info1); + } else if (wntvcf) { +/* Return the Ritz vectors (eigenvectors) in factored */ +/* form Z*V, where Z contains orthonormal matrix (the */ +/* product of Q from the initial QR factorization and */ +/* the SVD/POD_basis returned by ZGEDMD in X) and the */ +/* second factor (the eigenvectors of the Rayleigh */ +/* quotient) is in the array V, as returned by ZGEDMD. */ + zlacpy_("A", n, k, &x[x_offset], ldx, &z__[z_offset], ldz); + if (*m > *n) { + i__1 = *m - *n; + zlaset_("A", &i__1, k, &zzero, &zzero, &z__[*n + 1 + z_dim1], ldz); + } + i__1 = *lzwork - minmn; + zunmqr_("L", "N", m, k, &minmn, &f[f_offset], ldf, &zwork[1], &z__[ + z_offset], ldz, &zwork[minmn + 1], &i__1, &info1); + } + +/* Some optional output variables: */ + +/* The upper triangular factor R in the initial QR */ +/* factorization is optionally returned in the array Y. */ +/* This is useful if this call to ZGEDMDQ is to be */ +/* followed by a streaming DMD that is implemented in a */ +/* QR compressed form. */ + if (wnttrf) { +/* Return the upper triangular R in Y */ + zlaset_("A", &minmn, n, &zzero, &zzero, &y[y_offset], ldy); + zlacpy_("U", &minmn, n, &f[f_offset], ldf, &y[y_offset], ldy); + } + +/* The orthonormal/unitary factor Q in the initial QR */ +/* factorization is optionally returned in the array F. */ +/* Same as with the triangular factor above, this is */ +/* useful in a streaming DMD. */ + if (wantq) { +/* Q overwrites F */ + i__1 = *lzwork - minmn; + zungqr_(m, &minmn, &minmn, &f[f_offset], ldf, &zwork[1], &zwork[minmn + + 1], &i__1, &info1); + } + + return 0; + +} /* zgedmdq_ */ + From fa6d06359a28ddba5996d9e25120d9907d83a1dd Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 21 Jun 2023 17:17:31 +0200 Subject: [PATCH 14/15] correct list placement of zgedmd/zgedmdq --- cmake/lapack.cmake | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake index 12127531d..d339f0ce9 100644 --- a/cmake/lapack.cmake +++ b/cmake/lapack.cmake @@ -915,7 +915,8 @@ set(ZLASRC zheevd_2stage.c zheev_2stage.c zheevx_2stage.c zheevr_2stage.c zhbev_2stage.c zhbevx_2stage.c zhbevd_2stage.c zhegv_2stage.c zgesvdq.c zlaunhr_col_getrfnp.c zlaunhr_col_getrfnp2.c - zungtsqr.c zungtsqr_row.c zunhr_col.c zlatrs3.c ztrsyl3.c zgelst.c) + zungtsqr.c zungtsqr_row.c zunhr_col.c zlatrs3.c ztrsyl3.c zgelst.c + zgedmd.c zgedmdq.c) set(ZXLASRC zgesvxx.c zgerfsx.c zla_gerfsx_extended.c zla_geamv.c zla_gercond_c.c zla_gercond_x.c zla_gerpvgrw.c zsysvxx.c zsyrfsx.c @@ -925,7 +926,7 @@ set(ZXLASRC zgesvxx.c zgerfsx.c zla_gerfsx_extended.c zla_geamv.c zla_gbrfsx_extended.c zla_gbamv.c zla_gbrcond_c.c zla_gbrcond_x.c zla_gbrpvgrw.c zhesvxx.c zherfsx.c zla_herfsx_extended.c zla_heamv.c zla_hercond_c.c zla_hercond_x.c zla_herpvgrw.c - zla_lin_berr.c zlarscl2.c zlascl2.c zla_wwaddw.c zgedmd.c zgedmdq.c) + zla_lin_berr.c zlarscl2.c zlascl2.c zla_wwaddw.c) if(USE_XBLAS) From 42fd3f4ec7d091519eafa60e4e08849aa19af98d Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 21 Jun 2023 22:52:31 +0200 Subject: [PATCH 15/15] Add standard module path for Windows flang --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 16b9da4f5..65ef538e9 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -115,7 +115,7 @@ jobs: mkdir build cd build call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" - cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_TESTING=OFF -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON .. + cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER="flang -I C:\Miniconda\Library\include\flang" -DBUILD_TESTING=OFF -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON .. cmake --build . --config Release ctest